# Create the llama env for the llama-cpp OpenAI-compatible web server

Xcode must be installed

(base) > xcode-select -p
/Applications/Xcode.app/Contents/Developer

—

Conda with Metal GPU support must be installed

# Download the Miniforge installer for Apple Silicon. curl is used because
# stock macOS ships curl but not wget; -f fails on HTTP errors instead of
# saving an error page, -L follows the release redirect, -O keeps the file name.
curl -fL -O https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh
bash Miniforge3-MacOSX-arm64.sh

—

Create a Python env

# Dedicated conda environment named "llama", pinned to Python 3.9.16.
conda create --name llama python=3.9.16
conda activate llama

—

Install the LATEST llama-cpp-python

# Remove any existing install; --no-cache-dir below keeps pip from reusing a
# previously cached wheel.
pip uninstall llama-cpp-python -y
# CMAKE_ARGS must prefix the pip command on the same line. As a bare assignment
# on its own line it is only an unexported shell variable, so the build that
# pip launches never sees it and Metal support is not enabled.
CMAKE_ARGS="-DLLAMA_METAL=on" pip install -U llama-cpp-python --no-cache-dir
# Add the optional dependencies needed by the bundled server.
pip install 'llama-cpp-python[server]'

—

Run the OpenAI API-compatible server

# MODEL is the GGUF model file to serve; use a full path if the file is not in
# the current directory.
export MODEL=zephyr-7b-beta.Q5_K_M.gguf
# Quoted so that a model path containing spaces is passed as a single argument.
python -m llama_cpp.server --model "$MODEL" --n_gpu_layers 1

# Create Python env for the RAG app

conda create --name RAG_ENV python=3.9
# Activate the new environment before installing anything; otherwise the pip
# installs that follow land in whichever environment is currently active.
conda activate RAG_ENV
pip install ipykernel
# Register this environment as a Jupyter kernel named RAG_ENV for the current user.
python -m ipykernel install --user --name=RAG_ENV

pip install llama-index

# Remove any existing install; --no-cache-dir below keeps pip from reusing a
# previously cached wheel.
pip uninstall llama-cpp-python -y
# CMAKE_ARGS must prefix the pip command on the same line. As a bare assignment
# on its own line it is only an unexported shell variable, so the build that
# pip launches never sees it and Metal support is not enabled.
CMAKE_ARGS="-DLLAMA_METAL=on" pip install -U llama-cpp-python --no-cache-dir
# Add the optional dependencies needed by the bundled server.
pip install 'llama-cpp-python[server]'

# Remaining packages for the RAG app environment, installed one at a time.
pip install transformers

# --pre allows pre-release builds; --extra-index-url adds PyTorch's nightly CPU
# wheel index alongside the default package index.
pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu

pip install pypdf

pip install sentencepiece

pip install chromadb

pip install gradio

pip install langchain

# -U upgrades sentence-transformers if an older version is already installed.
pip install -U sentence-transformers