# Setup recipe for a local RAG stack on Apple Silicon (macOS arm64).
#
# Section 1 builds a conda env ("llama") that serves a GGUF model through the
# llama-cpp-python OpenAI-compatible web server. Section 2 builds a separate
# conda env ("RAG_ENV") for the RAG application itself.
#
# The commands are meant to be run one at a time in an interactive terminal
# where conda is initialised (the original notes were captured at a
# "(base) >" prompt).
# NOTE(review): the server command at the end of section 1 presumably keeps
# running in the foreground -- run section 2 from a second terminal.

# ---------------------------------------------------------------------------
# 1. Create the llama env for the llama-cpp OpenAI-compatible web server
# ---------------------------------------------------------------------------

# Xcode must be installed. This prints the active developer directory; the
# expected output is:
#   /Applications/Xcode.app/Contents/Developer
xcode-select -p

# Conda with Metal GPU support must be installed (Miniforge, arm64 build).
wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh
bash Miniforge3-MacOSX-arm64.sh

# Create a Python env and switch to it.
conda create -n llama python=3.9.16
conda activate llama

# Install the LATEST llama-cpp-python.
# Any existing copy is removed first, then the package is reinstalled with
# CMAKE_ARGS enabling the Metal backend; --no-cache-dir keeps pip from reusing
# a previously cached build.
pip uninstall llama-cpp-python -y
CMAKE_ARGS="-DLLAMA_METAL=on" pip install -U llama-cpp-python --no-cache-dir
pip install 'llama-cpp-python[server]'

# Run the OpenAI API compatible server.
# MODEL is a relative path, so the .gguf file must be in the current
# directory (or replace the value with an absolute path).
export MODEL=zephyr-7b-beta.Q5_K_M.gguf
python -m llama_cpp.server --model "$MODEL" --n_gpu_layers 1

# ---------------------------------------------------------------------------
# 2. Create the Python env for the RAG app
# ---------------------------------------------------------------------------

conda create --name RAG_ENV python=3.9
# Activate the new env before installing anything: without this step every
# pip install below lands in whichever env happens to be active (e.g. the
# "llama" env from section 1) instead of RAG_ENV.
conda activate RAG_ENV

# Register the env as a Jupyter kernel named RAG_ENV.
pip install ipykernel
python -m ipykernel install --user --name=RAG_ENV

pip install llama-index

# Same Metal-enabled llama-cpp-python install as in section 1.
pip uninstall llama-cpp-python -y
CMAKE_ARGS="-DLLAMA_METAL=on" pip install -U llama-cpp-python --no-cache-dir
pip install 'llama-cpp-python[server]'

pip install transformers
# Pre-release PyTorch packages from the nightly CPU wheel index.
pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
pip install pypdf
pip install sentencepiece
pip install chromadb
pip install gradio
pip install langchain
pip install -U sentence-transformers