diff --git a/dev-llama-3-large.yaml b/dev-llama-3-large.yaml
index 70ba3cdd58f6cd337c61d1082ce0843fed639220..9029eedc2d0b90931c77644beb1d61a7187ae7d2 100644
--- a/dev-llama-3-large.yaml
+++ b/dev-llama-3-large.yaml
@@ -9,7 +9,7 @@ config_file: |
   f16: true
   gpu_layers: 85
   parameters:
-    model: downloads/llama_backend/Hermes-2-Theta-Llama-3-70B-Q5_K_M.gguf
+    model: downloads/llama_backend/Llama-3.1-Nemotron-70B-Instruct-HF-Q6_K
 
   template:
     chat_message: |
@@ -54,8 +54,9 @@ config_file: |
     "model": "dev-llama-3",
     "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
   }'
-files:
-- filename: "downloads/llama_backend/Hermes-2-Theta-Llama-3-70B-Q5_K_M.gguf"
-  sha256: ""
-  uri: "https://huggingface.co/bartowski/Hermes-2-Theta-Llama-3-70B-GGUF/resolve/main/Hermes-2-Theta-Llama-3-70B-Q5_K_M.gguf"
-  
\ No newline at end of file
+# files:
+# - filename: "downloads/llama_backend/Hermes-2-Theta-Llama-3-70B-Q5_K_M.gguf"
+#   sha256: ""
+#   uri: "https://huggingface.co/bartowski/Hermes-2-Theta-Llama-3-70B-GGUF/resolve/main/Hermes-2-Theta-Llama-3-70B-Q5_K_M.gguf"
+
+# download model with huggingface-cli download bartowski/Llama-3.1-Nemotron-70B-Instruct-HF-GGUF --local-dir /data/localai-data/downloads/llama_backend --include '*Q6_K*'
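
For reviewers who want to reproduce this change, a minimal smoke-test sketch follows. It assumes the data directory is /data/localai-data (taken from the download comment in the diff) and that the LocalAI server listens on its default localhost:8080; the port is an assumption, not part of the diff.

```sh
# Fetch the Q6_K quantization referenced by the new `model:` path.
# (This is the same command as the comment added at the bottom of the config.)
huggingface-cli download bartowski/Llama-3.1-Nemotron-70B-Instruct-HF-GGUF \
  --local-dir /data/localai-data/downloads/llama_backend \
  --include '*Q6_K*'

# Smoke-test the model, mirroring the curl example embedded in the config.
# Note: "temperature" is sent at the top level here, per the OpenAI-style
# request schema; the in-config example nests it inside the message object.
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "dev-llama-3",
        "messages": [{"role": "user", "content": "How are you doing?"}],
        "temperature": 0.1
      }'
```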