From 3db8e5e068ec1b159dcb776819d530749e26730d Mon Sep 17 00:00:00 2001 From: Martin Forell <martin.forell@kit.edu> Date: Fri, 18 Oct 2024 09:08:42 +0000 Subject: [PATCH] new llama nvidia model --- dev-llama-3-large.yaml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/dev-llama-3-large.yaml b/dev-llama-3-large.yaml index 70ba3cd..9029eed 100644 --- a/dev-llama-3-large.yaml +++ b/dev-llama-3-large.yaml @@ -9,7 +9,7 @@ config_file: | f16: true gpu_layers: 85 parameters: - model: downloads/llama_backend/Hermes-2-Theta-Llama-3-70B-Q5_K_M.gguf + model: downloads/llama_backend/Llama-3.1-Nemotron-70B-Instruct-HF-Q6_K template: chat_message: | @@ -54,8 +54,9 @@ config_file: | "model": "dev-llama-3", "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] }' -files: -- filename: "downloads/llama_backend/Hermes-2-Theta-Llama-3-70B-Q5_K_M.gguf" - sha256: "" - uri: "https://huggingface.co/bartowski/Hermes-2-Theta-Llama-3-70B-GGUF/resolve/main/Hermes-2-Theta-Llama-3-70B-Q5_K_M.gguf" - \ No newline at end of file +# files: +# - filename: "downloads/llama_backend/Hermes-2-Theta-Llama-3-70B-Q5_K_M.gguf" +# sha256: "" +# uri: "https://huggingface.co/bartowski/Hermes-2-Theta-Llama-3-70B-GGUF/resolve/main/Hermes-2-Theta-Llama-3-70B-Q5_K_M.gguf" + +# download model with huggingface-cli download bartowski/Llama-3.1-Nemotron-70B-Instruct-HF-GGUF --local-dir /data/localai-data/downloads/llama_backend --include '*Q6_K*' -- GitLab