From e60da2535800dd6e7517d0f0930e9097bda9cd40 Mon Sep 17 00:00:00 2001 From: Martin Forell <martin.forell@kit.edu> Date: Fri, 18 Oct 2024 09:13:22 +0000 Subject: [PATCH] Update file dev-llama-3-large.yaml --- dev-llama-3-large.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dev-llama-3-large.yaml b/dev-llama-3-large.yaml index a2c52eb..fcfd94c 100644 --- a/dev-llama-3-large.yaml +++ b/dev-llama-3-large.yaml @@ -9,7 +9,7 @@ config_file: | f16: true gpu_layers: 85 parameters: - model: downloads/llama_backend/Llama-3.1-Nemotron-70B-Instruct-HF-Q6_K.gguf + model: downloads/llama_backend/Llama-3.1-Nemotron-70B-Instruct-HF-Q5_K_S.gguf template: chat_message: | @@ -54,9 +54,9 @@ config_file: | "model": "dev-llama-3", "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] }' -# files: -# - filename: "downloads/llama_backend/Hermes-2-Theta-Llama-3-70B-Q5_K_M.gguf" -# sha256: "" -# uri: "https://huggingface.co/bartowski/Hermes-2-Theta-Llama-3-70B-GGUF/resolve/main/Hermes-2-Theta-Llama-3-70B-Q5_K_M.gguf" +files: +- filename: "downloads/llama_backend/Llama-3.1-Nemotron-70B-Instruct-HF-Q5_K_S.gguf" + sha256: "" + uri: "https://huggingface.co/bartowski/Llama-3.1-Nemotron-70B-Instruct-HF-GGUF/resolve/main/Llama-3.1-Nemotron-70B-Instruct-HF-Q5_K_S.gguf" -# download model with huggingface-cli download bartowski/Llama-3.1-Nemotron-70B-Instruct-HF-GGUF --local-dir /data/localai-data/downloads/llama_backend --include '*Q6_K*' +# download model with huggingface-cli download bartowski/Llama-3.1-Nemotron-70B-Instruct-HF-GGUF --local-dir /data/localai-data/downloads/llama_backend --include '*Q5_K_S*' -- GitLab