Commit e60da253 authored by Martin Forell

Update file dev-llama-3-large.yaml

parent 81df9d30
@@ -9,7 +9,7 @@ config_file: |
   f16: true
   gpu_layers: 85
   parameters:
-    model: downloads/llama_backend/Llama-3.1-Nemotron-70B-Instruct-HF-Q6_K.gguf
+    model: downloads/llama_backend/Llama-3.1-Nemotron-70B-Instruct-HF-Q5_K_S.gguf
   template:
     chat_message: |
@@ -54,9 +54,9 @@ config_file: |
       "model": "dev-llama-3",
       "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
     }'
-# files:
-#   - filename: "downloads/llama_backend/Hermes-2-Theta-Llama-3-70B-Q5_K_M.gguf"
-#     sha256: ""
-#     uri: "https://huggingface.co/bartowski/Hermes-2-Theta-Llama-3-70B-GGUF/resolve/main/Hermes-2-Theta-Llama-3-70B-Q5_K_M.gguf"
+files:
+  - filename: "downloads/llama_backend/Llama-3.1-Nemotron-70B-Instruct-HF-Q5_K_S.gguf"
+    sha256: ""
+    uri: "https://huggingface.co/bartowski/Llama-3.1-Nemotron-70B-Instruct-HF-GGUF/resolve/main/Llama-3.1-Nemotron-70B-Instruct-HF-Q5_K_S.gguf"
 # download model with huggingface-cli download bartowski/Llama-3.1-Nemotron-70B-Instruct-HF-GGUF --local-dir /data/localai-data/downloads/llama_backend --include '*Q6_K*'
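
Note that the unchanged comment at the end of the second hunk still uses the '*Q6_K*' include pattern, while this commit switches both the model path and the files entry to the Q5_K_S quant. A minimal sketch of the download command adjusted to fetch the file the new config references, assuming the same local directory as the in-file comment and that the '*Q5_K_S*' glob matches the new filename:

  # assumption: '*Q5_K_S*' matches Llama-3.1-Nemotron-70B-Instruct-HF-Q5_K_S.gguf;
  # --local-dir taken from the existing in-file comment
  huggingface-cli download bartowski/Llama-3.1-Nemotron-70B-Instruct-HF-GGUF \
    --local-dir /data/localai-data/downloads/llama_backend \
    --include '*Q5_K_S*'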