Commit 7510865b authored by Martin Forell's avatar Martin Forell
Update 2 files

- /dev-llama-3-competence.yaml
- /index.yaml
parent 09b0d4d8
dev-llama-3-competence.yaml:
name: "dev-llama-3-competence"
description: |
  This alias is for a model with high throughput.
config_file: |
  name: dev-llama-3-competence
  mmap: true
  backend: llama
  f16: true
  gpu_layers: 85
  parameters:
    model: downloads/llama_backend/competency-extraction-finetune-v2_q5_1.gguf
  template:
    chat_message: |
      <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
      {{ if .FunctionCall -}}
      Function call:
      {{ else if eq .RoleName "tool" -}}
      Function response:
      {{ end -}}
      {{ if .Content -}}
      {{.Content -}}
      {{ else if .FunctionCall -}}
      {{ toJson .FunctionCall -}}
      {{ end -}}
      <|eot_id|>
    function: |
      <|start_header_id|>system<|end_header_id|>
      You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
      <tools>
      {{range .Functions}}
      {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
      {{end}}
      </tools>
      Use the following pydantic model json schema for each tool call you will make:
      {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
      Function call:
    chat: |
      <|begin_of_text|>{{.Input }}
      <|start_header_id|>assistant<|end_header_id|>
    completion: |
      {{.Input}}
  context_size: 8192
  stopwords:
    - <|im_end|>
    - <dummy32000>
    - "<|eot_id|>"
    - <|end_of_text|>
usage: |
  curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
    "model": "dev-llama-3-competence",
    "temperature": 0.1,
    "messages": [{"role": "user", "content": "How are you doing?"}]
  }'
files:
  - filename: "downloads/llama_backend/competency-extraction-finetune-v2_q5_1.gguf"
    sha256: ""
    uri: "https://huggingface.co/zebrazinker/competency-extraction-finetune-v2/resolve/main/competency-extraction-finetune-v2_q5_1.gguf"
# Download the model manually with:
#   huggingface-cli download bartowski/Llama-3.1-Nemotron-70B-Instruct-HF-GGUF --local-dir /data/localai-data/downloads/llama_backend --include '*Q6_K*'
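The `sha256` field in the `files:` entry above is left empty, so the checksum of the downloaded GGUF file is not pinned. A minimal sketch of how the download could be verified by hand before filling that field in (the helper name `sha256_of_file` is hypothetical, not part of LocalAI):

```python
import hashlib

def sha256_of_file(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file through SHA-256 so multi-GB GGUF files never sit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Once a reference checksum is published upstream, compare before trusting the weights:
# expected = "..."  # fill in from the model card
# assert sha256_of_file("downloads/llama_backend/competency-extraction-finetune-v2_q5_1.gguf") == expected
```

The resulting hex digest is what would go into the `sha256: ""` field so LocalAI (or a wrapper script) can detect corrupted or tampered downloads.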
index.yaml:
@@ -129,6 +129,13 @@
     - Text generation
     - 70B Model
     - functions
+- url: https://gitlab.kit.edu/kit/aifb/BIS/infrastruktur/localai/localai-model-gallery/-/raw/main/dev-llama-3-competence.yaml
+  name: dev-llama-3-competence
+  tags:
+    - gpu
+    - Text generation
+    - 70B Model
+    - functions
 - url: https://gitlab.kit.edu/kit/aifb/BIS/infrastruktur/localai/localai-model-gallery/-/raw/main/dev-llama-3-large-hermes.yaml
   name: dev-llama-3-large-hermes
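Each entry added to index.yaml above follows the same shape: a `url` pointing at a raw model YAML plus a `name` and `tags`. A quick sanity-check sketch for such entries (the dict mirrors the entry added in this commit; the required-key list is an assumption for illustration, not LocalAI's official gallery schema):

```python
REQUIRED_KEYS = {"url", "name"}  # assumed minimum for a gallery index entry

entry = {
    "url": "https://gitlab.kit.edu/kit/aifb/BIS/infrastruktur/localai/localai-model-gallery/-/raw/main/dev-llama-3-competence.yaml",
    "name": "dev-llama-3-competence",
    "tags": ["gpu", "Text generation", "70B Model", "functions"],
}

def validate_entry(e: dict) -> list[str]:
    """Return a list of problems; an empty list means the entry looks well-formed."""
    problems = [f"missing key: {k}" for k in REQUIRED_KEYS if k not in e]
    if not e.get("url", "").startswith("https://"):
        problems.append("url should be an https:// link to a raw model YAML")
    return problems

print(validate_entry(entry))  # prints [] when the entry is well-formed
```

Running such a check before committing would catch copy-paste slips (a stale `name`, a blob URL instead of a raw URL) that otherwise only surface when LocalAI tries to fetch the gallery.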