llama 32

fbd39490 · Martin Forell · ee7d6588 · fbd39490 · fbd39490
Commit fbd39490 authored 6 months ago by Martin Forell
--- a/dev-llama-3-tiny.yaml
+++ b/dev-llama-3-tiny.yaml
+name: "dev-llama-3-tiny"
+
+description: |
+  This alias is for a model with high throughput.
+# Model configuration, embedded as a literal block scalar; the text below is
+# itself a YAML document.
+config_file: |
+  # Same value as the top-level `name` of this gallery entry.
+  name: dev-llama-3-tiny
+  mmap: true
+  backend: llama
+  f16: true 
+  # NOTE(review): presumably the number of model layers offloaded to the GPU;
+  # confirm against the LocalAI documentation.
+  gpu_layers: 35
+  parameters:
+    # Same relative path as the `filename` declared under the top-level `files:`.
+    model: downloads/llama_backend/Llama-3.2-3B-Instruct-Q8_0.gguf
+  
+  # <|eot_id|> is the marker the templates below emit at the end of a message.
+  # NOTE(review): <|im_end|> and <dummy32000> are not used by any template in
+  # this file; presumably carried over from another model's config, so confirm
+  # they are still wanted.
+  stopwords:
+  - <|im_end|>
+  - <dummy32000>
+  - <|eot_id|>
+  - <|end_of_text|>
+  # Prompt templates. The {{ ... }} syntax looks like Go text/template;
+  # TODO confirm the template engine against the LocalAI documentation.
+  template:
+    # Conversation wrapper: prefixes .Input with <|begin_of_text|> and ends
+    # with an assistant header.
+    chat: |
+      <|begin_of_text|>{{.Input }}
+      <|start_header_id|>assistant<|end_header_id|>
+    # Per-message template. Emits a role header (assistant, system, tool or
+    # user; any other .RoleName yields an empty header), then a
+    # "Function call:" label when .FunctionCall is set or a
+    # "Function response:" label for the tool role, then .Content (or the
+    # JSON-encoded .FunctionCall when .Content is empty), and finally
+    # <|eot_id|>.
+    chat_message: |
+      <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+  
+      {{ if .FunctionCall -}}
+      Function call:
+      {{ else if eq .RoleName "tool" -}}
+      Function response:
+      {{ end -}}
+      {{ if .Content -}}
+      {{.Content -}}
+      {{ else if .FunctionCall -}}
+      {{ toJson .FunctionCall -}}
+      {{ end -}}
+      <|eot_id|>
+    # Plain completion: the template consists of .Input only.
+    completion: |
+      {{.Input}}
+    # Function-calling prompt: a system message that lists every entry of
+    # .Functions (name, description, JSON-encoded parameters) and tells the
+    # model to answer with <function=NAME>{...}</function>, followed by
+    # .Input and an assistant header. The requested reply format is the one
+    # matched by `response_regex` in the `function:` settings of this config.
+    # NOTE(review): "If a you choose" reads like a typo, but this text is sent
+    # to the model verbatim, so it is left unchanged here; confirm against the
+    # upstream Llama 3.1 tool-calling prompt before editing.
+    function: |
+      <|start_header_id|>system<|end_header_id|>
+  
+      You have access to the following functions:
+  
+      {{range .Functions}}
+      Use the function '{{.Name}}' to '{{.Description}}'
+      {{toJson .Parameters}}
+      {{end}}
+  
+      Think very carefully before calling functions.
+      If a you choose to call a function ONLY reply in the following format with no prefix or suffix:
+  
+      <function=example_function_name>{{`{{"example_name": "example_value"}}`}}</function>
+  
+      Reminder:
+      - If looking for real time information use relevant functions before falling back to searching on internet
+      - Function calls MUST follow the specified format, start with <function= and end with </function>
+      - Required parameters MUST be specified
+      - Only call one function at a time
+      - Put the entire function call reply on one line
+      <|eot_id|>
+      {{.Input }}
+      <|start_header_id|>assistant<|end_header_id|>
+  # Function-calling settings.
+  function:
+    disable_no_action: true
+    grammar:
+      #disable: true
+      no_mixed_free_string: true
+      mixed_mode: true
+      schema_type: llama3.1 # or JSON is supported too (json)
+    # Pattern applied to the model reply: the named group `name` captures the
+    # function name (word characters only) and `arguments` captures everything
+    # between the opening tag and </function>.
+    response_regex:
+    - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
+  # NOTE(review): presumably the context window in tokens; confirm against
+  # the LocalAI documentation.
+  context_size: 8192
+
+  # Example request for this model. The "model" field uses this config's
+  # `name`, and "temperature" is a top-level field of the request body rather
+  # than a property of an individual message.
+  usage: |
+        curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+            "model": "dev-llama-3-tiny",
+            "messages": [{"role": "user", "content": "How are you doing?"}],
+            "temperature": 0.1
+        }'
+# Model files belonging to this gallery entry; `filename` is the same path
+# that `parameters.model` in `config_file` refers to.
+files:
+- filename: "downloads/llama_backend/Llama-3.2-3B-Instruct-Q8_0.gguf"
+  # NOTE(review): the checksum is empty, so presumably the download is not
+  # integrity-checked; fill in the SHA-256 of the file.
+  sha256: ""
+  uri: "https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q8_0.gguf"
+  # Leftover alternative source for a different model (Hermes-2-Pro-Llama-3-8B,
+  # Q5_K_M); not used by this entry.
+  # uri: "https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q5_K_M.gguf"
+         
\ No newline at end of file
--- a/index.yaml
+++ b/index.yaml
@@ -133,6 +133,14 @@
    - 8B Model
    - functions
    - deutsch
+- url: https://gitlab.kit.edu/kit/aifb/BIS/infrastruktur/localai/localai-model-gallery/-/raw/main/dev-llama-3-tiny.yaml
+  name: dev-llama-3-tiny
+  tags:
+    - gpu
+    - Text generation
+    - 3B Model
+    - functions
+    - deutsch

 - url: https://gitlab.kit.edu/kit/aifb/BIS/infrastruktur/localai/localai-model-gallery/-/raw/main/dev-mixtral-large.yaml
  name: dev-mixtral-large