Add new file

2289669e · Jacob Hoffmann · 7f26cb6f · 2289669e
Commit 2289669e authored 7 months ago by Jacob Hoffmann
--- a/llama-3.ymal
+++ b/llama-3.ymal
+# Model entry: the public model name and a short human-readable description.
+# The same name is repeated as `name` inside `config_file`.
+name: "dev-llama-3-small"
+description: |
+  This alias is for a model with high throughput.
+# Backend configuration, embedded as a literal block scalar whose text is
+# itself a YAML document (so the comments inside it are part of that text).
+config_file: |
+  name: dev-llama-3-small
+  mmap: true
+  backend: llama
+  f16: true
+  gpu_layers: 35
+  parameters:
+    # Same path as the `filename` entry under the top-level `files` key.
+    model: downloads/llama_backend/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf
+  # NOTE(review): <|im_end|> and <dummy32000> do not appear in any template
+  # below; presumably carried over from another model - confirm still needed.
+  stopwords:
+  - <|im_end|>
+  - <dummy32000>
+  - <|eot_id|>
+  - <|end_of_text|>
+  # Prompt templates. Their bodies are emitted verbatim, so nothing inside
+  # them is annotated.
+  template:
+    chat: |
+      <|begin_of_text|>{{.Input }}
+      <|start_header_id|>assistant<|end_header_id|>
+    chat_message: |
+      <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+      {{ if .FunctionCall -}}
+      Function call:
+      {{ else if eq .RoleName "tool" -}}
+      Function response:
+      {{ end -}}
+      {{ if .Content -}}
+      {{.Content -}}
+      {{ else if .FunctionCall -}}
+      {{ toJson .FunctionCall -}}
+      {{ end -}}
+      <|eot_id|>
+    completion: |
+      {{.Input}}
+    function: |
+      <|start_header_id|>system<|end_header_id|>
+      You have access to the following functions:
+      {{range .Functions}}
+      Use the function '{{.Name}}' to '{{.Description}}'
+      {{toJson .Parameters}}
+      {{end}}
+      Think very carefully before calling functions.
+      If you choose to call a function ONLY reply in the following format with no prefix or suffix:
+      <function=example_function_name>{{`{{"example_name": "example_value"}}`}}</function>
+      Reminder:
+      - If looking for real time information use relevant functions before falling back to searching on internet
+      - Function calls MUST follow the specified format, start with <function= and end with </function>
+      - Required parameters MUST be specified
+      - Only call one function at a time
+      - Put the entire function call reply on one line
+      <|eot_id|>
+      {{.Input }}
+      <|start_header_id|>assistant<|end_header_id|>
+  function:
+    disable_no_action: true
+    grammar:
+      #disable: true
+      no_mixed_free_string: true
+      mixed_mode: true
+      schema_type: llama3.1 # or JSON is supported too (json)
+    # Captures the function name and its arguments from the
+    # <function=NAME>ARGS</function> reply format that the `function`
+    # template above instructs the model to use.
+    response_regex:
+    - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
+  context_size: 8192
+  # Example request: `model` is this config's own name, and `temperature`
+  # is a top-level request field rather than a property of a message.
+  usage: |
+    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+        "model": "dev-llama-3-small",
+        "messages": [{"role": "user", "content": "How are you doing?"}],
+        "temperature": 0.1
+    }'
+# Model artifacts for this entry. `filename` is the same path that
+# `parameters.model` inside `config_file` points at.
+files:
+- filename: "downloads/llama_backend/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf"
+  # NOTE(review): the digest is empty, so presumably the downloaded file is not
+  # checksum-verified - fill in the SHA-256 published for this GGUF file.
+  sha256: ""
+  uri: "https://huggingface.co/lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf"
+  # Alternative model source (a different GGUF file), left disabled.
+  # uri: "https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q5_K_M.gguf"