From 2289669e493404a95c9ba243c1e04c998cac2bc2 Mon Sep 17 00:00:00 2001 From: Jacob Hoffmann <lc4415@partner.kit.edu> Date: Sat, 31 Aug 2024 15:52:02 +0000 Subject: [PATCH] Add new file --- llama-3.yaml | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 llama-3.yaml diff --git a/llama-3.yaml b/llama-3.yaml new file mode 100644 index 0000000..be989ad --- /dev/null +++ b/llama-3.yaml @@ -0,0 +1,83 @@ +name: "dev-llama-3-small" + +description: | + This alias is for a model with high throughput. +config_file: | + name: dev-llama-3-small + mmap: true + backend: llama + f16: true + gpu_layers: 35 + parameters: + model: downloads/llama_backend/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf + + stopwords: + - <|im_end|> + - <dummy32000> + - <|eot_id|> + - <|end_of_text|> + template: + chat: | + <|begin_of_text|>{{.Input }} + <|start_header_id|>assistant<|end_header_id|> + chat_message: | + <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|> + + {{ if .FunctionCall -}} + Function call: + {{ else if eq .RoleName "tool" -}} + Function response: + {{ end -}} + {{ if .Content -}} + {{.Content -}} + {{ else if .FunctionCall -}} + {{ toJson .FunctionCall -}} + {{ end -}} + <|eot_id|> + completion: | + {{.Input}} + function: | + <|start_header_id|>system<|end_header_id|> + + You have access to the following functions: + + {{range .Functions}} + Use the function '{{.Name}}' to '{{.Description}}' + {{toJson .Parameters}} + {{end}} + + Think very carefully before calling functions.
+ If you choose to call a function ONLY reply in the following format with no prefix or suffix: + + <function=example_function_name>{{`{{"example_name": "example_value"}}`}}</function> + + Reminder: + - If looking for real time information use relevant functions before falling back to searching on the internet + - Function calls MUST follow the specified format, start with <function= and end with </function> + - Required parameters MUST be specified + - Only call one function at a time + - Put the entire function call reply on one line + <|eot_id|> + {{.Input }} + <|start_header_id|>assistant<|end_header_id|> + function: + disable_no_action: true + grammar: + #disable: true + no_mixed_free_string: true + mixed_mode: true + schema_type: llama3.1 # or JSON is supported too (json) + response_regex: + - <function=(?P<name>\w+)>(?P<arguments>.*)</function> + context_size: 8192 + + usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "gpt-4", + "messages": [{"role": "user", "content": "How are you doing?"}], "temperature": 0.1 + }' +files: +- filename: "downloads/llama_backend/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf" + sha256: "" + uri: "https://huggingface.co/lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf" + # uri: "https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q5_K_M.gguf" -- GitLab