From fbd39490b0e5842cea7073b4d81e3cb4b2e76964 Mon Sep 17 00:00:00 2001
From: Martin Forell <martin.forell@kit.edu>
Date: Thu, 26 Sep 2024 07:10:28 +0000
Subject: [PATCH] llama 32

---
 dev-llama-3-tiny.yaml | 84 +++++++++++++++++++++++++++++++++++++++++++
 index.yaml            |  8 +++++
 2 files changed, 92 insertions(+)
 create mode 100644 dev-llama-3-tiny.yaml

diff --git a/dev-llama-3-tiny.yaml b/dev-llama-3-tiny.yaml
new file mode 100644
index 0000000..276df89
--- /dev/null
+++ b/dev-llama-3-tiny.yaml
@@ -0,0 +1,84 @@
+name: "dev-llama-3-tiny"
+
+description: |
+  This alias is for a model with high throughput.
+config_file: |
+  name: dev-llama-3-tiny
+  mmap: true
+  backend: llama
+  f16: true
+  gpu_layers: 35
+  parameters:
+    model: downloads/llama_backend/Llama-3.2-3B-Instruct-Q8_0.gguf
+
+  stopwords:
+  - <|im_end|>
+  - <dummy32000>
+  - <|eot_id|>
+  - <|end_of_text|>
+  template:
+    chat: |
+      <|begin_of_text|>{{.Input }}
+      <|start_header_id|>assistant<|end_header_id|>
+    chat_message: |
+      <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+
+      {{ if .FunctionCall -}}
+      Function call:
+      {{ else if eq .RoleName "tool" -}}
+      Function response:
+      {{ end -}}
+      {{ if .Content -}}
+      {{.Content -}}
+      {{ else if .FunctionCall -}}
+      {{ toJson .FunctionCall -}}
+      {{ end -}}
+      <|eot_id|>
+    completion: |
+      {{.Input}}
+    function: |
+      <|start_header_id|>system<|end_header_id|>
+
+      You have access to the following functions:
+
+      {{range .Functions}}
+      Use the function '{{.Name}}' to '{{.Description}}'
+      {{toJson .Parameters}}
+      {{end}}
+
+      Think very carefully before calling functions.
+      If you choose to call a function ONLY reply in the following format with no prefix or suffix:
+
+      <function=example_function_name>{{`{{"example_name": "example_value"}}`}}</function>
+
+      Reminder:
+      - If looking for real time information use relevant functions before falling back to searching on internet
+      - Function calls MUST follow the specified format, start with <function= and end with </function>
+      - Required parameters MUST be specified
+      - Only call one function at a time
+      - Put the entire function call reply on one line
+      <|eot_id|>
+      {{.Input }}
+      <|start_header_id|>assistant<|end_header_id|>
+  function:
+    disable_no_action: true
+    grammar:
+      #disable: true
+      no_mixed_free_string: true
+      mixed_mode: true
+      schema_type: llama3.1 # or JSON is supported too (json)
+    response_regex:
+    - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
+  context_size: 8192
+
+  usage: |
+    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+      "model": "gpt-4",
+      "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
+    }'
+files:
+- filename: "downloads/llama_backend/Llama-3.2-3B-Instruct-Q8_0.gguf"
+  sha256: ""
+  uri: "https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q8_0.gguf"
+  # uri: "https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q5_K_M.gguf"
+ 
\ No newline at end of file
diff --git a/index.yaml b/index.yaml
index 830b513..c61794e 100644
--- a/index.yaml
+++ b/index.yaml
@@ -133,6 +133,14 @@
   - 8B Model
   - functions
   - deutsch
+- url: https://gitlab.kit.edu/kit/aifb/BIS/infrastruktur/localai/localai-model-gallery/-/raw/main/dev-llama-3-tiny.yaml
+  name: dev-llama-3-tiny
+  tags:
+  - gpu
+  - Text generation
+  - 3B Model
+  - functions
+  - deutsch
 - url: https://gitlab.kit.edu/kit/aifb/BIS/infrastruktur/localai/localai-model-gallery/-/raw/main/dev-mixtral-large.yaml
   name: dev-mixtral-large
   tags:
-- 
GitLab