From 7510865b7370a7e2561c4d74c0d95d364a1e8900 Mon Sep 17 00:00:00 2001 From: Martin Forell <martin.forell@kit.edu> Date: Sun, 8 Dec 2024 13:35:50 +0000 Subject: [PATCH] Update 2 files - /dev-llama-3-competence.yaml - /index.yaml --- dev-llama-3-competence.yaml | 62 +++++++++++++++++++++++++++++++++++++ index.yaml | 7 +++++ 2 files changed, 69 insertions(+) create mode 100644 dev-llama-3-competence.yaml diff --git a/dev-llama-3-competence.yaml b/dev-llama-3-competence.yaml new file mode 100644 index 0000000..09c6193 --- /dev/null +++ b/dev-llama-3-competence.yaml @@ -0,0 +1,62 @@ +name: "dev-llama-3-competence" + +description: | + This alias is for a model with high throughput. +config_file: | + name: dev-llama-3-competence + mmap: true + backend: llama + f16: true + gpu_layers: 85 + parameters: + model: downloads/llama_backend/competency-extraction-finetune-v2_q5_1.gguf + + template: + chat_message: | + <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|> + + {{ if .FunctionCall -}} + Function call: + {{ else if eq .RoleName "tool" -}} + Function response: + {{ end -}} + {{ if .Content -}} + {{.Content -}} + {{ else if .FunctionCall -}} + {{ toJson .FunctionCall -}} + {{ end -}} + <|eot_id|> + function: | + <|start_header_id|>system<|end_header_id|> + + You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: + <tools> + {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + </tools> + Use the following pydantic model json schema for each tool call you will make: + {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|> + Function call: + chat: | + <|begin_of_text|>{{.Input }} + <|start_header_id|>assistant<|end_header_id|> + completion: | + {{.Input}} + context_size: 8192 + stopwords: + - <|im_end|> + - <dummy32000> + - "<|eot_id|>" + - <|end_of_text|> + usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "dev-llama-3-competence", + "messages": [{"role": "user", "content": "How are you doing?"}], "temperature": 0.1 + }' +files: +- filename: "downloads/llama_backend/competency-extraction-finetune-v2_q5_1.gguf" + sha256: "" + uri: "https://huggingface.co/zebrazinker/competency-extraction-finetune-v2/resolve/main/competency-extraction-finetune-v2_q5_1.gguf" + +# download model with huggingface-cli download bartowski/Llama-3.1-Nemotron-70B-Instruct-HF-GGUF --local-dir /data/localai-data/downloads/llama_backend --include '*Q6_K*' diff --git a/index.yaml b/index.yaml index 909349a..f303edb 100644 --- a/index.yaml +++ b/index.yaml @@ -129,6 +129,13 @@ - Text generation - 70B Model - functions +- url: https://gitlab.kit.edu/kit/aifb/BIS/infrastruktur/localai/localai-model-gallery/-/raw/main/dev-llama-3-competence.yaml + name: dev-llama-3-competence + tags: + - gpu + - Text generation + - 70B Model + - functions - url: https://gitlab.kit.edu/kit/aifb/BIS/infrastruktur/localai/localai-model-gallery/-/raw/main/dev-llama-3-large-hermes.yaml name: dev-llama-3-large-hermes -- GitLab