diff --git a/dev-llama-3-competence.yaml b/dev-llama-3-competence.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..09c6193b39a98ce103ef11d72cbf0f84cd05d1e4
--- /dev/null
+++ b/dev-llama-3-competence.yaml
@@ -0,0 +1,63 @@
+name: "dev-llama-3-competence"
+
+description: |
+  This alias serves the competency-extraction fine-tune (v2, q5_1 quantization), configured for high throughput.
+config_file: |
+  name: dev-llama-3-competence
+  mmap: true
+  backend: llama
+  f16: true
+  gpu_layers: 85
+  parameters:
+    model: downloads/llama_backend/competency-extraction-finetune-v2_q5_1.gguf
+
+  template:
+    chat_message: |
+      <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+
+      {{ if .FunctionCall -}}
+      Function call:
+      {{ else if eq .RoleName "tool" -}}
+      Function response:
+      {{ end -}}
+      {{ if .Content -}}
+      {{.Content -}}
+      {{ else if .FunctionCall -}}
+      {{ toJson .FunctionCall -}}
+      {{ end -}}
+      <|eot_id|>
+    function: |
+      <|start_header_id|>system<|end_header_id|>
+
+      You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+      <tools>
+      {{range .Functions}}
+      {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+      {{end}}
+      </tools>
+      Use the following pydantic model json schema for each tool call you will make:
+      {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+      Function call:
+    chat: |
+      <|begin_of_text|>{{.Input }}
+      <|start_header_id|>assistant<|end_header_id|>
+    completion: |
+      {{.Input}}
+  context_size: 8192
+  stopwords:
+  - <|im_end|>
+  - <dummy32000>
+  - "<|eot_id|>"
+  - <|end_of_text|>
+  usage: |
+      curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+        "model": "dev-llama-3-competence",
+        "temperature": 0.1,
+        "messages": [{"role": "user", "content": "How are you doing?"}]
+      }'
+files:
+- filename: "downloads/llama_backend/competency-extraction-finetune-v2_q5_1.gguf"
+  sha256: ""
+  uri: "https://huggingface.co/zebrazinker/competency-extraction-finetune-v2/resolve/main/competency-extraction-finetune-v2_q5_1.gguf"
+
+# download model with huggingface-cli download zebrazinker/competency-extraction-finetune-v2 --local-dir /data/localai-data/downloads/llama_backend --include '*q5_1*'
diff --git a/index.yaml b/index.yaml
index 909349a5c6249b22d64e538304a968788fcf7baa..f303edbaf5040517b4e1a48355c836aeacc3f9c2 100644
--- a/index.yaml
+++ b/index.yaml
@@ -129,6 +129,13 @@
   - Text generation
   - 70B Model
   - functions
+- url: https://gitlab.kit.edu/kit/aifb/BIS/infrastruktur/localai/localai-model-gallery/-/raw/main/dev-llama-3-competence.yaml
+  name: dev-llama-3-competence
+  tags:
+  - gpu
+  - Text generation
+  - 70B Model
+  - functions
 - url: https://gitlab.kit.edu/kit/aifb/BIS/infrastruktur/localai/localai-model-gallery/-/raw/main/dev-llama-3-large-hermes.yaml
   name: dev-llama-3-large-hermes
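
Not part of the diff, but since the new alias wires up the Llama 3 function-calling templates (the `function` template and the `Function call:` / `Function response:` markers in `chat_message`), a quick smoke test of that path may be useful. The sketch below sends an OpenAI-style `tools` request to the alias; the `get_current_weather` tool is a made-up placeholder, and it assumes the LocalAI instance at localhost:8080 accepts OpenAI-compatible tool definitions:

    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
      "model": "dev-llama-3-competence",
      "messages": [{"role": "user", "content": "What is the weather in Karlsruhe?"}],
      "tools": [{
        "type": "function",
        "function": {
          "name": "get_current_weather",
          "description": "Get the current weather for a given city",
          "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string", "description": "City name"}},
            "required": ["city"]
          }
        }
      }]
    }'

If the `function` template renders correctly, the response should contain a tool call naming `get_current_weather` rather than plain text.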
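
Also worth noting outside the diff: `sha256` in the `files` entry is left empty, so the download cannot be verified. A checksum could be filled in later by fetching the GGUF and hashing it; this sketch assumes `huggingface-cli` and `sha256sum` are available and reuses the paths from the config:

    # fetch only the q5_1 GGUF into the backend's download directory
    huggingface-cli download zebrazinker/competency-extraction-finetune-v2 \
      --local-dir /data/localai-data/downloads/llama_backend \
      --include '*q5_1*'
    # compute the digest to paste into the sha256 field
    sha256sum /data/localai-data/downloads/llama_backend/competency-extraction-finetune-v2_q5_1.gguf

The resulting digest can then replace the empty `sha256: ""` so future downloads are validated against it.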