diff --git a/dev-llama-3-large-hermes.yaml b/dev-llama-3-large-hermes.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c929adeabc95b29367314c3c2d6e1b12bec07df1
--- /dev/null
+++ b/dev-llama-3-large-hermes.yaml
@@ -0,0 +1,61 @@
+name: "dev-llama-3-large-hermes"
+
+description: |
+  This alias is for a model with high throughput.
+config_file: |
+  name: dev-llama-3-large-hermes
+  mmap: true
+  backend: llama
+  f16: true
+  gpu_layers: 85
+  parameters:
+    model: downloads/llama_backend/Hermes-2-Theta-Llama-3-70B-Q5_K_M.gguf
+
+  template:
+    chat_message: |
+      <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+
+      {{ if .FunctionCall -}}
+      Function call:
+      {{ else if eq .RoleName "tool" -}}
+      Function response:
+      {{ end -}}
+      {{ if .Content -}}
+      {{.Content -}}
+      {{ else if .FunctionCall -}}
+      {{ toJson .FunctionCall -}}
+      {{ end -}}
+      <|eot_id|>
+    function: |
+      <|start_header_id|>system<|end_header_id|>
+
+      You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+      <tools>
+      {{range .Functions}}
+      {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+      {{end}}
+      </tools>
+      Use the following pydantic model json schema for each tool call you will make:
+      {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+      Function call:
+    chat: |
+      <|begin_of_text|>{{.Input }}
+      <|start_header_id|>assistant<|end_header_id|>
+    completion: |
+      {{.Input}}
+  context_size: 8192
+  stopwords:
+  - <|im_end|>
+  - <dummy32000>
+  - "<|eot_id|>"
+  - <|end_of_text|>
+usage: |
+  curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+    "model": "dev-llama-3-large-hermes",
+    "messages": [{"role": "user", "content": "How are you doing?"}],
+    "temperature": 0.1
+  }'
+files:
+- filename: "downloads/llama_backend/Hermes-2-Theta-Llama-3-70B-Q5_K_M.gguf"
+  sha256: ""
+  uri: "https://huggingface.co/bartowski/Hermes-2-Theta-Llama-3-70B-GGUF/resolve/main/Hermes-2-Theta-Llama-3-70B-Q5_K_M.gguf"
\ No newline at end of file
diff --git a/index.yaml b/index.yaml
index 82cc249f1f4a587ed5b9b753b002e34eb1735d78..06ba8feaf48f10168f6effca941e70e597aa4329 100644
--- a/index.yaml
+++ b/index.yaml
@@ -129,6 +129,14 @@
   - Text generation
   - 70B Model
   - functions
+
+- url: https://gitlab.kit.edu/kit/aifb/BIS/infrastruktur/localai/localai-model-gallery/-/raw/main/dev-llama-3-large-hermes.yaml
+  name: dev-llama-3-large-hermes
+  tags:
+  - gpu
+  - Text generation
+  - 70B Model
+  - functions
 - url: https://gitlab.kit.edu/kit/aifb/BIS/infrastruktur/localai/localai-model-gallery/-/raw/main/dev-llama-3-small.yaml
   name: dev-llama-3-small
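
Since the new alias is tagged "functions" and ships a tool-calling template, a request that actually exercises that path may be a more telling smoke test than the plain-chat example in the usage field. Below is a minimal sketch against LocalAI's OpenAI-compatible /v1/chat/completions endpoint; the get_weather tool and its parameters are hypothetical, invented purely for illustration:

  curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
    "model": "dev-llama-3-large-hermes",
    "messages": [{"role": "user", "content": "What is the weather in Karlsruhe?"}],
    "tools": [{
      "type": "function",
      "function": {
        "name": "get_weather",
        "description": "Get the current weather for a city",
        "parameters": {
          "type": "object",
          "properties": {"city": {"type": "string"}},
          "required": ["city"]
        }
      }
    }],
    "temperature": 0.1
  }'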
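
The sha256 field in the files section is left empty above; when set, LocalAI can use it to verify the downloaded file. One way to obtain the value, assuming the GGUF has already been fetched to the path given in the config:

  sha256sum downloads/llama_backend/Hermes-2-Theta-Llama-3-70B-Q5_K_M.gguf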