-
Martin Forell authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
dev-gemma-small.yaml 1.15 KiB
# Identifier of this entry; the embedded config under `config_file` declares
# the same name.
# NOTE(review): presumably a LocalAI model-gallery entry — confirm.
name: dev-gemma-small

# Free-text summary of the entry (literal block, keeps one trailing newline).
description: |
  This is a model which is currently tested.
# Embedded model configuration, stored as one literal block string.
#
# - `parameters.model` points at the same path as the entry listed under
#   `files` in this document.
# - `template.chat_message` renders each message as a
#   `<start_of_turn>ROLE ... <end_of_turn>` turn, emitting `model` when the
#   role name is "assistant" and the raw role name otherwise; `template.chat`
#   appends an opening `<start_of_turn>model` turn after the rendered input.
# - `usage` is an example request: it addresses this config's own model name
#   and passes `temperature` as a top-level field of the request body.
#
# NOTE(review): the `roles` prefixes are not referenced by the templates here
# (they use `.RoleName`) — presumably only a fallback; confirm they are needed.
# NOTE(review): '<|im_end|>' is not a marker used by the templates in this
# config; presumably kept as a harmless extra stop sequence — confirm.
config_file: |
  name: dev-gemma-small
  mmap: true
  backend: llama
  f16: true
  gpu_layers: 35
  threads: 12
  parameters:
    model: downloads/llama_backend/gemma-2-9b-it-Q6_K_L.gguf
  roles:
    assistant: 'Assistant:'
    system: 'System:'
    user: 'User:'
  template:
    chat_message: |-
      <start_of_turn>{{if eq .RoleName "assistant" }}model{{else}}{{ .RoleName }}{{end}}
      {{ if .Content -}}
      {{.Content -}}
      {{ end -}}<end_of_turn>
    chat: |
      {{.Input }}
      <start_of_turn>model
    completion: |
      {{.Input}}
  stopwords:
    - '<|im_end|>'
    - '<end_of_turn>'
    - '<start_of_turn>'
  context_size: 8192
  usage: |
    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
      "model": "dev-gemma-small",
      "messages": [{"role": "user", "content": "How are you doing?"}],
      "temperature": 0.1
    }'
# Each item pairs a target `filename` with the `uri` it is fetched from; the
# filename here is the same path that `parameters.model` uses in the embedded
# config.
# NOTE(review): `sha256` is empty — presumably the download is then not
# checksum-verified; fill in the published hash once confirmed.
files:
  - filename: 'downloads/llama_backend/gemma-2-9b-it-Q6_K_L.gguf'
    sha256: ''
    uri: 'https://huggingface.co/bartowski/gemma-2-9b-it-GGUF/resolve/main/gemma-2-9b-it-Q6_K_L.gguf'