---
# Gallery definition for the "dev-gemma-mini" model.
#
# Top-level keys:
#   name        - identifier of this gallery entry.
#   description - free-text description of the entry.
#   config_file - embedded model configuration, stored as a literal block
#                 scalar (the whole value is one multi-line string).
#   files       - artifacts to download for the model.
name: "dev-gemma-mini"

description: |
  This is a model which is currently tested.

# Everything below `config_file: |` is a single string that is itself YAML.
# Comments are deliberately kept outside the block so the embedded text is
# not altered. The `template` entries are Go-template snippets using the
# Gemma turn markers (<start_of_turn> / <end_of_turn>); `chat_message` maps
# the "assistant" role name to "model" and passes other role names through.
# NOTE(review): `usage` is placed inside config_file based on the key order
# of the original text - confirm against the consumer's schema.
config_file: |
  name: dev-gemma-mini
  mmap: true
  backend: llama
  f16: true
  gpu_layers: 35
  threads: 12
  parameters:
    model: downloads/llama_backend/gemma-2-2b-it-Q8_0.gguf
  roles:
    assistant: 'Assistant:'
    system: 'System:'
    user: 'User:'
  template:
    chat_message: |-
      <start_of_turn>{{if eq .RoleName "assistant" }}model{{else}}{{ .RoleName }}{{end}}
      {{ if .Content -}}
      {{.Content -}}
      {{ end -}}<end_of_turn>
    chat: |
      {{.Input }}
      <start_of_turn>model
    completion: |
      {{.Input}}
  stopwords:
    - '<|im_end|>'
    - '<end_of_turn>'
    - '<start_of_turn>'
  context_size: 4096
  usage: |
    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
      "model": "dev-gemma-mini",
      "messages": [{"role": "user", "content": "How are you doing?"}],
      "temperature": 0.1
    }'

# Model artifacts. `filename` matches `parameters.model` in config_file.
files:
  - filename: "downloads/llama_backend/gemma-2-2b-it-Q8_0.gguf"
    # TODO: fill in the SHA-256 of the GGUF file; an empty value presumably
    # means the download is not integrity-checked - verify with the consumer.
    sha256: ""
    uri: "https://huggingface.co/bartowski/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q8_0.gguf"