---
# dev-gemma-small.yaml
#
# Model definition entry: a display name, a description, an embedded model
# configuration (`config_file`) and the list of files to download (`files`).
# NOTE(review): the key layout looks like the LocalAI model-gallery schema —
# confirm against the consuming tool.

# Identifier of this entry; the embedded configuration uses the same name.
name: "dev-gemma-small"

# Free-text description of the entry.
description: |
  This is a model which is currently tested.
# Model configuration embedded as a literal block scalar; its content is
# itself a YAML document. Everything indented below, including `usage`, is
# part of that embedded document rather than a key of this file.
# The `model` path under `parameters` is the same path listed under `files`.
# The usage example addresses this entry's own model name and passes
# `temperature` as a top-level request field, not inside the message object.
config_file: |
  name: dev-gemma-small
  mmap: true
  backend: llama
  f16: true
  gpu_layers: 35
  threads: 12
  parameters:
    model: downloads/llama_backend/gemma-2-9b-it-Q6_K_L.gguf
  roles:
    assistant: 'Assistant:'
    system: 'System:'
    user: 'User:'
  template:
    chat_message: |-
      <start_of_turn>{{if eq .RoleName "assistant" }}model{{else}}{{ .RoleName }}{{end}}
      {{ if .Content -}}
      {{.Content -}}
      {{ end -}}<end_of_turn>
    chat: |
      {{.Input }}
      <start_of_turn>model
    completion: |
      {{.Input}}
  stopwords:
  - '<|im_end|>'
  - '<end_of_turn>'
  - '<start_of_turn>'
  context_size: 8192

  usage: |
    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
        "model": "dev-gemma-small",
        "messages": [{"role": "user", "content": "How are you doing?"}],
        "temperature": 0.1
    }'
# Files fetched for this entry. `filename` is the same relative path that
# `parameters.model` inside `config_file` refers to.
files:
  - filename: 'downloads/llama_backend/gemma-2-9b-it-Q6_K_L.gguf'
    # NOTE(review): the checksum is empty, so the download presumably cannot
    # be verified — fill in the published SHA-256 of the GGUF file.
    sha256: ''
    uri: 'https://huggingface.co/bartowski/gemma-2-9b-it-GGUF/resolve/main/gemma-2-9b-it-Q6_K_L.gguf'