# gemma.yaml
# Model definition for gemma-2b-it: metadata, an embedded runtime
# configuration, the prompt templates it references, and the file to download.
---
name: "gemma-2b-it"

# Human-readable summary of the model. Written as a literal block scalar,
# so the resulting string ends with a single trailing newline.
description: |
  This is the gemma-2b-it model

# Gemma weights are distributed under Google's Gemma Terms of Use rather than
# Apache-2.0, so link to those terms directly.
license: "https://ai.google.dev/gemma/terms"


# Embedded model configuration, stored verbatim as a literal block scalar.
# The entries under `template:` refer to items in `prompt_templates` by their
# `name` (chatgemma, chatgemma-block, completion), and `parameters.model` is the
# same path as the `filename` listed under `files`.
# The stopword must be the exact `<end_of_turn>` marker that the chatgemma
# template writes at the end of every turn; any other spelling never matches
# the model's output.
config_file: |
  name: gemma-2b-it
  context_size: 2048
  f16: true
  backend: llama
  mmap: true
  threads: 4
  parameters:
    model: model_data/text_generation/google/gemma/2b_it_v1p1.gguf
    temperature: 0.2
    top_k: 40
    top_p: 0.95
  template:
    chat_message: chatgemma
    chat: chatgemma-block
    completion: completion
  roles:
    assistant: 'Assistant:'
    system: 'System:'
    user: 'User:'
  gpu_layers: 35
  stopwords:
  - <end_of_turn>

# Prompt templates, looked up by `name` from the `template:` section of
# config_file.
prompt_templates:
# chatgemma: renders one chat message as
#   <start_of_turn><role>
#   <content, if non-empty>
#   <end_of_turn>
# Role mapping: assistant -> model, system -> system, user -> user; any other
# role name produces no label after <start_of_turn>.
# NOTE(review): Gemma's published prompt format only describes `user` and
# `model` turns -- confirm that emitting a `system` turn is intended.
- name: "chatgemma"
  content: |
    <start_of_turn>{{if eq .RoleName "assistant"}}model{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
    {{if .Content}}{{.Content}}{{end}}
    <end_of_turn>

# chatgemma-block: prefixes the input with <bos> and then opens a `model` turn,
# leaving the prompt ending right where the model's reply should begin.
# `.Input` is presumably the already-rendered conversation -- verify against
# the consumer.
# NOTE(review): if the backend tokenizer also inserts a BOS token on its own,
# the literal <bos> here would duplicate it -- confirm.
- name: "chatgemma-block"
  content: |
    <bos>{{.Input}}
    <start_of_turn>model

# completion: passes the input through unchanged, followed by the single
# trailing newline that the literal block scalar keeps.
- name: "completion"
  content: |
    {{.Input}}

# Files that make up the model. `filename` is the same path that
# `parameters.model` in config_file points at; `uri` is where it is fetched from.
files:
- filename: "model_data/text_generation/google/gemma/2b_it_v1p1.gguf"
  # NOTE(review): no checksum is pinned here -- fill in the published SHA-256
  # of the GGUF file so the download can be integrity-checked.
  sha256: ""
  uri: "https://huggingface.co/google/gemma-1.1-2b-it-GGUF/resolve/main/2b_it_v1p1.gguf"