backend: llama-cpp
context_size: 4096 # maximum context window (prompt + response), in tokens
f16: true          # use 16-bit floats where supported, reducing memory use
threads: 6         # CPU threads used for inference
gpu_layers: 35     # layers offloaded to the GPU; requires a GPU-enabled build (0 = CPU only)
mmap: true         # memory-map the model file instead of loading it fully into RAM
name: llava        # model name to reference in API requests
roles:             # prefixes injected before each message, matching llava's expected prompt format
  user: "USER:"
  assistant: "ASSISTANT:"
  system: "SYSTEM:"
parameters:
  model: ggml-model-q4_k.gguf # quantized model weights, expected in the models directory
  temperature: 0.2            # low temperature favors precise, focused answers
  top_k: 40
  top_p: 0.95
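
# These sampling values are per-model defaults; fields such as temperature
# can normally be overridden in the body of an individual API request.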

template:
  chat: chat-simple-llava # refers to the prompt template defined under prompt_templates below

mmproj: mmproj-model-f16.gguf # multimodal projector pairing the vision encoder with the LLM; required for image input
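
# With the projector loaded, the OpenAI-compatible endpoint accepts image
# content for this model. A minimal request sketch (assuming LocalAI on its
# default port 8080 and a hypothetical image URL):
#
#   curl http://localhost:8080/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{
#       "model": "llava",
#       "messages": [{
#         "role": "user",
#         "content": [
#           {"type": "text", "text": "What is in the image?"},
#           {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}}
#         ]
#       }]
#     }'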

prompt_templates:
- name: "chat-simple-llava"
  content: |
    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
    {{.Input}}
    ASSISTANT:
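
# With the roles and template above, a single-turn conversation is rendered
# roughly as follows before being sent to the model ({{.Input}} expands to
# the role-prefixed message history):
#
#   A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
#   USER: What is in the image?
#   ASSISTANT: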