---
# LocalAI model-gallery entry: fine-tuned Gemma 2 2B IT (Q4_K_M GGUF, llama backend).
name: "aie-gemma-finetuned"

description: |
  Fine-tuned Gemma 2 2B IT model (AIE), quantized to Q4_K_M GGUF.
  Status: under evaluation/testing.

# Inline LocalAI model config written out verbatim at install time.
config_file: |
  name: aie-gemma-finetuned
  mmap: true
  backend: llama
  f16: true
  gpu_layers: 35
  threads: 12
  parameters:
    model: downloads/llama_backend/AIE_Gemma2_2B_IT_Q4_K_M.gguf
  roles:
    assistant: 'Assistant:'
    system: 'System:'
    user: 'User:'
  template:
    # Gemma turn format: <start_of_turn>{role}\n{content}<end_of_turn>
    # The assistant role is mapped to Gemma's "model" role name.
    chat_message: |-
      <start_of_turn>{{if eq .RoleName "assistant" }}model{{else}}{{ .RoleName }}{{end}}
      {{ if .Content -}}
      {{.Content -}}
      {{ end -}}<end_of_turn>
    chat: |
      {{.Input }}
      <start_of_turn>model
    completion: |
      {{.Input}}
  stopwords:
    # NOTE(review): '<|im_end|>' is a ChatML token, not emitted by Gemma;
    # kept as a harmless belt-and-braces stop — confirm before removing.
    - '<|im_end|>'
    - '<end_of_turn>'
    - '<start_of_turn>'
  context_size: 4096

# Example request. Fixes vs. the original:
#   - "model" now names this entry (was "gpt-4", which would not route here)
#   - "temperature" is a top-level request field, not a message attribute
usage: |
  curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
    "model": "aie-gemma-finetuned",
    "temperature": 0.1,
    "messages": [{"role": "user", "content": "How are you doing?"}]
  }'

files:
  - filename: "downloads/llama_backend/AIE_Gemma2_2B_IT_Q4_K_M.gguf"
    # TODO(review): empty sha256 disables integrity verification — fill in the
    # checksum of the GGUF artifact before shipping.
    sha256: ""
    # Hugging Face direct-download URL must use /resolve/, not /blob/
    # (/blob/ returns the HTML viewer page, not the file bytes).
    uri: "https://huggingface.co/Phabby/AIE-Gemma-2-2B-IT/resolve/main/AIE_Gemma2_2B_IT_Q4_K_M.gguf"