From 12693fdceb82cd447812a03c124668cc9572d3ec Mon Sep 17 00:00:00 2001
From: Martin Forell <martin.forell@kit.edu>
Date: Fri, 27 Sep 2024 08:34:52 +0000
Subject: [PATCH] Add hermes-vllm model (vLLM backend) to the gallery

---
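Notes: this adds a gallery entry that serves NousResearch/Hermes-3-Llama-3.1-8B
through LocalAI's vLLM backend, with a ChatML template and Hermes-style
<tool_call> function calling. As a minimal smoke test, a sketch assuming a
LocalAI instance on http://localhost:8080 (the default port) with the model
already installed; host, port, and prompt are placeholders:

    # Query LocalAI's OpenAI-compatible chat completions endpoint (assumed
    # reachable on the default port); "hermes-vllm" is the name declared in
    # the config below.
    import requests

    resp = requests.post(
        "http://localhost:8080/v1/chat/completions",
        json={
            "model": "hermes-vllm",
            "messages": [{"role": "user", "content": "Say hello in one sentence."}],
            "max_tokens": 64,
        },
        timeout=120,
    )
    resp.raise_for_status()
    print(resp.json()["choices"][0]["message"]["content"])
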
 dev-phi-3-vllm.yaml | 78 +++++++++++++++++++++++++++++++++++++++++++++
 index.yaml          |  5 +++
 2 files changed, 83 insertions(+)
 create mode 100644 dev-phi-3-vllm.yaml

diff --git a/dev-phi-3-vllm.yaml b/dev-phi-3-vllm.yaml
new file mode 100644
index 0000000..9dcc028
--- /dev/null
+++ b/dev-phi-3-vllm.yaml
@@ -0,0 +1,78 @@
+name: "hermes-vllm"
+
+config_file: |
+    backend: vllm
+    parameters:
+      max_tokens: 8192
+      model: "NousResearch/Hermes-3-Llama-3.1-8B"
+    context_size: 8192
+
+    stopwords:
+    - "<|im_end|>"
+    - "<dummy32000>"
+    - "<|eot_id|>"
+    - "<|end_of_text|>"
+    function:
+      disable_no_action: true
+      grammar:
+        # Uncomment the line below to enable grammar matching for JSON results if the model
+        # is breaking the output. This makes the model more accurate and keeps the JSON
+        # output intact. However, it makes parallel_calls non-functional (a known bug).
+        # mixed_mode: true
+        disable: true
+        parallel_calls: true
+        expect_strings_after_json: true
+      json_regex_match:
+      - "(?s)<tool_call>(.*?)</tool_call>"
+      - "(?s)<tool_call>(.*)"
+      capture_llm_results:
+        - (?s)<scratchpad>(.*?)</scratchpad>
+      replace_llm_results:
+        - key: (?s)<scratchpad>(.*?)</scratchpad>
+          value: ""
+
+    template:
+      use_tokenizer_template: true
+      chat: |
+        {{.Input -}}
+        <|im_start|>assistant
+      chat_message: |
+        <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
+        {{- if .FunctionCall }}
+        <tool_call>
+        {{- else if eq .RoleName "tool" }}
+        <tool_response>
+        {{- end }}
+        {{- if .Content}}
+        {{.Content }}
+        {{- end }}
+        {{- if .FunctionCall}}
+        {{toJson .FunctionCall}}
+        {{- end }}
+        {{- if .FunctionCall }}
+        </tool_call>
+        {{- else if eq .RoleName "tool" }}
+        </tool_response>
+        {{- end }}<|im_end|>
+      completion: |
+        {{.Input}}
+      function: |
+        <|im_start|>system
+        You are a function calling AI model.
+        Here are the available tools:
+        <tools>
+        {{range .Functions}}
+        {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+        {{end}}
+        </tools>
+        You should call the tools provided to you sequentially.
+        Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
+        <scratchpad>
+        {step-by-step reasoning and plan in bullet points}
+        </scratchpad>
+        For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
+        <tool_call>
+        {"arguments": <args-dict>, "name": <function-name>}
+        </tool_call><|im_end|>
+        {{.Input -}}
+        <|im_start|>assistant
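
Reviewer note, illustrative only: the function block above relies on regex
post-processing of the raw completion. The sketch below shows the effect the
json_regex_match and replace_llm_results settings describe; LocalAI applies
them internally, and the completion text here is hypothetical:

    import json
    import re

    # A hypothetical Hermes-style completion produced by the templates above.
    completion = (
        "<scratchpad>\n"
        "- user asked for the weather; call get_weather with the city\n"
        "</scratchpad>\n"
        "<tool_call>\n"
        '{"arguments": {"city": "Karlsruhe"}, "name": "get_weather"}\n'
        "</tool_call>"
    )

    # json_regex_match: pull the JSON payload out of <tool_call> tags; the
    # second, open-ended pattern catches generations cut off before the
    # closing tag.
    for pattern in (r"(?s)<tool_call>(.*?)</tool_call>", r"(?s)<tool_call>(.*)"):
        m = re.search(pattern, completion)
        if m:
            call = json.loads(m.group(1))
            print(call["name"], call["arguments"])
            break

    # replace_llm_results: strip the <scratchpad> reasoning from the text
    # returned to the client.
    visible = re.sub(r"(?s)<scratchpad>(.*?)</scratchpad>", "", completion)
    print(visible.strip())
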
diff --git a/index.yaml b/index.yaml
index c61794e..b1a95aa 100644
--- a/index.yaml
+++ b/index.yaml
@@ -189,6 +189,11 @@
     - 2B Model
     - Code completion
 
+- url: https://gitlab.kit.edu/kit/aifb/BIS/infrastruktur/localai/localai-model-gallery/-/raw/main/dev-phi-3-vllm.yaml
+  name: hermes-vllm
+  tags:
+    - gpu
+
 - url: https://gitlab.kit.edu/kit/aifb/BIS/infrastruktur/localai/localai-model-gallery/-/raw/main/dev-testgen.yaml
   name: dev-testgen
   tags:
-- 
GitLab
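
Post-script, not part of the patch: once this lands, the entry can be pulled in
through LocalAI's model-apply endpoint. A sketch assuming the instance can
reach the index URL above and listens on the default port; the call returns a
job uuid plus a status URL to poll:

    # Assumes a LocalAI instance on the default port; the gallery URL is the
    # one registered in index.yaml above.
    import requests

    resp = requests.post(
        "http://localhost:8080/models/apply",
        json={"url": "https://gitlab.kit.edu/kit/aifb/BIS/infrastruktur/localai/localai-model-gallery/-/raw/main/dev-phi-3-vllm.yaml"},
    )
    print(resp.json())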