From 12693fdceb82cd447812a03c124668cc9572d3ec Mon Sep 17 00:00:00 2001 From: Martin Forell <martin.forell@kit.edu> Date: Fri, 27 Sep 2024 08:34:52 +0000 Subject: [PATCH] test vllm --- dev-phi-3-vllm.yaml | 78 +++++++++++++++++++++++++++++++++++++++++++++ index.yaml | 6 ++++ 2 files changed, 84 insertions(+) create mode 100644 dev-phi-3-vllm.yaml diff --git a/dev-phi-3-vllm.yaml b/dev-phi-3-vllm.yaml new file mode 100644 index 0000000..9dcc028 --- /dev/null +++ b/dev-phi-3-vllm.yaml @@ -0,0 +1,78 @@ +name: "hermes-vllm" + +config_file: | + backend: vllm + parameters: + max_tokens: 8192 + model: "NousResearch/Hermes-3-Llama-3.1-8B" + context_size: 8192 + + stopwords: + - "<|im_end|>" + - "<dummy32000>" + - "<|eot_id|>" + - "<|end_of_text|>" + function: + disable_no_action: true + grammar: + # Uncomment the line below to enable grammar matching for JSON results if the model is breaking + # the output. This will make the model more accurate and won't break the JSON output. + # This however, will make parallel_calls not functional (it is a known bug) + # mixed_mode: true + disable: true + parallel_calls: true + expect_strings_after_json: true + json_regex_match: + - "(?s)<tool_call>(.*?)</tool_call>" + - "(?s)<tool_call>(.*)" + capture_llm_results: + - (?s)<scratchpad>(.*?)</scratchpad> + replace_llm_results: + - key: (?s)<scratchpad>(.*?)</scratchpad> + value: "" + + template: + use_tokenizer_template: true + chat: | + {{.Input -}} + <|im_start|>assistant + chat_message: | + <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} + {{- if .FunctionCall }} + <tool_call> + {{- else if eq .RoleName "tool" }} + <tool_response> + {{- end }} + {{- if .Content}} + {{.Content }} + {{- end }} + {{- if .FunctionCall}} + {{toJson .FunctionCall}} + {{- end }} + {{- if .FunctionCall }} + </tool_call> + {{- else if eq .RoleName "tool" }} + </tool_response> + {{- end }}<|im_end|> + completion: | + {{.Input}} + function: | + <|im_start|>system + You are a function calling AI model. + Here are the available tools: + <tools> + {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + </tools> + You should call the tools provided to you sequentially + Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows: + <scratchpad> + {step-by-step reasoning and plan in bullet points} + </scratchpad> + For each function call return a json object with function name and arguments within <tool_call> XML tags as follows: + <tool_call> + {"arguments": <args-dict>, "name": <function-name>} + </tool_call><|im_end|> + {{.Input -}} + <|im_start|>assistant \ No newline at end of file diff --git a/index.yaml b/index.yaml index c61794e..b1a95aa 100644 --- a/index.yaml +++ b/index.yaml @@ -189,6 +189,12 @@ - 2B Model - Code completion +- url: https://gitlab.kit.edu/kit/aifb/BIS/infrastruktur/localai/localai-model-gallery/-/raw/main/dev-phi-3-vllm.yaml + name: hermes-vllm + tags: + - gpu + + - url: https://gitlab.kit.edu/kit/aifb/BIS/infrastruktur/localai/localai-model-gallery/-/raw/main/dev-testgen.yaml name: dev-testgen tags: -- GitLab