# Model configuration for a TinyLlama chat model served through the
# "transformers" backend.

# Name clients pass in the "model" field of API requests (see `usage` below).
name: tinyllama-chat
# Backend that loads and runs the model.
backend: transformers
# Model class the backend is asked to use.
type: AutoModelForCausalLM

parameters:
  # Model identifier handed to the backend.
  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
  # Default sampling settings; the request in `usage` overrides temperature.
  temperature: 0.2
  top_k: 40
  top_p: 0.95
  # NOTE(review): 4096 looks larger than the context window usually documented
  # for TinyLlama (2048 tokens) - confirm against the model card.
  max_tokens: 4096

# Prompt templates (Go text/template syntax).
# NOTE(review): these templates use ChatML markers (<|im_start|> / <|im_end|>).
# The TinyLlama-1.1B-Chat-v1.0 model card appears to describe a Zephyr-style
# prompt (<|user|>, <|assistant|>, </s>) - confirm the template matches the
# checkpoint referenced in `parameters.model`.
template:
  # Rendered once per message: role header line, then the content (if any),
  # closed by the end-of-turn marker.
  chat_message: |
    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
    {{if .Content}}{{.Content}}{{end}}<|im_end|>
  # Wraps the rendered conversation and opens the assistant turn so the model
  # continues from there.
  chat: |
    {{.Input}}
    <|im_start|>assistant
  # Plain completions pass the input through unchanged.
  completion: |
    {{.Input}}

# Generation stops when the model emits the end-of-turn marker.
stopwords:
- <|im_end|>

# Example request against the chat completions endpoint.
usage: |
  curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
    "model": "tinyllama-chat",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "temperature": 0.7
  }'