feat: add `deepseek-ai/DeepSeek-R1-Distill-Qwen-32B` to HuggingChat config (#1660)

* feat: add `deepseek-ai/DeepSeek-R1-Distill-Qwen-32B` to HuggingChat config
* feat: hide reasoning modal when it's empty
* fix: put R1 model higher in list
chart/env/prod.yaml
CHANGED
```diff
@@ -136,23 +136,15 @@ envVars:
           ]
         },
         {
-          "name": "Qwen/QwQ-32B-Preview",
-          "preprompt": "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.",
-          "modelUrl": "https://huggingface.co/Qwen/QwQ-32B-Preview",
-          "websiteUrl": "https://qwenlm.github.io/blog/qwq-32b-preview/",
-          "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/qwen-logo.png",
-          "description": "QwQ is an experiment model from the Qwen Team with advanced reasoning capabilities.",
+          "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+          "modelUrl": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+          "websiteUrl": "https://deepseek.com/",
+          "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/deepseek-logo.png",
+          "description": "The first reasoning model from DeepSeek, distilled into a 32B dense model. Outperforms o1-mini on multiple benchmarks.",
           "reasoning": {
-            "type": "summarize"
-          },
-          "parameters": {
-            "stop": ["<|im_end|>"],
-            "truncate": 12288,
-            "max_new_tokens": 4096,
-            "temperature": 0.7,
-            "top_k": 20,
-            "top_p": 0.8,
-            "repetition_penalty": 1.05
+            "type": "tokens",
+            "beginToken": "<think>",
+            "endToken": "</think>"
           },
           "promptExamples": [
             {
@@ -167,6 +159,12 @@ envVars:
               "title": "Measuring 6 liters",
               "prompt": "I have a 6- and a 12-liter jug. I want to measure exactly 6 liters."
             }
+          ],
+          "endpoints": [
+            {
+              "type": "openai",
+              "baseURL": "https://api-inference.huggingface.co/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/v1"
+            }
           ]
         },
         {
@@ -196,10 +194,46 @@ envVars:
               "prompt": "I have a 6- and a 12-liter jug. I want to measure exactly 6 liters."
             }
           ],
-          "endpoints": [
-            …
-            …
-            …
+          "endpoints": [
+            {
+              "type": "openai",
+              "baseURL": "https://api-inference.huggingface.co/models/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/v1"
+            }
+          ]
+        },
+        {
+          "name": "Qwen/QwQ-32B-Preview",
+          "preprompt": "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.",
+          "modelUrl": "https://huggingface.co/Qwen/QwQ-32B-Preview",
+          "websiteUrl": "https://qwenlm.github.io/blog/qwq-32b-preview/",
+          "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/qwen-logo.png",
+          "description": "QwQ is an experiment model from the Qwen Team with advanced reasoning capabilities.",
+          "reasoning": {
+            "type": "summarize"
+          },
+          "parameters": {
+            "stop": ["<|im_end|>"],
+            "truncate": 12288,
+            "max_new_tokens": 4096,
+            "temperature": 0.7,
+            "top_k": 20,
+            "top_p": 0.8,
+            "repetition_penalty": 1.05
+          },
+          "promptExamples": [
+            {
+              "title": "Rs in strawberry",
+              "prompt": "how many R in strawberry?"
+            },
+            {
+              "title": "Larger number",
+              "prompt": "9.11 or 9.9 which number is larger?"
+            },
+            {
+              "title": "Measuring 6 liters",
+              "prompt": "I have a 6- and a 12-liter jug. I want to measure exactly 6 liters."
+            }
+          ]
         },
         {
           "name": "Qwen/Qwen2.5-Coder-32B-Instruct",
@@ -228,10 +262,12 @@ envVars:
               "prompt": "Generate a snazzy static landing page for a local coffee shop using HTML and CSS. You can use tailwind using <script src='https://cdn.tailwindcss.com'></script>."
             }
           ],
-          "endpoints": [
-            …
-            …
-            …
+          "endpoints": [
+            {
+              "type": "openai",
+              "baseURL": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct/v1"
+            }
+          ]
         },
         {
           "name": "meta-llama/Llama-3.2-11B-Vision-Instruct",
@@ -245,19 +281,21 @@ envVars:
            "truncate": 14336,
            "max_new_tokens": 1536
          },
-          "endpoints": [
-            …
-            …
-            …
-            "…
-            "…
-            …
-            …
-            …
-            …
+          "endpoints": [
+            {
+              "type": "openai",
+              "baseURL": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1",
+              "multimodal": {
+                "image": {
+                  "maxSizeInMB": 10,
+                  "maxWidth": 560,
+                  "maxHeight": 560,
+                  "supportedMimeTypes": ["image/png", "image/jpeg", "image/webp"],
+                  "preferredMimeType": "image/webp"
+                }
              }
            }
-          …
+          ]
         },
         {
           "name": "NousResearch/Hermes-3-Llama-3.1-8B",
```
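All of the new `endpoints` blocks point chat-ui at OpenAI-compatible routes on the Hugging Face Inference API. As a quick smoke test of the new DeepSeek entry, something like the following should work (a minimal sketch, not part of this PR; it assumes the `openai` npm client and a Hugging Face access token in `HF_TOKEN`):

```ts
import OpenAI from "openai";

// Point the OpenAI client at the baseURL from the config above.
// HF_TOKEN is an assumption: any valid Hugging Face access token works.
const client = new OpenAI({
	baseURL:
		"https://api-inference.huggingface.co/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/v1",
	apiKey: process.env.HF_TOKEN,
});

const response = await client.chat.completions.create({
	model: "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
	messages: [{ role: "user", content: "how many R in strawberry?" }],
	max_tokens: 1024,
});

// R1-style models emit their chain of thought between <think> and </think>,
// which is what the "reasoning" config above tells chat-ui to look for.
console.log(response.choices[0].message.content);
```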
src/lib/components/chat/ChatMessage.svelte
CHANGED
```diff
@@ -217,7 +217,7 @@
 	{#if searchUpdates && searchUpdates.length > 0}
 		<OpenWebSearchResults webSearchMessages={searchUpdates} />
 	{/if}
-	{#if reasoningUpdates && reasoningUpdates.length > 0}
+	{#if reasoningUpdates && reasoningUpdates.length > 0 && message.reasoning && message.reasoning.trim().length > 0}
 		{@const summaries = reasoningUpdates
 			.filter((u) => u.subtype === MessageReasoningUpdateType.Status)
 			.map((u) => u.status)}
```
src/lib/server/textGeneration/generate.ts
CHANGED
```diff
@@ -101,15 +101,26 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
 				finalAnswer = text;
 				logger.error(e);
 			}
-		}
+		} else if (model.reasoning && model.reasoning.type === "tokens") {
+			// make sure to remove the content of the reasoning buffer from
+			// the final answer to avoid duplication
+			const beginIndex = reasoningBuffer.indexOf(model.reasoning.beginToken);
+			const endIndex = reasoningBuffer.lastIndexOf(model.reasoning.endToken);
 
-		yield {
-			type: MessageUpdateType.FinalAnswer,
-			text: finalAnswer,
-			interrupted,
-			webSources: output.webSources,
-		};
-		continue;
+			if (beginIndex !== -1 && endIndex !== -1) {
+				// Remove the reasoning section (including tokens) from final answer
+				finalAnswer =
+					text.slice(0, beginIndex) + text.slice(endIndex + model.reasoning.endToken.length);
+			}
+
+			yield {
+				type: MessageUpdateType.FinalAnswer,
+				text: finalAnswer,
+				interrupted,
+				webSources: output.webSources,
+			};
+			continue;
+		}
 	}
 
 	if (model.reasoning && model.reasoning.type === "tokens") {
@@ -121,6 +132,7 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
 				subtype: MessageReasoningUpdateType.Status,
 				status: "Started thinking...",
 			};
+			continue;
 		} else if (output.token.text === model.reasoning.endToken) {
 			reasoning = false;
 			reasoningBuffer += output.token.text;
@@ -129,6 +141,7 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
 				subtype: MessageReasoningUpdateType.Status,
 				status: `Done in ${Math.round((new Date().getTime() - startTime.getTime()) / 1000)}s.`,
 			};
+			continue;
 		}
 	}
 	// ignore special tokens
```
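Taken together, the generate.ts changes do two things: the new `else if` branch cuts the `<think>…</think>` span out of the final answer for `tokens`-type reasoning models before yielding it, and the added `continue` statements keep the begin/end reasoning tokens themselves out of the streamed answer. A standalone sketch of the stripping step (the `stripReasoning` helper is hypothetical, but it follows the same slicing logic as the diff):

```ts
// Hypothetical helper mirroring the slicing logic above: removes the first
// <think> ... last </think> span (tokens included) from the generated text.
function stripReasoning(text: string, beginToken = "<think>", endToken = "</think>"): string {
	const beginIndex = text.indexOf(beginToken);
	const endIndex = text.lastIndexOf(endToken);
	// If either token is missing, leave the text untouched.
	if (beginIndex === -1 || endIndex === -1) return text;
	return text.slice(0, beginIndex) + text.slice(endIndex + endToken.length);
}

// Example: only the part after </think> survives as the final answer.
console.log(stripReasoning("<think>2 + 2, carry the... </think>The answer is 4."));
// -> "The answer is 4."
```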