nsarrazin HF Staff committed on
Commit
a38e9de
·
unverified ·
1 Parent(s): 3c216a4

feat: add `deepseek-ai/DeepSeek-R1-Distill-Qwen-32B` to HuggingChat config (#1660)

Browse files

* feat: add `deepseek-ai/DeepSeek-R1-Distill-Qwen-32B` to HuggingChat config

* feat: hide reasoning modal when it's empty

* fix: put r1 model higher in list

chart/env/prod.yaml CHANGED
@@ -136,23 +136,15 @@ envVars:
136
  ]
137
  },
138
  {
139
- "name": "Qwen/QwQ-32B-Preview",
140
- "preprompt": "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.",
141
- "modelUrl": "https://huggingface.co/Qwen/QwQ-32B-Preview",
142
- "websiteUrl": "https://qwenlm.github.io/blog/qwq-32b-preview/",
143
- "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/qwen-logo.png",
144
- "description": "QwQ is an experiment model from the Qwen Team with advanced reasoning capabilities.",
145
  "reasoning": {
146
- "type": "summarize"
147
- },
148
- "parameters": {
149
- "stop": ["<|im_end|>"],
150
- "truncate": 12288,
151
- "max_new_tokens": 4096,
152
- "temperature": 0.7,
153
- "top_k": 20,
154
- "top_p": 0.8,
155
- "repetition_penalty": 1.05
156
  },
157
  "promptExamples": [
158
  {
@@ -167,6 +159,12 @@ envVars:
167
  "title": "Measuring 6 liters",
168
  "prompt": "I have a 6- and a 12-liter jug. I want to measure exactly 6 liters."
169
  }
 
 
 
 
 
 
170
  ]
171
  },
172
  {
@@ -196,10 +194,46 @@ envVars:
196
  "prompt": "I have a 6- and a 12-liter jug. I want to measure exactly 6 liters."
197
  }
198
  ],
199
- "endpoints": [{
200
- "type": "openai",
201
- "baseURL": "https://api-inference.huggingface.co/models/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/v1"
202
- }]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  },
204
  {
205
  "name": "Qwen/Qwen2.5-Coder-32B-Instruct",
@@ -228,10 +262,12 @@ envVars:
228
  "prompt": "Generate a snazzy static landing page for a local coffee shop using HTML and CSS. You can use tailwind using <script src='https://cdn.tailwindcss.com'></script>."
229
  }
230
  ],
231
- "endpoints": [{
232
- "type": "openai",
233
- "baseURL": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct/v1"
234
- }]
 
 
235
  },
236
  {
237
  "name": "meta-llama/Llama-3.2-11B-Vision-Instruct",
@@ -245,19 +281,21 @@ envVars:
245
  "truncate": 14336,
246
  "max_new_tokens": 1536
247
  },
248
- "endpoints": [{
249
- "type": "openai",
250
- "baseURL": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1",
251
- "multimodal": {
252
- "image": {
253
- "maxSizeInMB": 10,
254
- "maxWidth": 560,
255
- "maxHeight": 560,
256
- "supportedMimeTypes": ["image/png", "image/jpeg", "image/webp"],
257
- "preferredMimeType": "image/webp"
 
 
258
  }
259
  }
260
- }]
261
  },
262
  {
263
  "name": "NousResearch/Hermes-3-Llama-3.1-8B",
 
136
  ]
137
  },
138
  {
139
+ "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
140
+ "modelUrl": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
141
+ "websiteUrl": "https://deepseek.com/",
142
+ "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/deepseek-logo.png",
143
+ "description": "The first reasoning model from DeepSeek, distilled into a 32B dense model. Outperforms o1-mini on multiple benchmarks.",
 
144
  "reasoning": {
145
+ "type": "tokens",
146
+ "beginToken": "<think>",
147
+ "endToken": "</think>"
 
 
 
 
 
 
 
148
  },
149
  "promptExamples": [
150
  {
 
159
  "title": "Measuring 6 liters",
160
  "prompt": "I have a 6- and a 12-liter jug. I want to measure exactly 6 liters."
161
  }
162
+ ],
163
+ "endpoints": [
164
+ {
165
+ "type": "openai",
166
+ "baseURL": "https://api-inference.huggingface.co/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/v1"
167
+ }
168
  ]
169
  },
170
  {
 
194
  "prompt": "I have a 6- and a 12-liter jug. I want to measure exactly 6 liters."
195
  }
196
  ],
197
+ "endpoints": [
198
+ {
199
+ "type": "openai",
200
+ "baseURL": "https://api-inference.huggingface.co/models/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/v1"
201
+ }
202
+ ]
203
+ },
204
+ {
205
+ "name": "Qwen/QwQ-32B-Preview",
206
+ "preprompt": "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.",
207
+ "modelUrl": "https://huggingface.co/Qwen/QwQ-32B-Preview",
208
+ "websiteUrl": "https://qwenlm.github.io/blog/qwq-32b-preview/",
209
+ "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/qwen-logo.png",
210
+ "description": "QwQ is an experiment model from the Qwen Team with advanced reasoning capabilities.",
211
+ "reasoning": {
212
+ "type": "summarize"
213
+ },
214
+ "parameters": {
215
+ "stop": ["<|im_end|>"],
216
+ "truncate": 12288,
217
+ "max_new_tokens": 4096,
218
+ "temperature": 0.7,
219
+ "top_k": 20,
220
+ "top_p": 0.8,
221
+ "repetition_penalty": 1.05
222
+ },
223
+ "promptExamples": [
224
+ {
225
+ "title": "Rs in strawberry",
226
+ "prompt": "how many R in strawberry?"
227
+ },
228
+ {
229
+ "title": "Larger number",
230
+ "prompt": "9.11 or 9.9 which number is larger?"
231
+ },
232
+ {
233
+ "title": "Measuring 6 liters",
234
+ "prompt": "I have a 6- and a 12-liter jug. I want to measure exactly 6 liters."
235
+ }
236
+ ]
237
  },
238
  {
239
  "name": "Qwen/Qwen2.5-Coder-32B-Instruct",
 
262
  "prompt": "Generate a snazzy static landing page for a local coffee shop using HTML and CSS. You can use tailwind using <script src='https://cdn.tailwindcss.com'></script>."
263
  }
264
  ],
265
+ "endpoints": [
266
+ {
267
+ "type": "openai",
268
+ "baseURL": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct/v1"
269
+ }
270
+ ]
271
  },
272
  {
273
  "name": "meta-llama/Llama-3.2-11B-Vision-Instruct",
 
281
  "truncate": 14336,
282
  "max_new_tokens": 1536
283
  },
284
+ "endpoints": [
285
+ {
286
+ "type": "openai",
287
+ "baseURL": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1",
288
+ "multimodal": {
289
+ "image": {
290
+ "maxSizeInMB": 10,
291
+ "maxWidth": 560,
292
+ "maxHeight": 560,
293
+ "supportedMimeTypes": ["image/png", "image/jpeg", "image/webp"],
294
+ "preferredMimeType": "image/webp"
295
+ }
296
  }
297
  }
298
+ ]
299
  },
300
  {
301
  "name": "NousResearch/Hermes-3-Llama-3.1-8B",
src/lib/components/chat/ChatMessage.svelte CHANGED
@@ -217,7 +217,7 @@
217
  {#if searchUpdates && searchUpdates.length > 0}
218
  <OpenWebSearchResults webSearchMessages={searchUpdates} />
219
  {/if}
220
- {#if reasoningUpdates && reasoningUpdates.length > 0}
221
  {@const summaries = reasoningUpdates
222
  .filter((u) => u.subtype === MessageReasoningUpdateType.Status)
223
  .map((u) => u.status)}
 
217
  {#if searchUpdates && searchUpdates.length > 0}
218
  <OpenWebSearchResults webSearchMessages={searchUpdates} />
219
  {/if}
220
+ {#if reasoningUpdates && reasoningUpdates.length > 0 && message.reasoning && message.reasoning.trim().length > 0}
221
  {@const summaries = reasoningUpdates
222
  .filter((u) => u.subtype === MessageReasoningUpdateType.Status)
223
  .map((u) => u.status)}
src/lib/server/textGeneration/generate.ts CHANGED
@@ -101,15 +101,26 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
101
  finalAnswer = text;
102
  logger.error(e);
103
  }
104
- }
 
 
 
 
105
 
106
- yield {
107
- type: MessageUpdateType.FinalAnswer,
108
- text: finalAnswer,
109
- interrupted,
110
- webSources: output.webSources,
111
- };
112
- continue;
 
 
 
 
 
 
 
113
  }
114
 
115
  if (model.reasoning && model.reasoning.type === "tokens") {
@@ -121,6 +132,7 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
121
  subtype: MessageReasoningUpdateType.Status,
122
  status: "Started thinking...",
123
  };
 
124
  } else if (output.token.text === model.reasoning.endToken) {
125
  reasoning = false;
126
  reasoningBuffer += output.token.text;
@@ -129,6 +141,7 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
129
  subtype: MessageReasoningUpdateType.Status,
130
  status: `Done in ${Math.round((new Date().getTime() - startTime.getTime()) / 1000)}s.`,
131
  };
 
132
  }
133
  }
134
  // ignore special tokens
 
101
  finalAnswer = text;
102
  logger.error(e);
103
  }
104
+ } else if (model.reasoning && model.reasoning.type === "tokens") {
105
+ // make sure to remove the content of the reasoning buffer from
106
+ // the final answer to avoid duplication
107
+ const beginIndex = reasoningBuffer.indexOf(model.reasoning.beginToken);
108
+ const endIndex = reasoningBuffer.lastIndexOf(model.reasoning.endToken);
109
 
110
+ if (beginIndex !== -1 && endIndex !== -1) {
111
+ // Remove the reasoning section (including tokens) from final answer
112
+ finalAnswer =
113
+ text.slice(0, beginIndex) + text.slice(endIndex + model.reasoning.endToken.length);
114
+ }
115
+
116
+ yield {
117
+ type: MessageUpdateType.FinalAnswer,
118
+ text: finalAnswer,
119
+ interrupted,
120
+ webSources: output.webSources,
121
+ };
122
+ continue;
123
+ }
124
  }
125
 
126
  if (model.reasoning && model.reasoning.type === "tokens") {
 
132
  subtype: MessageReasoningUpdateType.Status,
133
  status: "Started thinking...",
134
  };
135
+ continue;
136
  } else if (output.token.text === model.reasoning.endToken) {
137
  reasoning = false;
138
  reasoningBuffer += output.token.text;
 
141
  subtype: MessageReasoningUpdateType.Status,
142
  status: `Done in ${Math.round((new Date().getTime() - startTime.getTime()) / 1000)}s.`,
143
  };
144
+ continue;
145
  }
146
  }
147
  // ignore special tokens