Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
fix: handle no beginToken for token based reasoning models (#1713)
Browse files
* refactor: remove debug console log in updateLocalEnv script
* fix: handle no beginToken for token-based reasoning models
chart/env/prod.yaml
CHANGED
@@ -139,7 +139,7 @@ envVars:
|
|
139 |
"description": "The first reasoning model from DeepSeek, distilled into a 32B dense model. Outperforms o1-mini on multiple benchmarks.",
|
140 |
"reasoning": {
|
141 |
"type": "tokens",
|
142 |
-
"beginToken": "<think>",
|
143 |
"endToken": "</think>"
|
144 |
},
|
145 |
"promptExamples": [
|
|
|
139 |
"description": "The first reasoning model from DeepSeek, distilled into a 32B dense model. Outperforms o1-mini on multiple benchmarks.",
|
140 |
"reasoning": {
|
141 |
"type": "tokens",
|
142 |
+
"beginToken": "",
|
143 |
"endToken": "</think>"
|
144 |
},
|
145 |
"promptExamples": [
|
scripts/updateLocalEnv.ts
CHANGED
@@ -30,7 +30,5 @@ full_config = full_config.replaceAll(
|
|
30 |
"https://api-inference.huggingface.co"
|
31 |
);
|
32 |
|
33 |
-
console.log(full_config);
|
34 |
-
|
35 |
// Write full_config to .env.local
|
36 |
fs.writeFileSync(".env.local", full_config);
|
|
|
30 |
"https://api-inference.huggingface.co"
|
31 |
);
|
32 |
|
|
|
|
|
33 |
// Write full_config to .env.local
|
34 |
fs.writeFileSync(".env.local", full_config);
|
src/lib/server/models.ts
CHANGED
@@ -24,7 +24,7 @@ const reasoningSchema = z.union([
|
|
24 |
}),
|
25 |
z.object({
|
26 |
type: z.literal("tokens"), // use beginning and end tokens that define the reasoning portion of the answer
|
27 |
-
beginToken: z.string(),
|
28 |
endToken: z.string(),
|
29 |
}),
|
30 |
z.object({
|
|
|
24 |
}),
|
25 |
z.object({
|
26 |
type: z.literal("tokens"), // use beginning and end tokens that define the reasoning portion of the answer
|
27 |
+
beginToken: z.string(), // empty string means the model starts in reasoning mode
|
28 |
endToken: z.string(),
|
29 |
}),
|
30 |
z.object({
|
src/lib/server/textGeneration/generate.ts
CHANGED
@@ -27,7 +27,10 @@ export async function* generate(
|
|
27 |
const startTime = new Date();
|
28 |
if (
|
29 |
model.reasoning &&
|
30 |
-
|
|
|
|
|
|
|
31 |
) {
|
32 |
// if the model has reasoning in regex or summarize mode, it starts in reasoning mode
|
33 |
// and we extract the answer from the reasoning
|
@@ -104,7 +107,11 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
|
|
104 |
} else if (model.reasoning && model.reasoning.type === "tokens") {
|
105 |
// make sure to remove the content of the reasoning buffer from
|
106 |
// the final answer to avoid duplication
|
107 |
-
|
|
|
|
|
|
|
|
|
108 |
const endIndex = reasoningBuffer.lastIndexOf(model.reasoning.endToken);
|
109 |
|
110 |
if (beginIndex !== -1 && endIndex !== -1) {
|
|
|
27 |
const startTime = new Date();
|
28 |
if (
|
29 |
model.reasoning &&
|
30 |
+
// if the beginToken is an empty string, the model starts in reasoning mode
|
31 |
+
(model.reasoning.type === "regex" ||
|
32 |
+
model.reasoning.type === "summarize" ||
|
33 |
+
(model.reasoning.type === "tokens" && model.reasoning.beginToken === ""))
|
34 |
) {
|
35 |
// if the model has reasoning in regex or summarize mode, it starts in reasoning mode
|
36 |
// and we extract the answer from the reasoning
|
|
|
107 |
} else if (model.reasoning && model.reasoning.type === "tokens") {
|
108 |
// make sure to remove the content of the reasoning buffer from
|
109 |
// the final answer to avoid duplication
|
110 |
+
|
111 |
+
// if the beginToken is an empty string, we don't need to remove anything
|
112 |
+
const beginIndex = model.reasoning.beginToken
|
113 |
+
? reasoningBuffer.indexOf(model.reasoning.beginToken)
|
114 |
+
: 0;
|
115 |
const endIndex = reasoningBuffer.lastIndexOf(model.reasoning.endToken);
|
116 |
|
117 |
if (beginIndex !== -1 && endIndex !== -1) {
|