Spaces:
Running
Running
fix: catch errors more gracefully in reasoning-specific calls
Browse files
src/lib/server/textGeneration/generate.ts
CHANGED
@@ -9,6 +9,7 @@ import type { TextGenerationContext } from "./types";
|
|
9 |
import type { EndpointMessage } from "../endpoints/endpoints";
|
10 |
import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
|
11 |
import { generateSummaryOfReasoning } from "./reasoning";
|
|
|
12 |
|
13 |
type GenerateContext = Omit<TextGenerationContext, "messages"> & { messages: EndpointMessage[] };
|
14 |
|
@@ -69,30 +70,35 @@ export async function* generate(
|
|
69 |
subtype: MessageReasoningUpdateType.Status,
|
70 |
status: "Summarizing reasoning...",
|
71 |
};
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
|
|
82 |
|
83 |
If the user is just having a casual conversation that doesn't require explanations, answer directly without explaining your steps, otherwise make sure to summarize step by step, make sure to skip dead-ends in your reasoning and removing excess detail.
|
84 |
|
85 |
Do not use prefixes such as Response: or Answer: when answering to the user.`,
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
|
|
|
|
|
|
|
|
96 |
}
|
97 |
|
98 |
yield {
|
@@ -143,9 +149,13 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
|
|
143 |
// create a new status once more than 4 seconds have passed since the last one
|
144 |
if (new Date().getTime() - lastReasoningUpdate.getTime() > 4000) {
|
145 |
lastReasoningUpdate = new Date();
|
146 |
-
|
147 |
-
|
148 |
-
|
|
|
|
|
|
|
|
|
149 |
}
|
150 |
yield {
|
151 |
type: MessageUpdateType.Reasoning,
|
|
|
9 |
import type { EndpointMessage } from "../endpoints/endpoints";
|
10 |
import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
|
11 |
import { generateSummaryOfReasoning } from "./reasoning";
|
12 |
+
import { logger } from "../logger";
|
13 |
|
14 |
type GenerateContext = Omit<TextGenerationContext, "messages"> & { messages: EndpointMessage[] };
|
15 |
|
|
|
70 |
subtype: MessageReasoningUpdateType.Status,
|
71 |
status: "Summarizing reasoning...",
|
72 |
};
|
73 |
+
try {
|
74 |
+
const summary = yield* generateFromDefaultEndpoint({
|
75 |
+
messages: [
|
76 |
+
{
|
77 |
+
from: "user",
|
78 |
+
content: `Question: ${
|
79 |
+
messages[messages.length - 1].content
|
80 |
+
}\n\nReasoning: ${reasoningBuffer}`,
|
81 |
+
},
|
82 |
+
],
|
83 |
+
preprompt: `Your task is to summarize concisely all your reasoning steps and then give the final answer. Keep it short, one short paragraph at most. If the reasoning steps explicitly include a code solution, make sure to include it in your answer.
|
84 |
|
85 |
If the user is just having a casual conversation that doesn't require explanations, answer directly without explaining your steps, otherwise make sure to summarize step by step, make sure to skip dead-ends in your reasoning and removing excess detail.
|
86 |
|
87 |
Do not use prefixes such as Response: or Answer: when answering to the user.`,
|
88 |
+
generateSettings: {
|
89 |
+
max_new_tokens: 1024,
|
90 |
+
},
|
91 |
+
});
|
92 |
+
finalAnswer = summary;
|
93 |
+
yield {
|
94 |
+
type: MessageUpdateType.Reasoning,
|
95 |
+
subtype: MessageReasoningUpdateType.Status,
|
96 |
+
status: `Done in ${Math.round((new Date().getTime() - startTime.getTime()) / 1000)}s.`,
|
97 |
+
};
|
98 |
+
} catch (e) {
|
99 |
+
finalAnswer = text;
|
100 |
+
logger.error(e);
|
101 |
+
}
|
102 |
}
|
103 |
|
104 |
yield {
|
|
|
149 |
// create a new status once more than 4 seconds have passed since the last one
|
150 |
if (new Date().getTime() - lastReasoningUpdate.getTime() > 4000) {
|
151 |
lastReasoningUpdate = new Date();
|
152 |
+
try {
|
153 |
+
generateSummaryOfReasoning(reasoningBuffer).then((summary) => {
|
154 |
+
status = summary;
|
155 |
+
});
|
156 |
+
} catch (e) {
|
157 |
+
logger.error(e);
|
158 |
+
}
|
159 |
}
|
160 |
yield {
|
161 |
type: MessageUpdateType.Reasoning,
|
src/lib/server/textGeneration/reasoning.ts
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
|
2 |
|
3 |
import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
|
|
|
4 |
|
5 |
export async function generateSummaryOfReasoning(buffer: string): Promise<string> {
|
6 |
// debug 5s delay
|
@@ -21,10 +22,15 @@ export async function generateSummaryOfReasoning(buffer: string): Promise<string
|
|
21 |
max_new_tokens: 50,
|
22 |
},
|
23 |
})
|
24 |
-
)
|
25 |
-
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
return summary;
|
30 |
}
|
|
|
1 |
import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
|
2 |
|
3 |
import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
|
4 |
+
import { logger } from "../logger";
|
5 |
|
6 |
export async function generateSummaryOfReasoning(buffer: string): Promise<string> {
|
7 |
// debug 5s delay
|
|
|
22 |
max_new_tokens: 50,
|
23 |
},
|
24 |
})
|
25 |
+
)
|
26 |
+
.then((summary) => {
|
27 |
+
const parts = summary.split("...");
|
28 |
+
return parts[0] + "...";
|
29 |
+
})
|
30 |
+
.catch((e) => {
|
31 |
+
logger.error(e);
|
32 |
+
return "Reasoning...";
|
33 |
+
});
|
34 |
|
35 |
return summary;
|
36 |
}
|