feat: UI for advanced reasoning models (#1605)
* feat: add a reasoning dropdown for CoT models
* feat: add status updates
* fix: various cleanups
  - pass content & status to result dropdown
  - don't store streaming updates in db
  - make status generation non-blocking
* fix: make sure not to push reasoning token stream to db
* feat: add time indicator and make the ui match websearch
* fix: change in status update & prompt
- chart/env/prod.yaml +3 -0
- src/lib/components/OpenWebSearchResults.svelte +1 -2
- src/lib/components/chat/ChatMessage.svelte +29 -6
- src/lib/components/chat/MarkdownRenderer.svelte +11 -15
- src/lib/components/chat/OpenReasoningResults.svelte +74 -0
- src/lib/server/generateFromDefaultEndpoint.ts +7 -2
- src/lib/server/models.ts +16 -0
- src/lib/server/textGeneration/generate.ts +111 -3
- src/lib/server/textGeneration/reasoning.ts +30 -0
- src/lib/server/textGeneration/title.ts +11 -8
- src/lib/server/websearch/search/generateQuery.ts +10 -7
- src/lib/types/Message.ts +2 -0
- src/lib/types/MessageUpdate.ts +22 -1
- src/lib/utils/getReturnFromGenerator.ts +7 -0
- src/routes/conversation/[id]/+page.svelte +12 -7
- src/routes/conversation/[id]/+server.ts +11 -0
chart/env/prod.yaml
CHANGED
@@ -144,6 +144,9 @@ envVars:
      "websiteUrl": "https://qwenlm.github.io/blog/qwq-32b-preview/",
      "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/qwen-logo.png",
      "description": "QwQ is an experiment model from the Qwen Team with advanced reasoning capabilities.",
+     "reasoning": {
+       "type": "summarize"
+     },
      "parameters": {
        "stop": ["<|im_end|>"],
        "truncate": 12288,
src/lib/components/OpenWebSearchResults.svelte
CHANGED
@@ -9,7 +9,6 @@
	import EosIconsLoading from "~icons/eos-icons/loading";
	import IconInternet from "./icons/IconInternet.svelte";

-	export let classNames = "";
	export let webSearchMessages: MessageWebSearchUpdate[] = [];

	$: sources = webSearchMessages.find(isMessageWebSearchSourcesUpdate)?.sources;
@@ -23,7 +22,7 @@
</script>

<details
-	class="flex w-fit rounded-xl border border-gray-200 bg-white shadow-sm dark:border-gray-800 dark:bg-gray-900"
+	class="flex w-fit max-w-full rounded-xl border border-gray-200 bg-white shadow-sm dark:border-gray-800 dark:bg-gray-900"
>
	<summary class="grid min-w-72 select-none grid-cols-[40px,1fr] items-center gap-2.5 p-2">
		<div
src/lib/components/chat/ChatMessage.svelte
CHANGED
@@ -25,6 +25,8 @@
		type MessageWebSearchSourcesUpdate,
		type MessageWebSearchUpdate,
		type MessageFinalAnswerUpdate,
+		type MessageReasoningUpdate,
+		MessageReasoningUpdateType,
	} from "$lib/types/MessageUpdate";
	import { base } from "$app/paths";
	import { useConvTreeStore } from "$lib/stores/convTree";
@@ -33,6 +35,7 @@
	import { enhance } from "$app/forms";
	import { browser } from "$app/environment";
	import MarkdownRenderer from "./MarkdownRenderer.svelte";
+	import OpenReasoningResults from "./OpenReasoningResults.svelte";

	export let model: Model;
	export let id: Message["id"];
@@ -90,9 +93,13 @@
		}
	}

-	$: searchUpdates = (message.updates?.filter(({ type }) => type ===
+	$: searchUpdates = (message.updates?.filter(({ type }) => type === MessageUpdateType.WebSearch) ??
		[]) as MessageWebSearchUpdate[];

+	$: reasoningUpdates = (message.updates?.filter(
+		({ type }) => type === MessageUpdateType.Reasoning
+	) ?? []) as MessageReasoningUpdate[];
+
	$: messageFinalAnswer = message.updates?.find(
		({ type }) => type === MessageUpdateType.FinalAnswer
	) as MessageFinalAnswerUpdate;
@@ -208,9 +215,17 @@
			</div>
		{/if}
		{#if searchUpdates && searchUpdates.length > 0}
-			<OpenWebSearchResults
+			<OpenWebSearchResults webSearchMessages={searchUpdates} />
+		{/if}
+		{#if reasoningUpdates && reasoningUpdates.length > 0}
+			{@const summaries = reasoningUpdates
+				.filter((u) => u.subtype === MessageReasoningUpdateType.Status)
+				.map((u) => u.status)}
+
+			<OpenReasoningResults
+				summary={summaries[summaries.length - 1] || ""}
+				content={message.reasoning || ""}
+				loading={loading && message.content.length === 0}
			/>
		{/if}

@@ -224,11 +239,19 @@
			{/each}
		{/if}

-		<div
+		<div
+			bind:this={contentEl}
+			class:mt-2={reasoningUpdates.length > 0 || searchUpdates.length > 0}
+		>
			{#if isLast && loading && $settings.disableStream}
				<IconLoading classNames="loading inline ml-2 first:ml-0" />
			{/if}
+
+			<div
+				class="prose max-w-none dark:prose-invert max-sm:prose-sm prose-headings:font-semibold prose-h1:text-lg prose-h2:text-base prose-h3:text-base prose-pre:bg-gray-800 dark:prose-pre:bg-gray-900"
+			>
+				<MarkdownRenderer content={message.content} sources={webSearchSources} />
+			</div>
		</div>

		<!-- Web Search sources -->
src/lib/components/chat/MarkdownRenderer.svelte
CHANGED
@@ -106,21 +106,17 @@
	});
</script>

-			{/await}
-		{/if}
-	{/each}
-</div>
+{#each tokens as token}
+	{#if token.type === "code"}
+		<CodeBlock lang={token.lang} code={token.text} />
+	{:else}
+		{@const parsed = marked.parse(processLatex(escapeHTML(token.raw)), options)}
+		{#await parsed then parsed}
+			<!-- eslint-disable-next-line svelte/no-at-html-tags -->
+			{@html DOMPurify.sanitize(parsed)}
+		{/await}
+	{/if}
+{/each}

<style lang="postcss">
	:global(.katex-display) {
src/lib/components/chat/OpenReasoningResults.svelte
ADDED
@@ -0,0 +1,74 @@
+<script lang="ts">
+	import IconThought from "~icons/carbon/circle-packing";
+	import MarkdownRenderer from "./MarkdownRenderer.svelte";
+
+	export let summary: string;
+	export let content: string;
+	export let loading: boolean = false;
+</script>
+
+<details
+	class="u flex w-fit max-w-full rounded-xl border border-gray-200 bg-white shadow-sm dark:border-gray-800 dark:bg-gray-900"
+>
+	<summary
+		class="grid min-w-72 cursor-pointer select-none grid-cols-[40px,1fr] items-center gap-2.5 p-2"
+	>
+		<div
+			class="relative grid aspect-square place-content-center overflow-hidden rounded-lg bg-gray-100 dark:bg-gray-800"
+		>
+			<svg
+				class="absolute inset-0 text-gray-300 transition-opacity dark:text-gray-700 {loading
+					? 'opacity-100'
+					: 'opacity-0'}"
+				width="40"
+				height="40"
+				viewBox="0 0 38 38"
+				fill="none"
+				xmlns="http://www.w3.org/2000/svg"
+			>
+				<path
+					class="loading-path"
+					d="M8 2.5H30C30 2.5 35.5 2.5 35.5 8V30C35.5 30 35.5 35.5 30 35.5H8C8 35.5 2.5 35.5 2.5 30V8C2.5 8 2.5 2.5 8 2.5Z"
+					stroke="currentColor"
+					stroke-width="1"
+					stroke-linecap="round"
+					id="shape"
+				/>
+			</svg>
+
+			<IconThought class="text-[1rem]" />
+		</div>
+		<dl class="leading-4">
+			<dd class="text-sm">Reasoning</dd>
+			<dt
+				class="flex items-center gap-1 truncate whitespace-nowrap text-[.82rem] text-gray-400"
+				class:animate-pulse={loading}
+			>
+				{summary}
+			</dt>
+		</dl>
+	</summary>
+
+	<div
+		class="border-t border-gray-200 px-5 pb-2 pt-2 text-sm text-gray-600 dark:border-gray-800 dark:text-gray-400"
+	>
+		<MarkdownRenderer {content} />
+	</div>
+</details>
+
+<style>
+	details summary::-webkit-details-marker {
+		display: none;
+	}
+
+	.loading-path {
+		stroke-dasharray: 61.45;
+		animation: loading 2s linear infinite;
+	}
+
+	@keyframes loading {
+		to {
+			stroke-dashoffset: 122.9;
+		}
+	}
+</style>
src/lib/server/generateFromDefaultEndpoint.ts
CHANGED
@@ -1,7 +1,8 @@
import { smallModel } from "$lib/server/models";
+import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
import type { EndpointMessage } from "./endpoints/endpoints";

-export async function generateFromDefaultEndpoint({
+export async function* generateFromDefaultEndpoint({
	messages,
	preprompt,
	generateSettings,
@@ -9,7 +10,7 @@ export async function generateFromDefaultEndpoint({
	messages: EndpointMessage[];
	preprompt?: string;
	generateSettings?: Record<string, unknown>;
-}): Promise<string> {
+}): AsyncGenerator<MessageUpdate, string, undefined> {
	const endpoint = await smallModel.getEndpoint();

	const tokenStream = await endpoint({ messages, preprompt, generateSettings });
@@ -25,6 +26,10 @@ export async function generateFromDefaultEndpoint({
			}
			return generated_text;
		}
+		yield {
+			type: MessageUpdateType.Stream,
+			token: output.token.text,
+		};
	}
	throw new Error("Generation failed");
}
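Note on this change: `generateFromDefaultEndpoint` is now an async generator that yields `Stream` updates token by token and returns the full generated text as the generator's return value. Callers that want to surface the tokens can delegate with `yield*` and still capture the returned string, which is how the `summarize` path in `generate.ts` forwards its updates. A rough sketch of that pattern (the import path and prompt text are illustrative, not taken from the diff):

```ts
import { generateFromDefaultEndpoint } from "$lib/server/generateFromDefaultEndpoint";
import type { MessageUpdate } from "$lib/types/MessageUpdate";

// Forwards every update yielded by generateFromDefaultEndpoint to our own
// caller, while `yield*` evaluates to the generator's return value: the
// complete generated text.
async function* summarizeWithUpdates(prompt: string): AsyncGenerator<MessageUpdate, string> {
	const fullText = yield* generateFromDefaultEndpoint({
		messages: [{ from: "user", content: prompt }],
		preprompt: "Summarize the text in one short paragraph.", // illustrative
		generateSettings: { max_new_tokens: 256 },
	});
	return fullText;
}
```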
src/lib/server/models.ts
CHANGED
@@ -17,6 +17,21 @@ import { isHuggingChat } from "$lib/utils/isHuggingChat";
type Optional<T, K extends keyof T> = Pick<Partial<T>, K> & Omit<T, K>;

+const reasoningSchema = z.union([
+	z.object({
+		type: z.literal("regex"), // everything is reasoning, extract the answer from the regex
+		regex: z.string(),
+	}),
+	z.object({
+		type: z.literal("tokens"), // use beginning and end tokens that define the reasoning portion of the answer
+		beginToken: z.string(),
+		endToken: z.string(),
+	}),
+	z.object({
+		type: z.literal("summarize"), // everything is reasoning, summarize the answer
+	}),
+]);
+
const modelConfig = z.object({
	/** Used as an identifier in DB */
	id: z.string().optional(),
@@ -70,6 +85,7 @@ const modelConfig = z.object({
	embeddingModel: validateEmbeddingModelByName(embeddingModels).optional(),
	/** Used to enable/disable system prompt usage */
	systemRoleSupported: z.boolean().default(true),
+	reasoning: reasoningSchema.optional(),
});

const modelsRaw = z.array(modelConfig).parse(JSON5.parse(env.MODELS));
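The new `reasoning` field accepts one of three shapes. The prod config above uses `summarize`; a model that brackets its chain of thought with explicit delimiter tokens would use the `tokens` variant instead. A hypothetical entry (model name and `<think>`/`</think>` delimiters are made up for illustration, not part of this PR) that the schema would accept:

```ts
// Hypothetical MODELS entry exercising the "tokens" branch of reasoningSchema:
// everything streamed between beginToken and endToken is treated as reasoning,
// the rest is shown as the normal answer.
const exampleModelEntry = {
	name: "example-org/reasoning-model", // illustrative
	reasoning: {
		type: "tokens",
		beginToken: "<think>", // illustrative delimiters
		endToken: "</think>",
	},
	parameters: {
		stop: ["<|im_end|>"],
		truncate: 12288,
	},
};
```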
src/lib/server/textGeneration/generate.ts
CHANGED
@@ -1,8 +1,14 @@
import type { ToolResult } from "$lib/types/Tool";
-import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
+import {
+	MessageReasoningUpdateType,
+	MessageUpdateType,
+	type MessageUpdate,
+} from "$lib/types/MessageUpdate";
import { AbortedGenerations } from "../abortedGenerations";
import type { TextGenerationContext } from "./types";
import type { EndpointMessage } from "../endpoints/endpoints";
+import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
+import { generateSummaryOfReasoning } from "./reasoning";

type GenerateContext = Omit<TextGenerationContext, "messages"> & { messages: EndpointMessage[] };

@@ -11,6 +17,26 @@ export async function* generate(
	toolResults: ToolResult[],
	preprompt?: string
): AsyncIterable<MessageUpdate> {
+	// reasoning mode is false by default
+	let reasoning = false;
+	let reasoningBuffer = "";
+	let lastReasoningUpdate = new Date();
+	let status = "";
+	const startTime = new Date();
+	if (
+		model.reasoning &&
+		(model.reasoning.type === "regex" || model.reasoning.type === "summarize")
+	) {
+		// if the model has reasoning in regex or summarize mode, it starts in reasoning mode
+		// and we extract the answer from the reasoning
+		reasoning = true;
+		yield {
+			type: MessageUpdateType.Reasoning,
+			subtype: MessageReasoningUpdateType.Status,
+			status: "Started reasoning...",
+		};
+	}
+
	for await (const output of await endpoint({
		messages,
		preprompt,
@@ -33,20 +59,102 @@
			text = text.slice(0, text.length - stopToken.length);
		}

+			let finalAnswer = text;
+			if (model.reasoning && model.reasoning.type === "regex") {
+				const regex = new RegExp(model.reasoning.regex);
+				finalAnswer = regex.exec(reasoningBuffer)?.[1] ?? text;
+			} else if (model.reasoning && model.reasoning.type === "summarize") {
+				yield {
+					type: MessageUpdateType.Reasoning,
+					subtype: MessageReasoningUpdateType.Status,
+					status: "Summarizing reasoning...",
+				};
+				const summary = yield* generateFromDefaultEndpoint({
+					messages: [
+						{
+							from: "user",
+							content: `Question: ${
+								messages[messages.length - 1].content
+							}\n\nReasoning: ${reasoningBuffer}`,
+						},
+					],
+					preprompt: `Your task is to summarize concisely all your reasoning steps and then give the final answer. Keep it short, one short paragraph at most. If the final solution includes code, make sure to include it in your answer.
+
+If the user is just having a casual conversation that doesn't require explanations, answer directly without explaining your steps, otherwise make sure to summarize step by step, make sure to skip dead-ends in your reasoning and removing excess detail.
+
+Do not use prefixes such as Response: or Answer: when answering to the user.`,
+					generateSettings: {
+						max_new_tokens: 1024,
+					},
+				});
+				finalAnswer = summary;
+				yield {
+					type: MessageUpdateType.Reasoning,
+					subtype: MessageReasoningUpdateType.Status,
+					status: `Done in ${Math.round((new Date().getTime() - startTime.getTime()) / 1000)}s.`,
+				};
+			}
+
			yield {
				type: MessageUpdateType.FinalAnswer,
-				text,
+				text: finalAnswer,
				interrupted,
				webSources: output.webSources,
			};
			continue;
		}

+		if (model.reasoning && model.reasoning.type === "tokens") {
+			if (output.token.text === model.reasoning.beginToken) {
+				reasoning = true;
+				reasoningBuffer += output.token.text;
+				yield {
+					type: MessageUpdateType.Reasoning,
+					subtype: MessageReasoningUpdateType.Status,
+					status: "Started thinking...",
+				};
+			} else if (output.token.text === model.reasoning.endToken) {
+				reasoning = false;
+				reasoningBuffer += output.token.text;
+				yield {
+					type: MessageUpdateType.Reasoning,
+					subtype: MessageReasoningUpdateType.Status,
+					status: `Done in ${Math.round((new Date().getTime() - startTime.getTime()) / 1000)}s.`,
+				};
+			}
+		}
		// ignore special tokens
		if (output.token.special) continue;

		// pass down normal token
-		yield { type: MessageUpdateType.Stream, token: output.token.text };
+		if (reasoning) {
+			reasoningBuffer += output.token.text;
+
+			// yield status update if it has changed
+			if (status !== "") {
+				yield {
+					type: MessageUpdateType.Reasoning,
+					subtype: MessageReasoningUpdateType.Status,
+					status,
+				};
+				status = "";
+			}
+
+			// create a new status every 5 seconds
+			if (new Date().getTime() - lastReasoningUpdate.getTime() > 4000) {
+				lastReasoningUpdate = new Date();
+				generateSummaryOfReasoning(reasoningBuffer).then((summary) => {
+					status = summary;
+				});
+			}
+			yield {
+				type: MessageUpdateType.Reasoning,
+				subtype: MessageReasoningUpdateType.Stream,
+				token: output.token.text,
+			};
+		} else {
+			yield { type: MessageUpdateType.Stream, token: output.token.text };
+		}

		// abort check
		const date = AbortedGenerations.getInstance().getList().get(conv._id.toString());
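In `regex` mode everything the model streams is treated as reasoning, and the final answer is extracted from the accumulated buffer with a single capture group once generation finishes. A minimal sketch of that extraction step, using a hypothetical `<answer>…</answer>` pattern (the real pattern comes from the model config, not from this PR):

```ts
// Hypothetical config value; any pattern with one capture group works.
const reasoningRegexPattern = "<answer>([\\s\\S]*)</answer>";

// Mirrors the extraction in generate.ts: if the pattern matches the buffered
// reasoning, the first capture group becomes the final answer; otherwise the
// raw generated text is kept as a fallback.
function extractFinalAnswer(reasoningBuffer: string, generatedText: string): string {
	const regex = new RegExp(reasoningRegexPattern);
	return regex.exec(reasoningBuffer)?.[1] ?? generatedText;
}
```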
src/lib/server/textGeneration/reasoning.ts
ADDED
@@ -0,0 +1,30 @@
+import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
+
+import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
+
+export async function generateSummaryOfReasoning(buffer: string): Promise<string> {
+	// debug 5s delay
+	await new Promise((resolve) => setTimeout(resolve, 3000));
+
+	const summary = await getReturnFromGenerator(
+		generateFromDefaultEndpoint({
+			messages: [
+				{
+					from: "user",
+					content: buffer.slice(-200),
+				},
+			],
+			preprompt: `You are tasked with summarizing the latest reasoning steps. Never describe results of the reasoning, only the process. Remain vague in your summary.
+				The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points.
+				Example: "Thinking about life...", "Summarizing the results...", "Processing the input..."`,
+			generateSettings: {
+				max_new_tokens: 50,
+			},
+		})
+	).then((summary) => {
+		const parts = summary.split("...");
+		return parts[0] + "...";
+	});
+
+	return summary;
+}
src/lib/server/textGeneration/title.ts
CHANGED
@@ -4,6 +4,7 @@ import type { EndpointMessage } from "../endpoints/endpoints";
import { logger } from "$lib/server/logger";
import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
import type { Conversation } from "$lib/types/Conversation";
+import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";

export async function* generateTitleForConversation(
	conv: Conversation
@@ -55,14 +56,16 @@ export async function generateTitle(prompt: string) {
		{ from: "user", content: prompt },
	];

-	return await generateFromDefaultEndpoint({
+	return await getReturnFromGenerator(
+		generateFromDefaultEndpoint({
+			messages,
+			preprompt:
+				"You are a summarization AI. Summarize the user's request into a single short sentence of four words or less. Do not try to answer it, only summarize the user's query. Always start your answer with an emoji relevant to the summary",
+			generateSettings: {
+				max_new_tokens: 15,
+			},
+		})
+	)
		.then((summary) => {
			// add an emoji if none is found in the first three characters
			if (!/\p{Emoji}/u.test(summary.slice(0, 3))) {
src/lib/server/websearch/search/generateQuery.ts
CHANGED
@@ -2,6 +2,7 @@ import type { Message } from "$lib/types/Message";
import { format } from "date-fns";
import type { EndpointMessage } from "../../endpoints/endpoints";
import { generateFromDefaultEndpoint } from "../../generateFromDefaultEndpoint";
+import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";

export async function generateQuery(messages: Message[]) {
	const currentDate = format(new Date(), "MMMM d, yyyy");
@@ -62,13 +63,15 @@ Current Question: Where is it being hosted?`,
		},
	];

-	const webQuery = await generateFromDefaultEndpoint({
+	const webQuery = await getReturnFromGenerator(
+		generateFromDefaultEndpoint({
+			messages: convQuery,
+			preprompt: `You are tasked with generating web search queries. Give me an appropriate query to answer my question for google search. Answer with only the query. Today is ${currentDate}`,
+			generateSettings: {
+				max_new_tokens: 30,
+			},
+		})
+	);

	return webQuery.trim();
}
src/lib/types/Message.ts
CHANGED
@@ -10,6 +10,8 @@ export type Message = Partial<Timestamps> & {
	updates?: MessageUpdate[];
	webSearchId?: WebSearch["_id"]; // legacy version
	webSearch?: WebSearch;
+
+	reasoning?: string;
	score?: -1 | 0 | 1;
	/**
	 * Either contains the base64 encoded image data
src/lib/types/MessageUpdate.ts
CHANGED
@@ -8,7 +8,8 @@ export type MessageUpdate =
	| MessageWebSearchUpdate
	| MessageStreamUpdate
	| MessageFileUpdate
-	| MessageFinalAnswerUpdate;
+	| MessageFinalAnswerUpdate
+	| MessageReasoningUpdate;

export enum MessageUpdateType {
	Status = "status",
@@ -18,6 +19,7 @@ export enum MessageUpdateType {
	Stream = "stream",
	File = "file",
	FinalAnswer = "finalAnswer",
+	Reasoning = "reasoning",
}

// Status
@@ -114,6 +116,25 @@ export interface MessageStreamUpdate {
	type: MessageUpdateType.Stream;
	token: string;
}
+
+export enum MessageReasoningUpdateType {
+	Stream = "stream",
+	Status = "status",
+}
+
+export type MessageReasoningUpdate = MessageReasoningStreamUpdate | MessageReasoningStatusUpdate;
+
+export interface MessageReasoningStreamUpdate {
+	type: MessageUpdateType.Reasoning;
+	subtype: MessageReasoningUpdateType.Stream;
+	token: string;
+}
+export interface MessageReasoningStatusUpdate {
+	type: MessageUpdateType.Reasoning;
+	subtype: MessageReasoningUpdateType.Status;
+	status: string;
+}
+
export interface MessageFileUpdate {
	type: MessageUpdateType.File;
	name: string;
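Because `MessageReasoningUpdate` is a discriminated union on `type` and `subtype`, consumers can narrow it without casts. A small hypothetical helper (not part of this PR) showing how the accumulation done in `+page.svelte` and `+server.ts` could be factored:

```ts
import {
	MessageReasoningUpdateType,
	MessageUpdateType,
	type MessageReasoningUpdate,
	type MessageUpdate,
} from "$lib/types/MessageUpdate";

// Hypothetical type guard: narrows any MessageUpdate to a reasoning update.
function isReasoningUpdate(update: MessageUpdate): update is MessageReasoningUpdate {
	return update.type === MessageUpdateType.Reasoning;
}

// Folds a list of updates into the streamed reasoning text plus the latest
// status line, the two pieces OpenReasoningResults needs.
function collectReasoning(updates: MessageUpdate[]): { text: string; status: string } {
	let text = "";
	let status = "";
	for (const update of updates.filter(isReasoningUpdate)) {
		if (update.subtype === MessageReasoningUpdateType.Stream) {
			text += update.token;
		} else {
			status = update.status;
		}
	}
	return { text, status };
}
```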
src/lib/utils/getReturnFromGenerator.ts
ADDED
@@ -0,0 +1,7 @@
+export async function getReturnFromGenerator<T, R>(generator: AsyncGenerator<T, R>): Promise<R> {
+	let result: IteratorResult<T, R>;
+	do {
+		result = await generator.next();
+	} while (!result.done); // Keep calling `next()` until `done` is true
+	return result.value; // Return the final value
+}
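`getReturnFromGenerator` is what lets the non-streaming call sites (`generateTitle`, `generateQuery`, `generateSummaryOfReasoning`) keep their old await-a-string shape: it drains the generator, discarding the yielded `Stream` updates, and resolves with the generator's return value. A usage sketch matching those call sites (import path and prompt text are illustrative assumptions, mirroring the aliases used elsewhere in the diff):

```ts
import { generateFromDefaultEndpoint } from "$lib/server/generateFromDefaultEndpoint";
import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";

async function quickCompletion(): Promise<string> {
	// Drains the generator (the intermediate token updates are ignored) and
	// returns the final generated string.
	return getReturnFromGenerator(
		generateFromDefaultEndpoint({
			messages: [{ from: "user", content: "Say hello in five words." }],
			preprompt: "You are a terse assistant.", // illustrative
			generateSettings: { max_new_tokens: 20 },
		})
	);
}
```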
src/routes/conversation/[id]/+page.svelte
CHANGED
@@ -12,9 +12,9 @@
	import { webSearchParameters } from "$lib/stores/webSearchParameters";
	import type { Message } from "$lib/types/Message";
	import {
+		MessageReasoningUpdateType,
		MessageUpdateStatus,
		MessageUpdateType,
-		type MessageUpdate,
	} from "$lib/types/MessageUpdate";
	import titleUpdate from "$lib/stores/titleUpdate";
	import file2base64 from "$lib/utils/file2base64";
@@ -215,8 +215,6 @@

			files = [];

-			const messageUpdates: MessageUpdate[] = [];
-
			for await (const update of messageUpdatesIterator) {
				if ($isAborted) {
					messageUpdatesAbortController.abort();
@@ -229,7 +227,7 @@
					update.token = update.token.replaceAll("\0", "");
				}

-				messageUpdates.push(update);
+				messageToWriteTo.updates = [...(messageToWriteTo.updates ?? []), update];

				if (update.type === MessageUpdateType.Stream && !$settings.disableStream) {
					messageToWriteTo.content += update.token;
@@ -239,7 +237,6 @@
					update.type === MessageUpdateType.WebSearch ||
					update.type === MessageUpdateType.Tool
				) {
-					messageToWriteTo.updates = [...(messageToWriteTo.updates ?? []), update];
					messages = [...messages];
				} else if (
					update.type === MessageUpdateType.Status &&
@@ -262,10 +259,18 @@
						{ type: "hash", value: update.sha, mime: update.mime, name: update.name },
					];
					messages = [...messages];
+				} else if (update.type === MessageUpdateType.Reasoning) {
+					if (!messageToWriteTo.reasoning) {
+						messageToWriteTo.reasoning = "";
+					}
+					if (update.subtype === MessageReasoningUpdateType.Stream) {
+						messageToWriteTo.reasoning += update.token;
+					} else {
+						messageToWriteTo.updates = [...(messageToWriteTo.updates ?? []), update];
+					}
+					messages = [...messages];
				}
			}
-
-			messageToWriteTo.updates = messageUpdates;
		} catch (err) {
			if (err instanceof Error && err.message.includes("overloaded")) {
				$error = "Too much traffic, please try again.";
src/routes/conversation/[id]/+server.ts
CHANGED
@@ -9,6 +9,7 @@ import { error } from "@sveltejs/kit";
import { ObjectId } from "mongodb";
import { z } from "zod";
import {
+	MessageReasoningUpdateType,
	MessageUpdateStatus,
	MessageUpdateType,
	type MessageUpdate,
@@ -355,6 +356,12 @@ export async function POST({ request, locals, params, getClientAddress }) {
					Date.now() - (lastTokenTimestamp ?? promptedAt).getTime()
				);
				lastTokenTimestamp = new Date();
+			} else if (
+				event.type === MessageUpdateType.Reasoning &&
+				event.subtype === MessageReasoningUpdateType.Stream
+			) {
+				messageToWriteTo.reasoning ??= "";
+				messageToWriteTo.reasoning += event.token;
			}

			// Set the title
@@ -392,6 +399,10 @@ export async function POST({ request, locals, params, getClientAddress }) {
			!(
				event.type === MessageUpdateType.Status &&
				event.status === MessageUpdateStatus.KeepAlive
+			) &&
+			!(
+				event.type === MessageUpdateType.Reasoning &&
+				event.subtype === MessageReasoningUpdateType.Stream
			)
		) {
			messageToWriteTo?.updates?.push(event);