nsarrazin HF Staff committed on
Commit
bf75aa7
·
unverified ·
1 Parent(s): 85b38e3

feat: UI for advanced reasoning models (#1605)

Browse files

* feat: add a reasoning dropdown for CoT models

* feat: add status updates

* fix: various cleanups
- pass content & status to result dropdown
- dont store streaming updates in db
- make status generation non blocking

* fix: make sure not to push reasoning token stream to db

* feat: add time indicator and make the ui match websearch

* fix: change in status update & prompt

chart/env/prod.yaml CHANGED
@@ -144,6 +144,9 @@ envVars:
144
  "websiteUrl": "https://qwenlm.github.io/blog/qwq-32b-preview/",
145
  "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/qwen-logo.png",
146
  "description": "QwQ is an experiment model from the Qwen Team with advanced reasoning capabilities.",
 
 
 
147
  "parameters": {
148
  "stop": ["<|im_end|>"],
149
  "truncate": 12288,
 
144
  "websiteUrl": "https://qwenlm.github.io/blog/qwq-32b-preview/",
145
  "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/qwen-logo.png",
146
  "description": "QwQ is an experiment model from the Qwen Team with advanced reasoning capabilities.",
147
+ "reasoning": {
148
+ "type": "summarize"
149
+ },
150
  "parameters": {
151
  "stop": ["<|im_end|>"],
152
  "truncate": 12288,
src/lib/components/OpenWebSearchResults.svelte CHANGED
@@ -9,7 +9,6 @@
9
  import EosIconsLoading from "~icons/eos-icons/loading";
10
  import IconInternet from "./icons/IconInternet.svelte";
11
 
12
- export let classNames = "";
13
  export let webSearchMessages: MessageWebSearchUpdate[] = [];
14
 
15
  $: sources = webSearchMessages.find(isMessageWebSearchSourcesUpdate)?.sources;
@@ -23,7 +22,7 @@
23
  </script>
24
 
25
  <details
26
- class="flex w-fit rounded-xl border border-gray-200 bg-white shadow-sm dark:border-gray-800 dark:bg-gray-900 {classNames} max-w-full"
27
  >
28
  <summary class="grid min-w-72 select-none grid-cols-[40px,1fr] items-center gap-2.5 p-2">
29
  <div
 
9
  import EosIconsLoading from "~icons/eos-icons/loading";
10
  import IconInternet from "./icons/IconInternet.svelte";
11
 
 
12
  export let webSearchMessages: MessageWebSearchUpdate[] = [];
13
 
14
  $: sources = webSearchMessages.find(isMessageWebSearchSourcesUpdate)?.sources;
 
22
  </script>
23
 
24
  <details
25
+ class="flex w-fit max-w-full rounded-xl border border-gray-200 bg-white shadow-sm dark:border-gray-800 dark:bg-gray-900"
26
  >
27
  <summary class="grid min-w-72 select-none grid-cols-[40px,1fr] items-center gap-2.5 p-2">
28
  <div
src/lib/components/chat/ChatMessage.svelte CHANGED
@@ -25,6 +25,8 @@
25
  type MessageWebSearchSourcesUpdate,
26
  type MessageWebSearchUpdate,
27
  type MessageFinalAnswerUpdate,
 
 
28
  } from "$lib/types/MessageUpdate";
29
  import { base } from "$app/paths";
30
  import { useConvTreeStore } from "$lib/stores/convTree";
@@ -33,6 +35,7 @@
33
  import { enhance } from "$app/forms";
34
  import { browser } from "$app/environment";
35
  import MarkdownRenderer from "./MarkdownRenderer.svelte";
 
36
 
37
  export let model: Model;
38
  export let id: Message["id"];
@@ -90,9 +93,13 @@
90
  }
91
  }
92
 
93
- $: searchUpdates = (message.updates?.filter(({ type }) => type === "webSearch") ??
94
  []) as MessageWebSearchUpdate[];
95
 
 
 
 
 
96
  $: messageFinalAnswer = message.updates?.find(
97
  ({ type }) => type === MessageUpdateType.FinalAnswer
98
  ) as MessageFinalAnswerUpdate;
@@ -208,9 +215,17 @@
208
  </div>
209
  {/if}
210
  {#if searchUpdates && searchUpdates.length > 0}
211
- <OpenWebSearchResults
212
- classNames={message.content.length ? "mb-3.5" : ""}
213
- webSearchMessages={searchUpdates}
 
 
 
 
 
 
 
 
214
  />
215
  {/if}
216
 
@@ -224,11 +239,19 @@
224
  {/each}
225
  {/if}
226
 
227
- <div bind:this={contentEl}>
 
 
 
228
  {#if isLast && loading && $settings.disableStream}
229
  <IconLoading classNames="loading inline ml-2 first:ml-0" />
230
  {/if}
231
- <MarkdownRenderer content={message.content} sources={webSearchSources} />
 
 
 
 
 
232
  </div>
233
 
234
  <!-- Web Search sources -->
 
25
  type MessageWebSearchSourcesUpdate,
26
  type MessageWebSearchUpdate,
27
  type MessageFinalAnswerUpdate,
28
+ type MessageReasoningUpdate,
29
+ MessageReasoningUpdateType,
30
  } from "$lib/types/MessageUpdate";
31
  import { base } from "$app/paths";
32
  import { useConvTreeStore } from "$lib/stores/convTree";
 
35
  import { enhance } from "$app/forms";
36
  import { browser } from "$app/environment";
37
  import MarkdownRenderer from "./MarkdownRenderer.svelte";
38
+ import OpenReasoningResults from "./OpenReasoningResults.svelte";
39
 
40
  export let model: Model;
41
  export let id: Message["id"];
 
93
  }
94
  }
95
 
96
+ $: searchUpdates = (message.updates?.filter(({ type }) => type === MessageUpdateType.WebSearch) ??
97
  []) as MessageWebSearchUpdate[];
98
 
99
+ $: reasoningUpdates = (message.updates?.filter(
100
+ ({ type }) => type === MessageUpdateType.Reasoning
101
+ ) ?? []) as MessageReasoningUpdate[];
102
+
103
  $: messageFinalAnswer = message.updates?.find(
104
  ({ type }) => type === MessageUpdateType.FinalAnswer
105
  ) as MessageFinalAnswerUpdate;
 
215
  </div>
216
  {/if}
217
  {#if searchUpdates && searchUpdates.length > 0}
218
+ <OpenWebSearchResults webSearchMessages={searchUpdates} />
219
+ {/if}
220
+ {#if reasoningUpdates && reasoningUpdates.length > 0}
221
+ {@const summaries = reasoningUpdates
222
+ .filter((u) => u.subtype === MessageReasoningUpdateType.Status)
223
+ .map((u) => u.status)}
224
+
225
+ <OpenReasoningResults
226
+ summary={summaries[summaries.length - 1] || ""}
227
+ content={message.reasoning || ""}
228
+ loading={loading && message.content.length === 0}
229
  />
230
  {/if}
231
 
 
239
  {/each}
240
  {/if}
241
 
242
+ <div
243
+ bind:this={contentEl}
244
+ class:mt-2={reasoningUpdates.length > 0 || searchUpdates.length > 0}
245
+ >
246
  {#if isLast && loading && $settings.disableStream}
247
  <IconLoading classNames="loading inline ml-2 first:ml-0" />
248
  {/if}
249
+
250
+ <div
251
+ class="prose max-w-none dark:prose-invert max-sm:prose-sm prose-headings:font-semibold prose-h1:text-lg prose-h2:text-base prose-h3:text-base prose-pre:bg-gray-800 dark:prose-pre:bg-gray-900"
252
+ >
253
+ <MarkdownRenderer content={message.content} sources={webSearchSources} />
254
+ </div>
255
  </div>
256
 
257
  <!-- Web Search sources -->
src/lib/components/chat/MarkdownRenderer.svelte CHANGED
@@ -106,21 +106,17 @@
106
  });
107
  </script>
108
 
109
- <div
110
- class="prose max-w-none dark:prose-invert max-sm:prose-sm prose-headings:font-semibold prose-h1:text-lg prose-h2:text-base prose-h3:text-base prose-pre:bg-gray-800 dark:prose-pre:bg-gray-900"
111
- >
112
- {#each tokens as token}
113
- {#if token.type === "code"}
114
- <CodeBlock lang={token.lang} code={token.text} />
115
- {:else}
116
- {@const parsed = marked.parse(processLatex(escapeHTML(token.raw)), options)}
117
- {#await parsed then parsed}
118
- <!-- eslint-disable-next-line svelte/no-at-html-tags -->
119
- {@html DOMPurify.sanitize(parsed)}
120
- {/await}
121
- {/if}
122
- {/each}
123
- </div>
124
 
125
  <style lang="postcss">
126
  :global(.katex-display) {
 
106
  });
107
  </script>
108
 
109
+ {#each tokens as token}
110
+ {#if token.type === "code"}
111
+ <CodeBlock lang={token.lang} code={token.text} />
112
+ {:else}
113
+ {@const parsed = marked.parse(processLatex(escapeHTML(token.raw)), options)}
114
+ {#await parsed then parsed}
115
+ <!-- eslint-disable-next-line svelte/no-at-html-tags -->
116
+ {@html DOMPurify.sanitize(parsed)}
117
+ {/await}
118
+ {/if}
119
+ {/each}
 
 
 
 
120
 
121
  <style lang="postcss">
122
  :global(.katex-display) {
src/lib/components/chat/OpenReasoningResults.svelte ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <script lang="ts">
2
+ import IconThought from "~icons/carbon/circle-packing";
3
+ import MarkdownRenderer from "./MarkdownRenderer.svelte";
4
+
5
+ export let summary: string;
6
+ export let content: string;
7
+ export let loading: boolean = false;
8
+ </script>
9
+
10
+ <details
11
+ class="u flex w-fit max-w-full rounded-xl border border-gray-200 bg-white shadow-sm dark:border-gray-800 dark:bg-gray-900"
12
+ >
13
+ <summary
14
+ class="grid min-w-72 cursor-pointer select-none grid-cols-[40px,1fr] items-center gap-2.5 p-2"
15
+ >
16
+ <div
17
+ class="relative grid aspect-square place-content-center overflow-hidden rounded-lg bg-gray-100 dark:bg-gray-800"
18
+ >
19
+ <svg
20
+ class="absolute inset-0 text-gray-300 transition-opacity dark:text-gray-700 {loading
21
+ ? 'opacity-100'
22
+ : 'opacity-0'}"
23
+ width="40"
24
+ height="40"
25
+ viewBox="0 0 38 38"
26
+ fill="none"
27
+ xmlns="http://www.w3.org/2000/svg"
28
+ >
29
+ <path
30
+ class="loading-path"
31
+ d="M8 2.5H30C30 2.5 35.5 2.5 35.5 8V30C35.5 30 35.5 35.5 30 35.5H8C8 35.5 2.5 35.5 2.5 30V8C2.5 8 2.5 2.5 8 2.5Z"
32
+ stroke="currentColor"
33
+ stroke-width="1"
34
+ stroke-linecap="round"
35
+ id="shape"
36
+ />
37
+ </svg>
38
+
39
+ <IconThought class="text-[1rem]" />
40
+ </div>
41
+ <dl class="leading-4">
42
+ <dd class="text-sm">Reasoning</dd>
43
+ <dt
44
+ class="flex items-center gap-1 truncate whitespace-nowrap text-[.82rem] text-gray-400"
45
+ class:animate-pulse={loading}
46
+ >
47
+ {summary}
48
+ </dt>
49
+ </dl>
50
+ </summary>
51
+
52
+ <div
53
+ class="border-t border-gray-200 px-5 pb-2 pt-2 text-sm text-gray-600 dark:border-gray-800 dark:text-gray-400"
54
+ >
55
+ <MarkdownRenderer {content} />
56
+ </div>
57
+ </details>
58
+
59
+ <style>
60
+ details summary::-webkit-details-marker {
61
+ display: none;
62
+ }
63
+
64
+ .loading-path {
65
+ stroke-dasharray: 61.45;
66
+ animation: loading 2s linear infinite;
67
+ }
68
+
69
+ @keyframes loading {
70
+ to {
71
+ stroke-dashoffset: 122.9;
72
+ }
73
+ }
74
+ </style>
src/lib/server/generateFromDefaultEndpoint.ts CHANGED
@@ -1,7 +1,8 @@
1
  import { smallModel } from "$lib/server/models";
 
2
  import type { EndpointMessage } from "./endpoints/endpoints";
3
 
4
- export async function generateFromDefaultEndpoint({
5
  messages,
6
  preprompt,
7
  generateSettings,
@@ -9,7 +10,7 @@ export async function generateFromDefaultEndpoint({
9
  messages: EndpointMessage[];
10
  preprompt?: string;
11
  generateSettings?: Record<string, unknown>;
12
- }): Promise<string> {
13
  const endpoint = await smallModel.getEndpoint();
14
 
15
  const tokenStream = await endpoint({ messages, preprompt, generateSettings });
@@ -25,6 +26,10 @@ export async function generateFromDefaultEndpoint({
25
  }
26
  return generated_text;
27
  }
 
 
 
 
28
  }
29
  throw new Error("Generation failed");
30
  }
 
1
  import { smallModel } from "$lib/server/models";
2
+ import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
3
  import type { EndpointMessage } from "./endpoints/endpoints";
4
 
5
+ export async function* generateFromDefaultEndpoint({
6
  messages,
7
  preprompt,
8
  generateSettings,
 
10
  messages: EndpointMessage[];
11
  preprompt?: string;
12
  generateSettings?: Record<string, unknown>;
13
+ }): AsyncGenerator<MessageUpdate, string, undefined> {
14
  const endpoint = await smallModel.getEndpoint();
15
 
16
  const tokenStream = await endpoint({ messages, preprompt, generateSettings });
 
26
  }
27
  return generated_text;
28
  }
29
+ yield {
30
+ type: MessageUpdateType.Stream,
31
+ token: output.token.text,
32
+ };
33
  }
34
  throw new Error("Generation failed");
35
  }
src/lib/server/models.ts CHANGED
@@ -17,6 +17,21 @@ import { isHuggingChat } from "$lib/utils/isHuggingChat";
17
 
18
  type Optional<T, K extends keyof T> = Pick<Partial<T>, K> & Omit<T, K>;
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  const modelConfig = z.object({
21
  /** Used as an identifier in DB */
22
  id: z.string().optional(),
@@ -70,6 +85,7 @@ const modelConfig = z.object({
70
  embeddingModel: validateEmbeddingModelByName(embeddingModels).optional(),
71
  /** Used to enable/disable system prompt usage */
72
  systemRoleSupported: z.boolean().default(true),
 
73
  });
74
 
75
  const modelsRaw = z.array(modelConfig).parse(JSON5.parse(env.MODELS));
 
17
 
18
  type Optional<T, K extends keyof T> = Pick<Partial<T>, K> & Omit<T, K>;
19
 
20
+ const reasoningSchema = z.union([
21
+ z.object({
22
+ type: z.literal("regex"), // everything is reasoning, extract the answer from the regex
23
+ regex: z.string(),
24
+ }),
25
+ z.object({
26
+ type: z.literal("tokens"), // use beginning and end tokens that define the reasoning portion of the answer
27
+ beginToken: z.string(),
28
+ endToken: z.string(),
29
+ }),
30
+ z.object({
31
+ type: z.literal("summarize"), // everything is reasoning, summarize the answer
32
+ }),
33
+ ]);
34
+
35
  const modelConfig = z.object({
36
  /** Used as an identifier in DB */
37
  id: z.string().optional(),
 
85
  embeddingModel: validateEmbeddingModelByName(embeddingModels).optional(),
86
  /** Used to enable/disable system prompt usage */
87
  systemRoleSupported: z.boolean().default(true),
88
+ reasoning: reasoningSchema.optional(),
89
  });
90
 
91
  const modelsRaw = z.array(modelConfig).parse(JSON5.parse(env.MODELS));
src/lib/server/textGeneration/generate.ts CHANGED
@@ -1,8 +1,14 @@
1
  import type { ToolResult } from "$lib/types/Tool";
2
- import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
 
 
 
 
3
  import { AbortedGenerations } from "../abortedGenerations";
4
  import type { TextGenerationContext } from "./types";
5
  import type { EndpointMessage } from "../endpoints/endpoints";
 
 
6
 
7
  type GenerateContext = Omit<TextGenerationContext, "messages"> & { messages: EndpointMessage[] };
8
 
@@ -11,6 +17,26 @@ export async function* generate(
11
  toolResults: ToolResult[],
12
  preprompt?: string
13
  ): AsyncIterable<MessageUpdate> {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  for await (const output of await endpoint({
15
  messages,
16
  preprompt,
@@ -33,20 +59,102 @@ export async function* generate(
33
  text = text.slice(0, text.length - stopToken.length);
34
  }
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  yield {
37
  type: MessageUpdateType.FinalAnswer,
38
- text,
39
  interrupted,
40
  webSources: output.webSources,
41
  };
42
  continue;
43
  }
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  // ignore special tokens
46
  if (output.token.special) continue;
47
 
48
  // pass down normal token
49
- yield { type: MessageUpdateType.Stream, token: output.token.text };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  // abort check
52
  const date = AbortedGenerations.getInstance().getList().get(conv._id.toString());
 
1
  import type { ToolResult } from "$lib/types/Tool";
2
+ import {
3
+ MessageReasoningUpdateType,
4
+ MessageUpdateType,
5
+ type MessageUpdate,
6
+ } from "$lib/types/MessageUpdate";
7
  import { AbortedGenerations } from "../abortedGenerations";
8
  import type { TextGenerationContext } from "./types";
9
  import type { EndpointMessage } from "../endpoints/endpoints";
10
+ import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
11
+ import { generateSummaryOfReasoning } from "./reasoning";
12
 
13
  type GenerateContext = Omit<TextGenerationContext, "messages"> & { messages: EndpointMessage[] };
14
 
 
17
  toolResults: ToolResult[],
18
  preprompt?: string
19
  ): AsyncIterable<MessageUpdate> {
20
+ // reasoning mode is false by default
21
+ let reasoning = false;
22
+ let reasoningBuffer = "";
23
+ let lastReasoningUpdate = new Date();
24
+ let status = "";
25
+ const startTime = new Date();
26
+ if (
27
+ model.reasoning &&
28
+ (model.reasoning.type === "regex" || model.reasoning.type === "summarize")
29
+ ) {
30
+ // if the model has reasoning in regex or summarize mode, it starts in reasoning mode
31
+ // and we extract the answer from the reasoning
32
+ reasoning = true;
33
+ yield {
34
+ type: MessageUpdateType.Reasoning,
35
+ subtype: MessageReasoningUpdateType.Status,
36
+ status: "Started reasoning...",
37
+ };
38
+ }
39
+
40
  for await (const output of await endpoint({
41
  messages,
42
  preprompt,
 
59
  text = text.slice(0, text.length - stopToken.length);
60
  }
61
 
62
+ let finalAnswer = text;
63
+ if (model.reasoning && model.reasoning.type === "regex") {
64
+ const regex = new RegExp(model.reasoning.regex);
65
+ finalAnswer = regex.exec(reasoningBuffer)?.[1] ?? text;
66
+ } else if (model.reasoning && model.reasoning.type === "summarize") {
67
+ yield {
68
+ type: MessageUpdateType.Reasoning,
69
+ subtype: MessageReasoningUpdateType.Status,
70
+ status: "Summarizing reasoning...",
71
+ };
72
+ const summary = yield* generateFromDefaultEndpoint({
73
+ messages: [
74
+ {
75
+ from: "user",
76
+ content: `Question: ${
77
+ messages[messages.length - 1].content
78
+ }\n\nReasoning: ${reasoningBuffer}`,
79
+ },
80
+ ],
81
+ preprompt: `Your task is to summarize concisely all your reasoning steps and then give the final answer. Keep it short, one short paragraph at most. If the final solution includes code, make sure to include it in your answer.
82
+
83
+ If the user is just having a casual conversation that doesn't require explanations, answer directly without explaining your steps, otherwise make sure to summarize step by step, make sure to skip dead-ends in your reasoning and removing excess detail.
84
+
85
+ Do not use prefixes such as Response: or Answer: when answering to the user.`,
86
+ generateSettings: {
87
+ max_new_tokens: 1024,
88
+ },
89
+ });
90
+ finalAnswer = summary;
91
+ yield {
92
+ type: MessageUpdateType.Reasoning,
93
+ subtype: MessageReasoningUpdateType.Status,
94
+ status: `Done in ${Math.round((new Date().getTime() - startTime.getTime()) / 1000)}s.`,
95
+ };
96
+ }
97
+
98
  yield {
99
  type: MessageUpdateType.FinalAnswer,
100
+ text: finalAnswer,
101
  interrupted,
102
  webSources: output.webSources,
103
  };
104
  continue;
105
  }
106
 
107
+ if (model.reasoning && model.reasoning.type === "tokens") {
108
+ if (output.token.text === model.reasoning.beginToken) {
109
+ reasoning = true;
110
+ reasoningBuffer += output.token.text;
111
+ yield {
112
+ type: MessageUpdateType.Reasoning,
113
+ subtype: MessageReasoningUpdateType.Status,
114
+ status: "Started thinking...",
115
+ };
116
+ } else if (output.token.text === model.reasoning.endToken) {
117
+ reasoning = false;
118
+ reasoningBuffer += output.token.text;
119
+ yield {
120
+ type: MessageUpdateType.Reasoning,
121
+ subtype: MessageReasoningUpdateType.Status,
122
+ status: `Done in ${Math.round((new Date().getTime() - startTime.getTime()) / 1000)}s.`,
123
+ };
124
+ }
125
+ }
126
  // ignore special tokens
127
  if (output.token.special) continue;
128
 
129
  // pass down normal token
130
+ if (reasoning) {
131
+ reasoningBuffer += output.token.text;
132
+
133
+ // yield status update if it has changed
134
+ if (status !== "") {
135
+ yield {
136
+ type: MessageUpdateType.Reasoning,
137
+ subtype: MessageReasoningUpdateType.Status,
138
+ status,
139
+ };
140
+ status = "";
141
+ }
142
+
143
+ // create a new status every 5 seconds
144
+ if (new Date().getTime() - lastReasoningUpdate.getTime() > 4000) {
145
+ lastReasoningUpdate = new Date();
146
+ generateSummaryOfReasoning(reasoningBuffer).then((summary) => {
147
+ status = summary;
148
+ });
149
+ }
150
+ yield {
151
+ type: MessageUpdateType.Reasoning,
152
+ subtype: MessageReasoningUpdateType.Stream,
153
+ token: output.token.text,
154
+ };
155
+ } else {
156
+ yield { type: MessageUpdateType.Stream, token: output.token.text };
157
+ }
158
 
159
  // abort check
160
  const date = AbortedGenerations.getInstance().getList().get(conv._id.toString());
src/lib/server/textGeneration/reasoning.ts ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
2
+
3
+ import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
4
+
5
+ export async function generateSummaryOfReasoning(buffer: string): Promise<string> {
6
+ // debug 5s delay
7
+ await new Promise((resolve) => setTimeout(resolve, 3000));
8
+
9
+ const summary = await getReturnFromGenerator(
10
+ generateFromDefaultEndpoint({
11
+ messages: [
12
+ {
13
+ from: "user",
14
+ content: buffer.slice(-200),
15
+ },
16
+ ],
17
+ preprompt: `You are tasked with summarizing the latest reasoning steps. Never describe results of the reasoning, only the process. Remain vague in your summary.
18
+ The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points.
19
+ Example: "Thinking about life...", "Summarizing the results...", "Processing the input..."`,
20
+ generateSettings: {
21
+ max_new_tokens: 50,
22
+ },
23
+ })
24
+ ).then((summary) => {
25
+ const parts = summary.split("...");
26
+ return parts[0] + "...";
27
+ });
28
+
29
+ return summary;
30
+ }
src/lib/server/textGeneration/title.ts CHANGED
@@ -4,6 +4,7 @@ import type { EndpointMessage } from "../endpoints/endpoints";
4
  import { logger } from "$lib/server/logger";
5
  import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
6
  import type { Conversation } from "$lib/types/Conversation";
 
7
 
8
  export async function* generateTitleForConversation(
9
  conv: Conversation
@@ -55,14 +56,16 @@ export async function generateTitle(prompt: string) {
55
  { from: "user", content: prompt },
56
  ];
57
 
58
- return await generateFromDefaultEndpoint({
59
- messages,
60
- preprompt:
61
- "You are a summarization AI. Summarize the user's request into a single short sentence of four words or less. Do not try to answer it, only summarize the user's query. Always start your answer with an emoji relevant to the summary",
62
- generateSettings: {
63
- max_new_tokens: 15,
64
- },
65
- })
 
 
66
  .then((summary) => {
67
  // add an emoji if none is found in the first three characters
68
  if (!/\p{Emoji}/u.test(summary.slice(0, 3))) {
 
4
  import { logger } from "$lib/server/logger";
5
  import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
6
  import type { Conversation } from "$lib/types/Conversation";
7
+ import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
8
 
9
  export async function* generateTitleForConversation(
10
  conv: Conversation
 
56
  { from: "user", content: prompt },
57
  ];
58
 
59
+ return await getReturnFromGenerator(
60
+ generateFromDefaultEndpoint({
61
+ messages,
62
+ preprompt:
63
+ "You are a summarization AI. Summarize the user's request into a single short sentence of four words or less. Do not try to answer it, only summarize the user's query. Always start your answer with an emoji relevant to the summary",
64
+ generateSettings: {
65
+ max_new_tokens: 15,
66
+ },
67
+ })
68
+ )
69
  .then((summary) => {
70
  // add an emoji if none is found in the first three characters
71
  if (!/\p{Emoji}/u.test(summary.slice(0, 3))) {
src/lib/server/websearch/search/generateQuery.ts CHANGED
@@ -2,6 +2,7 @@ import type { Message } from "$lib/types/Message";
2
  import { format } from "date-fns";
3
  import type { EndpointMessage } from "../../endpoints/endpoints";
4
  import { generateFromDefaultEndpoint } from "../../generateFromDefaultEndpoint";
 
5
 
6
  export async function generateQuery(messages: Message[]) {
7
  const currentDate = format(new Date(), "MMMM d, yyyy");
@@ -62,13 +63,15 @@ Current Question: Where is it being hosted?`,
62
  },
63
  ];
64
 
65
- const webQuery = await generateFromDefaultEndpoint({
66
- messages: convQuery,
67
- preprompt: `You are tasked with generating web search queries. Give me an appropriate query to answer my question for google search. Answer with only the query. Today is ${currentDate}`,
68
- generateSettings: {
69
- max_new_tokens: 30,
70
- },
71
- });
 
 
72
 
73
  return webQuery.trim();
74
  }
 
2
  import { format } from "date-fns";
3
  import type { EndpointMessage } from "../../endpoints/endpoints";
4
  import { generateFromDefaultEndpoint } from "../../generateFromDefaultEndpoint";
5
+ import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
6
 
7
  export async function generateQuery(messages: Message[]) {
8
  const currentDate = format(new Date(), "MMMM d, yyyy");
 
63
  },
64
  ];
65
 
66
+ const webQuery = await getReturnFromGenerator(
67
+ generateFromDefaultEndpoint({
68
+ messages: convQuery,
69
+ preprompt: `You are tasked with generating web search queries. Give me an appropriate query to answer my question for google search. Answer with only the query. Today is ${currentDate}`,
70
+ generateSettings: {
71
+ max_new_tokens: 30,
72
+ },
73
+ })
74
+ );
75
 
76
  return webQuery.trim();
77
  }
src/lib/types/Message.ts CHANGED
@@ -10,6 +10,8 @@ export type Message = Partial<Timestamps> & {
10
  updates?: MessageUpdate[];
11
  webSearchId?: WebSearch["_id"]; // legacy version
12
  webSearch?: WebSearch;
 
 
13
  score?: -1 | 0 | 1;
14
  /**
15
  * Either contains the base64 encoded image data
 
10
  updates?: MessageUpdate[];
11
  webSearchId?: WebSearch["_id"]; // legacy version
12
  webSearch?: WebSearch;
13
+
14
+ reasoning?: string;
15
  score?: -1 | 0 | 1;
16
  /**
17
  * Either contains the base64 encoded image data
src/lib/types/MessageUpdate.ts CHANGED
@@ -8,7 +8,8 @@ export type MessageUpdate =
8
  | MessageWebSearchUpdate
9
  | MessageStreamUpdate
10
  | MessageFileUpdate
11
- | MessageFinalAnswerUpdate;
 
12
 
13
  export enum MessageUpdateType {
14
  Status = "status",
@@ -18,6 +19,7 @@ export enum MessageUpdateType {
18
  Stream = "stream",
19
  File = "file",
20
  FinalAnswer = "finalAnswer",
 
21
  }
22
 
23
  // Status
@@ -114,6 +116,25 @@ export interface MessageStreamUpdate {
114
  type: MessageUpdateType.Stream;
115
  token: string;
116
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  export interface MessageFileUpdate {
118
  type: MessageUpdateType.File;
119
  name: string;
 
8
  | MessageWebSearchUpdate
9
  | MessageStreamUpdate
10
  | MessageFileUpdate
11
+ | MessageFinalAnswerUpdate
12
+ | MessageReasoningUpdate;
13
 
14
  export enum MessageUpdateType {
15
  Status = "status",
 
19
  Stream = "stream",
20
  File = "file",
21
  FinalAnswer = "finalAnswer",
22
+ Reasoning = "reasoning",
23
  }
24
 
25
  // Status
 
116
  type: MessageUpdateType.Stream;
117
  token: string;
118
  }
119
+
120
+ export enum MessageReasoningUpdateType {
121
+ Stream = "stream",
122
+ Status = "status",
123
+ }
124
+
125
+ export type MessageReasoningUpdate = MessageReasoningStreamUpdate | MessageReasoningStatusUpdate;
126
+
127
+ export interface MessageReasoningStreamUpdate {
128
+ type: MessageUpdateType.Reasoning;
129
+ subtype: MessageReasoningUpdateType.Stream;
130
+ token: string;
131
+ }
132
+ export interface MessageReasoningStatusUpdate {
133
+ type: MessageUpdateType.Reasoning;
134
+ subtype: MessageReasoningUpdateType.Status;
135
+ status: string;
136
+ }
137
+
138
  export interface MessageFileUpdate {
139
  type: MessageUpdateType.File;
140
  name: string;
src/lib/utils/getReturnFromGenerator.ts ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ export async function getReturnFromGenerator<T, R>(generator: AsyncGenerator<T, R>): Promise<R> {
2
+ let result: IteratorResult<T, R>;
3
+ do {
4
+ result = await generator.next();
5
+ } while (!result.done); // Keep calling `next()` until `done` is true
6
+ return result.value; // Return the final value
7
+ }
src/routes/conversation/[id]/+page.svelte CHANGED
@@ -12,9 +12,9 @@
12
  import { webSearchParameters } from "$lib/stores/webSearchParameters";
13
  import type { Message } from "$lib/types/Message";
14
  import {
 
15
  MessageUpdateStatus,
16
  MessageUpdateType,
17
- type MessageUpdate,
18
  } from "$lib/types/MessageUpdate";
19
  import titleUpdate from "$lib/stores/titleUpdate";
20
  import file2base64 from "$lib/utils/file2base64";
@@ -215,8 +215,6 @@
215
 
216
  files = [];
217
 
218
- const messageUpdates: MessageUpdate[] = [];
219
-
220
  for await (const update of messageUpdatesIterator) {
221
  if ($isAborted) {
222
  messageUpdatesAbortController.abort();
@@ -229,7 +227,7 @@
229
  update.token = update.token.replaceAll("\0", "");
230
  }
231
 
232
- messageUpdates.push(update);
233
 
234
  if (update.type === MessageUpdateType.Stream && !$settings.disableStream) {
235
  messageToWriteTo.content += update.token;
@@ -239,7 +237,6 @@
239
  update.type === MessageUpdateType.WebSearch ||
240
  update.type === MessageUpdateType.Tool
241
  ) {
242
- messageToWriteTo.updates = [...(messageToWriteTo.updates ?? []), update];
243
  messages = [...messages];
244
  } else if (
245
  update.type === MessageUpdateType.Status &&
@@ -262,10 +259,18 @@
262
  { type: "hash", value: update.sha, mime: update.mime, name: update.name },
263
  ];
264
  messages = [...messages];
 
 
 
 
 
 
 
 
 
 
265
  }
266
  }
267
-
268
- messageToWriteTo.updates = messageUpdates;
269
  } catch (err) {
270
  if (err instanceof Error && err.message.includes("overloaded")) {
271
  $error = "Too much traffic, please try again.";
 
12
  import { webSearchParameters } from "$lib/stores/webSearchParameters";
13
  import type { Message } from "$lib/types/Message";
14
  import {
15
+ MessageReasoningUpdateType,
16
  MessageUpdateStatus,
17
  MessageUpdateType,
 
18
  } from "$lib/types/MessageUpdate";
19
  import titleUpdate from "$lib/stores/titleUpdate";
20
  import file2base64 from "$lib/utils/file2base64";
 
215
 
216
  files = [];
217
 
 
 
218
  for await (const update of messageUpdatesIterator) {
219
  if ($isAborted) {
220
  messageUpdatesAbortController.abort();
 
227
  update.token = update.token.replaceAll("\0", "");
228
  }
229
 
230
+ messageToWriteTo.updates = [...(messageToWriteTo.updates ?? []), update];
231
 
232
  if (update.type === MessageUpdateType.Stream && !$settings.disableStream) {
233
  messageToWriteTo.content += update.token;
 
237
  update.type === MessageUpdateType.WebSearch ||
238
  update.type === MessageUpdateType.Tool
239
  ) {
 
240
  messages = [...messages];
241
  } else if (
242
  update.type === MessageUpdateType.Status &&
 
259
  { type: "hash", value: update.sha, mime: update.mime, name: update.name },
260
  ];
261
  messages = [...messages];
262
+ } else if (update.type === MessageUpdateType.Reasoning) {
263
+ if (!messageToWriteTo.reasoning) {
264
+ messageToWriteTo.reasoning = "";
265
+ }
266
+ if (update.subtype === MessageReasoningUpdateType.Stream) {
267
+ messageToWriteTo.reasoning += update.token;
268
+ } else {
269
+ messageToWriteTo.updates = [...(messageToWriteTo.updates ?? []), update];
270
+ }
271
+ messages = [...messages];
272
  }
273
  }
 
 
274
  } catch (err) {
275
  if (err instanceof Error && err.message.includes("overloaded")) {
276
  $error = "Too much traffic, please try again.";
src/routes/conversation/[id]/+server.ts CHANGED
@@ -9,6 +9,7 @@ import { error } from "@sveltejs/kit";
9
  import { ObjectId } from "mongodb";
10
  import { z } from "zod";
11
  import {
 
12
  MessageUpdateStatus,
13
  MessageUpdateType,
14
  type MessageUpdate,
@@ -355,6 +356,12 @@ export async function POST({ request, locals, params, getClientAddress }) {
355
  Date.now() - (lastTokenTimestamp ?? promptedAt).getTime()
356
  );
357
  lastTokenTimestamp = new Date();
 
 
 
 
 
 
358
  }
359
 
360
  // Set the title
@@ -392,6 +399,10 @@ export async function POST({ request, locals, params, getClientAddress }) {
392
  !(
393
  event.type === MessageUpdateType.Status &&
394
  event.status === MessageUpdateStatus.KeepAlive
 
 
 
 
395
  )
396
  ) {
397
  messageToWriteTo?.updates?.push(event);
 
9
  import { ObjectId } from "mongodb";
10
  import { z } from "zod";
11
  import {
12
+ MessageReasoningUpdateType,
13
  MessageUpdateStatus,
14
  MessageUpdateType,
15
  type MessageUpdate,
 
356
  Date.now() - (lastTokenTimestamp ?? promptedAt).getTime()
357
  );
358
  lastTokenTimestamp = new Date();
359
+ } else if (
360
+ event.type === MessageUpdateType.Reasoning &&
361
+ event.subtype === MessageReasoningUpdateType.Stream
362
+ ) {
363
+ messageToWriteTo.reasoning ??= "";
364
+ messageToWriteTo.reasoning += event.token;
365
  }
366
 
367
  // Set the title
 
399
  !(
400
  event.type === MessageUpdateType.Status &&
401
  event.status === MessageUpdateStatus.KeepAlive
402
+ ) &&
403
+ !(
404
+ event.type === MessageUpdateType.Reasoning &&
405
+ event.subtype === MessageReasoningUpdateType.Stream
406
  )
407
  ) {
408
  messageToWriteTo?.updates?.push(event);