nsarrazin HF Staff committed on
Commit
bf75aa7
·
unverified ·
1 Parent(s): 85b38e3

feat: UI for advanced reasoning models (#1605)

Browse files

* feat: add a reasoning dropdown for CoT models

* feat: add status updates

* fix: various cleanups
- pass content & status to result dropdown
- dont store streaming updates in db
- make status generation non blocking

* fix: make sure not to push reasoning token stream to db

* feat: add time indicator and make the ui match websearch

* fix: change in status update & prompt

chart/env/prod.yaml CHANGED
@@ -144,6 +144,9 @@ envVars:
144
  "websiteUrl": "https://qwenlm.github.io/blog/qwq-32b-preview/",
145
  "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/qwen-logo.png",
146
  "description": "QwQ is an experiment model from the Qwen Team with advanced reasoning capabilities.",
 
 
 
147
  "parameters": {
148
  "stop": ["<|im_end|>"],
149
  "truncate": 12288,
 
144
  "websiteUrl": "https://qwenlm.github.io/blog/qwq-32b-preview/",
145
  "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/qwen-logo.png",
146
  "description": "QwQ is an experiment model from the Qwen Team with advanced reasoning capabilities.",
147
+ "reasoning": {
148
+ "type": "summarize"
149
+ },
150
  "parameters": {
151
  "stop": ["<|im_end|>"],
152
  "truncate": 12288,
src/lib/components/OpenWebSearchResults.svelte CHANGED
@@ -9,7 +9,6 @@
9
  import EosIconsLoading from "~icons/eos-icons/loading";
10
  import IconInternet from "./icons/IconInternet.svelte";
11
 
12
- export let classNames = "";
13
  export let webSearchMessages: MessageWebSearchUpdate[] = [];
14
 
15
  $: sources = webSearchMessages.find(isMessageWebSearchSourcesUpdate)?.sources;
@@ -23,7 +22,7 @@
23
  </script>
24
 
25
  <details
26
- class="flex w-fit rounded-xl border border-gray-200 bg-white shadow-sm dark:border-gray-800 dark:bg-gray-900 {classNames} max-w-full"
27
  >
28
  <summary class="grid min-w-72 select-none grid-cols-[40px,1fr] items-center gap-2.5 p-2">
29
  <div
 
9
  import EosIconsLoading from "~icons/eos-icons/loading";
10
  import IconInternet from "./icons/IconInternet.svelte";
11
 
 
12
  export let webSearchMessages: MessageWebSearchUpdate[] = [];
13
 
14
  $: sources = webSearchMessages.find(isMessageWebSearchSourcesUpdate)?.sources;
 
22
  </script>
23
 
24
  <details
25
+ class="flex w-fit max-w-full rounded-xl border border-gray-200 bg-white shadow-sm dark:border-gray-800 dark:bg-gray-900"
26
  >
27
  <summary class="grid min-w-72 select-none grid-cols-[40px,1fr] items-center gap-2.5 p-2">
28
  <div
src/lib/components/chat/ChatMessage.svelte CHANGED
@@ -25,6 +25,8 @@
25
  type MessageWebSearchSourcesUpdate,
26
  type MessageWebSearchUpdate,
27
  type MessageFinalAnswerUpdate,
 
 
28
  } from "$lib/types/MessageUpdate";
29
  import { base } from "$app/paths";
30
  import { useConvTreeStore } from "$lib/stores/convTree";
@@ -33,6 +35,7 @@
33
  import { enhance } from "$app/forms";
34
  import { browser } from "$app/environment";
35
  import MarkdownRenderer from "./MarkdownRenderer.svelte";
 
36
 
37
  export let model: Model;
38
  export let id: Message["id"];
@@ -90,9 +93,13 @@
90
  }
91
  }
92
 
93
- $: searchUpdates = (message.updates?.filter(({ type }) => type === "webSearch") ??
94
  []) as MessageWebSearchUpdate[];
95
 
 
 
 
 
96
  $: messageFinalAnswer = message.updates?.find(
97
  ({ type }) => type === MessageUpdateType.FinalAnswer
98
  ) as MessageFinalAnswerUpdate;
@@ -208,9 +215,17 @@
208
  </div>
209
  {/if}
210
  {#if searchUpdates && searchUpdates.length > 0}
211
- <OpenWebSearchResults
212
- classNames={message.content.length ? "mb-3.5" : ""}
213
- webSearchMessages={searchUpdates}
 
 
 
 
 
 
 
 
214
  />
215
  {/if}
216
 
@@ -224,11 +239,19 @@
224
  {/each}
225
  {/if}
226
 
227
- <div bind:this={contentEl}>
 
 
 
228
  {#if isLast && loading && $settings.disableStream}
229
  <IconLoading classNames="loading inline ml-2 first:ml-0" />
230
  {/if}
231
- <MarkdownRenderer content={message.content} sources={webSearchSources} />
 
 
 
 
 
232
  </div>
233
 
234
  <!-- Web Search sources -->
 
25
  type MessageWebSearchSourcesUpdate,
26
  type MessageWebSearchUpdate,
27
  type MessageFinalAnswerUpdate,
28
+ type MessageReasoningUpdate,
29
+ MessageReasoningUpdateType,
30
  } from "$lib/types/MessageUpdate";
31
  import { base } from "$app/paths";
32
  import { useConvTreeStore } from "$lib/stores/convTree";
 
35
  import { enhance } from "$app/forms";
36
  import { browser } from "$app/environment";
37
  import MarkdownRenderer from "./MarkdownRenderer.svelte";
38
+ import OpenReasoningResults from "./OpenReasoningResults.svelte";
39
 
40
  export let model: Model;
41
  export let id: Message["id"];
 
93
  }
94
  }
95
 
96
+ $: searchUpdates = (message.updates?.filter(({ type }) => type === MessageUpdateType.WebSearch) ??
97
  []) as MessageWebSearchUpdate[];
98
 
99
+ $: reasoningUpdates = (message.updates?.filter(
100
+ ({ type }) => type === MessageUpdateType.Reasoning
101
+ ) ?? []) as MessageReasoningUpdate[];
102
+
103
  $: messageFinalAnswer = message.updates?.find(
104
  ({ type }) => type === MessageUpdateType.FinalAnswer
105
  ) as MessageFinalAnswerUpdate;
 
215
  </div>
216
  {/if}
217
  {#if searchUpdates && searchUpdates.length > 0}
218
+ <OpenWebSearchResults webSearchMessages={searchUpdates} />
219
+ {/if}
220
+ {#if reasoningUpdates && reasoningUpdates.length > 0}
221
+ {@const summaries = reasoningUpdates
222
+ .filter((u) => u.subtype === MessageReasoningUpdateType.Status)
223
+ .map((u) => u.status)}
224
+
225
+ <OpenReasoningResults
226
+ summary={summaries[summaries.length - 1] || ""}
227
+ content={message.reasoning || ""}
228
+ loading={loading && message.content.length === 0}
229
  />
230
  {/if}
231
 
 
239
  {/each}
240
  {/if}
241
 
242
+ <div
243
+ bind:this={contentEl}
244
+ class:mt-2={reasoningUpdates.length > 0 || searchUpdates.length > 0}
245
+ >
246
  {#if isLast && loading && $settings.disableStream}
247
  <IconLoading classNames="loading inline ml-2 first:ml-0" />
248
  {/if}
249
+
250
+ <div
251
+ class="prose max-w-none dark:prose-invert max-sm:prose-sm prose-headings:font-semibold prose-h1:text-lg prose-h2:text-base prose-h3:text-base prose-pre:bg-gray-800 dark:prose-pre:bg-gray-900"
252
+ >
253
+ <MarkdownRenderer content={message.content} sources={webSearchSources} />
254
+ </div>
255
  </div>
256
 
257
  <!-- Web Search sources -->
src/lib/components/chat/MarkdownRenderer.svelte CHANGED
@@ -106,21 +106,17 @@
106
  });
107
  </script>
108
 
109
- <div
110
- class="prose max-w-none dark:prose-invert max-sm:prose-sm prose-headings:font-semibold prose-h1:text-lg prose-h2:text-base prose-h3:text-base prose-pre:bg-gray-800 dark:prose-pre:bg-gray-900"
111
- >
112
- {#each tokens as token}
113
- {#if token.type === "code"}
114
- <CodeBlock lang={token.lang} code={token.text} />
115
- {:else}
116
- {@const parsed = marked.parse(processLatex(escapeHTML(token.raw)), options)}
117
- {#await parsed then parsed}
118
- <!-- eslint-disable-next-line svelte/no-at-html-tags -->
119
- {@html DOMPurify.sanitize(parsed)}
120
- {/await}
121
- {/if}
122
- {/each}
123
- </div>
124
 
125
  <style lang="postcss">
126
  :global(.katex-display) {
 
106
  });
107
  </script>
108
 
109
+ {#each tokens as token}
110
+ {#if token.type === "code"}
111
+ <CodeBlock lang={token.lang} code={token.text} />
112
+ {:else}
113
+ {@const parsed = marked.parse(processLatex(escapeHTML(token.raw)), options)}
114
+ {#await parsed then parsed}
115
+ <!-- eslint-disable-next-line svelte/no-at-html-tags -->
116
+ {@html DOMPurify.sanitize(parsed)}
117
+ {/await}
118
+ {/if}
119
+ {/each}
 
 
 
 
120
 
121
  <style lang="postcss">
122
  :global(.katex-display) {
src/lib/components/chat/OpenReasoningResults.svelte ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <script lang="ts">
2
+ import IconThought from "~icons/carbon/circle-packing";
3
+ import MarkdownRenderer from "./MarkdownRenderer.svelte";
4
+
5
+ export let summary: string;
6
+ export let content: string;
7
+ export let loading: boolean = false;
8
+ </script>
9
+
10
+ <details
11
+ class="u flex w-fit max-w-full rounded-xl border border-gray-200 bg-white shadow-sm dark:border-gray-800 dark:bg-gray-900"
12
+ >
13
+ <summary
14
+ class="grid min-w-72 cursor-pointer select-none grid-cols-[40px,1fr] items-center gap-2.5 p-2"
15
+ >
16
+ <div
17
+ class="relative grid aspect-square place-content-center overflow-hidden rounded-lg bg-gray-100 dark:bg-gray-800"
18
+ >
19
+ <svg
20
+ class="absolute inset-0 text-gray-300 transition-opacity dark:text-gray-700 {loading
21
+ ? 'opacity-100'
22
+ : 'opacity-0'}"
23
+ width="40"
24
+ height="40"
25
+ viewBox="0 0 38 38"
26
+ fill="none"
27
+ xmlns="http://www.w3.org/2000/svg"
28
+ >
29
+ <path
30
+ class="loading-path"
31
+ d="M8 2.5H30C30 2.5 35.5 2.5 35.5 8V30C35.5 30 35.5 35.5 30 35.5H8C8 35.5 2.5 35.5 2.5 30V8C2.5 8 2.5 2.5 8 2.5Z"
32
+ stroke="currentColor"
33
+ stroke-width="1"
34
+ stroke-linecap="round"
35
+ id="shape"
36
+ />
37
+ </svg>
38
+
39
+ <IconThought class="text-[1rem]" />
40
+ </div>
41
+ <dl class="leading-4">
42
+ <dd class="text-sm">Reasoning</dd>
43
+ <dt
44
+ class="flex items-center gap-1 truncate whitespace-nowrap text-[.82rem] text-gray-400"
45
+ class:animate-pulse={loading}
46
+ >
47
+ {summary}
48
+ </dt>
49
+ </dl>
50
+ </summary>
51
+
52
+ <div
53
+ class="border-t border-gray-200 px-5 pb-2 pt-2 text-sm text-gray-600 dark:border-gray-800 dark:text-gray-400"
54
+ >
55
+ <MarkdownRenderer {content} />
56
+ </div>
57
+ </details>
58
+
59
+ <style>
60
+ details summary::-webkit-details-marker {
61
+ display: none;
62
+ }
63
+
64
+ .loading-path {
65
+ stroke-dasharray: 61.45;
66
+ animation: loading 2s linear infinite;
67
+ }
68
+
69
+ @keyframes loading {
70
+ to {
71
+ stroke-dashoffset: 122.9;
72
+ }
73
+ }
74
+ </style>
src/lib/server/generateFromDefaultEndpoint.ts CHANGED
@@ -1,7 +1,8 @@
1
  import { smallModel } from "$lib/server/models";
 
2
  import type { EndpointMessage } from "./endpoints/endpoints";
3
 
4
- export async function generateFromDefaultEndpoint({
5
  messages,
6
  preprompt,
7
  generateSettings,
@@ -9,7 +10,7 @@ export async function generateFromDefaultEndpoint({
9
  messages: EndpointMessage[];
10
  preprompt?: string;
11
  generateSettings?: Record<string, unknown>;
12
- }): Promise<string> {
13
  const endpoint = await smallModel.getEndpoint();
14
 
15
  const tokenStream = await endpoint({ messages, preprompt, generateSettings });
@@ -25,6 +26,10 @@ export async function generateFromDefaultEndpoint({
25
  }
26
  return generated_text;
27
  }
 
 
 
 
28
  }
29
  throw new Error("Generation failed");
30
  }
 
1
  import { smallModel } from "$lib/server/models";
2
+ import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
3
  import type { EndpointMessage } from "./endpoints/endpoints";
4
 
5
+ export async function* generateFromDefaultEndpoint({
6
  messages,
7
  preprompt,
8
  generateSettings,
 
10
  messages: EndpointMessage[];
11
  preprompt?: string;
12
  generateSettings?: Record<string, unknown>;
13
+ }): AsyncGenerator<MessageUpdate, string, undefined> {
14
  const endpoint = await smallModel.getEndpoint();
15
 
16
  const tokenStream = await endpoint({ messages, preprompt, generateSettings });
 
26
  }
27
  return generated_text;
28
  }
29
+ yield {
30
+ type: MessageUpdateType.Stream,
31
+ token: output.token.text,
32
+ };
33
  }
34
  throw new Error("Generation failed");
35
  }
src/lib/server/models.ts CHANGED
@@ -17,6 +17,21 @@ import { isHuggingChat } from "$lib/utils/isHuggingChat";
17
 
18
  type Optional<T, K extends keyof T> = Pick<Partial<T>, K> & Omit<T, K>;
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  const modelConfig = z.object({
21
  /** Used as an identifier in DB */
22
  id: z.string().optional(),
@@ -70,6 +85,7 @@ const modelConfig = z.object({
70
  embeddingModel: validateEmbeddingModelByName(embeddingModels).optional(),
71
  /** Used to enable/disable system prompt usage */
72
  systemRoleSupported: z.boolean().default(true),
 
73
  });
74
 
75
  const modelsRaw = z.array(modelConfig).parse(JSON5.parse(env.MODELS));
 
17
 
18
  type Optional<T, K extends keyof T> = Pick<Partial<T>, K> & Omit<T, K>;
19
 
20
+ const reasoningSchema = z.union([
21
+ z.object({
22
+ type: z.literal("regex"), // everything is reasoning, extract the answer from the regex
23
+ regex: z.string(),
24
+ }),
25
+ z.object({
26
+ type: z.literal("tokens"), // use beginning and end tokens that define the reasoning portion of the answer
27
+ beginToken: z.string(),
28
+ endToken: z.string(),
29
+ }),
30
+ z.object({
31
+ type: z.literal("summarize"), // everything is reasoning, summarize the answer
32
+ }),
33
+ ]);
34
+
35
  const modelConfig = z.object({
36
  /** Used as an identifier in DB */
37
  id: z.string().optional(),
 
85
  embeddingModel: validateEmbeddingModelByName(embeddingModels).optional(),
86
  /** Used to enable/disable system prompt usage */
87
  systemRoleSupported: z.boolean().default(true),
88
+ reasoning: reasoningSchema.optional(),
89
  });
90
 
91
  const modelsRaw = z.array(modelConfig).parse(JSON5.parse(env.MODELS));
src/lib/server/textGeneration/generate.ts CHANGED
@@ -1,8 +1,14 @@
1
  import type { ToolResult } from "$lib/types/Tool";
2
- import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
 
 
 
 
3
  import { AbortedGenerations } from "../abortedGenerations";
4
  import type { TextGenerationContext } from "./types";
5
  import type { EndpointMessage } from "../endpoints/endpoints";
 
 
6
 
7
  type GenerateContext = Omit<TextGenerationContext, "messages"> & { messages: EndpointMessage[] };
8
 
@@ -11,6 +17,26 @@ export async function* generate(
11
  toolResults: ToolResult[],
12
  preprompt?: string
13
  ): AsyncIterable<MessageUpdate> {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  for await (const output of await endpoint({
15
  messages,
16
  preprompt,
@@ -33,20 +59,102 @@ export async function* generate(
33
  text = text.slice(0, text.length - stopToken.length);
34
  }
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  yield {
37
  type: MessageUpdateType.FinalAnswer,
38
- text,
39
  interrupted,
40
  webSources: output.webSources,
41
  };
42
  continue;
43
  }
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  // ignore special tokens
46
  if (output.token.special) continue;
47
 
48
  // pass down normal token
49
- yield { type: MessageUpdateType.Stream, token: output.token.text };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  // abort check
52
  const date = AbortedGenerations.getInstance().getList().get(conv._id.toString());
 
1
  import type { ToolResult } from "$lib/types/Tool";
2
+ import {
3
+ MessageReasoningUpdateType,
4
+ MessageUpdateType,
5
+ type MessageUpdate,
6
+ } from "$lib/types/MessageUpdate";
7
  import { AbortedGenerations } from "../abortedGenerations";
8
  import type { TextGenerationContext } from "./types";
9
  import type { EndpointMessage } from "../endpoints/endpoints";
10
+ import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
11
+ import { generateSummaryOfReasoning } from "./reasoning";
12
 
13
  type GenerateContext = Omit<TextGenerationContext, "messages"> & { messages: EndpointMessage[] };
14
 
 
17
  toolResults: ToolResult[],
18
  preprompt?: string
19
  ): AsyncIterable<MessageUpdate> {
20
+ // reasoning mode is false by default
21
+ let reasoning = false;
22
+ let reasoningBuffer = "";
23
+ let lastReasoningUpdate = new Date();
24
+ let status = "";
25
+ const startTime = new Date();
26
+ if (
27
+ model.reasoning &&
28
+ (model.reasoning.type === "regex" || model.reasoning.type === "summarize")
29
+ ) {
30
+ // if the model has reasoning in regex or summarize mode, it starts in reasoning mode
31
+ // and we extract the answer from the reasoning
32
+ reasoning = true;
33
+ yield {
34
+ type: MessageUpdateType.Reasoning,
35
+ subtype: MessageReasoningUpdateType.Status,
36
+ status: "Started reasoning...",
37
+ };
38
+ }
39
+
40
  for await (const output of await endpoint({
41
  messages,
42
  preprompt,
 
59
  text = text.slice(0, text.length - stopToken.length);
60
  }
61
 
62
+ let finalAnswer = text;
63
+ if (model.reasoning && model.reasoning.type === "regex") {
64
+ const regex = new RegExp(model.reasoning.regex);
65
+ finalAnswer = regex.exec(reasoningBuffer)?.[1] ?? text;
66
+ } else if (model.reasoning && model.reasoning.type === "summarize") {
67
+ yield {
68
+ type: MessageUpdateType.Reasoning,
69
+ subtype: MessageReasoningUpdateType.Status,
70
+ status: "Summarizing reasoning...",
71
+ };
72
+ const summary = yield* generateFromDefaultEndpoint({
73
+ messages: [
74
+ {
75
+ from: "user",
76
+ content: `Question: ${
77
+ messages[messages.length - 1].content
78
+ }\n\nReasoning: ${reasoningBuffer}`,
79
+ },
80
+ ],
81
+ preprompt: `Your task is to summarize concisely all your reasoning steps and then give the final answer. Keep it short, one short paragraph at most. If the final solution includes code, make sure to include it in your answer.
82
+
83
+ If the user is just having a casual conversation that doesn't require explanations, answer directly without explaining your steps, otherwise make sure to summarize step by step, make sure to skip dead-ends in your reasoning and removing excess detail.
84
+
85
+ Do not use prefixes such as Response: or Answer: when answering to the user.`,
86
+ generateSettings: {
87
+ max_new_tokens: 1024,
88
+ },
89
+ });
90
+ finalAnswer = summary;
91
+ yield {
92
+ type: MessageUpdateType.Reasoning,
93
+ subtype: MessageReasoningUpdateType.Status,
94
+ status: `Done in ${Math.round((new Date().getTime() - startTime.getTime()) / 1000)}s.`,
95
+ };
96
+ }
97
+
98
  yield {
99
  type: MessageUpdateType.FinalAnswer,
100
+ text: finalAnswer,
101
  interrupted,
102
  webSources: output.webSources,
103
  };
104
  continue;
105
  }
106
 
107
+ if (model.reasoning && model.reasoning.type === "tokens") {
108
+ if (output.token.text === model.reasoning.beginToken) {
109
+ reasoning = true;
110
+ reasoningBuffer += output.token.text;
111
+ yield {
112
+ type: MessageUpdateType.Reasoning,
113
+ subtype: MessageReasoningUpdateType.Status,
114
+ status: "Started thinking...",
115
+ };
116
+ } else if (output.token.text === model.reasoning.endToken) {
117
+ reasoning = false;
118
+ reasoningBuffer += output.token.text;
119
+ yield {
120
+ type: MessageUpdateType.Reasoning,
121
+ subtype: MessageReasoningUpdateType.Status,
122
+ status: `Done in ${Math.round((new Date().getTime() - startTime.getTime()) / 1000)}s.`,
123
+ };
124
+ }
125
+ }
126
  // ignore special tokens
127
  if (output.token.special) continue;
128
 
129
  // pass down normal token
130
+ if (reasoning) {
131
+ reasoningBuffer += output.token.text;
132
+
133
+ // yield status update if it has changed
134
+ if (status !== "") {
135
+ yield {
136
+ type: MessageUpdateType.Reasoning,
137
+ subtype: MessageReasoningUpdateType.Status,
138
+ status,
139
+ };
140
+ status = "";
141
+ }
142
+
143
+ // create a new status every 5 seconds
144
+ if (new Date().getTime() - lastReasoningUpdate.getTime() > 4000) {
145
+ lastReasoningUpdate = new Date();
146
+ generateSummaryOfReasoning(reasoningBuffer).then((summary) => {
147
+ status = summary;
148
+ });
149
+ }
150
+ yield {
151
+ type: MessageUpdateType.Reasoning,
152
+ subtype: MessageReasoningUpdateType.Stream,
153
+ token: output.token.text,
154
+ };
155
+ } else {
156
+ yield { type: MessageUpdateType.Stream, token: output.token.text };
157
+ }
158
 
159
  // abort check
160
  const date = AbortedGenerations.getInstance().getList().get(conv._id.toString());
src/lib/server/textGeneration/reasoning.ts ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
2
+
3
+ import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
4
+
5
+ export async function generateSummaryOfReasoning(buffer: string): Promise<string> {
6
+ // debug 5s delay
7
+ await new Promise((resolve) => setTimeout(resolve, 3000));
8
+
9
+ const summary = await getReturnFromGenerator(
10
+ generateFromDefaultEndpoint({
11
+ messages: [
12
+ {
13
+ from: "user",
14
+ content: buffer.slice(-200),
15
+ },
16
+ ],
17
+ preprompt: `You are tasked with summarizing the latest reasoning steps. Never describe results of the reasoning, only the process. Remain vague in your summary.
18
+ The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points.
19
+ Example: "Thinking about life...", "Summarizing the results...", "Processing the input..."`,
20
+ generateSettings: {
21
+ max_new_tokens: 50,
22
+ },
23
+ })
24
+ ).then((summary) => {
25
+ const parts = summary.split("...");
26
+ return parts[0] + "...";
27
+ });
28
+
29
+ return summary;
30
+ }
src/lib/server/textGeneration/title.ts CHANGED
@@ -4,6 +4,7 @@ import type { EndpointMessage } from "../endpoints/endpoints";
4
  import { logger } from "$lib/server/logger";
5
  import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
6
  import type { Conversation } from "$lib/types/Conversation";
 
7
 
8
  export async function* generateTitleForConversation(
9
  conv: Conversation
@@ -55,14 +56,16 @@ export async function generateTitle(prompt: string) {
55
  { from: "user", content: prompt },
56
  ];
57
 
58
- return await generateFromDefaultEndpoint({
59
- messages,
60
- preprompt:
61
- "You are a summarization AI. Summarize the user's request into a single short sentence of four words or less. Do not try to answer it, only summarize the user's query. Always start your answer with an emoji relevant to the summary",
62
- generateSettings: {
63
- max_new_tokens: 15,
64
- },
65
- })
 
 
66
  .then((summary) => {
67
  // add an emoji if none is found in the first three characters
68
  if (!/\p{Emoji}/u.test(summary.slice(0, 3))) {
 
4
  import { logger } from "$lib/server/logger";
5
  import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
6
  import type { Conversation } from "$lib/types/Conversation";
7
+ import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
8
 
9
  export async function* generateTitleForConversation(
10
  conv: Conversation
 
56
  { from: "user", content: prompt },
57
  ];
58
 
59
+ return await getReturnFromGenerator(
60
+ generateFromDefaultEndpoint({
61
+ messages,
62
+ preprompt:
63
+ "You are a summarization AI. Summarize the user's request into a single short sentence of four words or less. Do not try to answer it, only summarize the user's query. Always start your answer with an emoji relevant to the summary",
64
+ generateSettings: {
65
+ max_new_tokens: 15,
66
+ },
67
+ })
68
+ )
69
  .then((summary) => {
70
  // add an emoji if none is found in the first three characters
71
  if (!/\p{Emoji}/u.test(summary.slice(0, 3))) {
src/lib/server/websearch/search/generateQuery.ts CHANGED
@@ -2,6 +2,7 @@ import type { Message } from "$lib/types/Message";
2
  import { format } from "date-fns";
3
  import type { EndpointMessage } from "../../endpoints/endpoints";
4
  import { generateFromDefaultEndpoint } from "../../generateFromDefaultEndpoint";
 
5
 
6
  export async function generateQuery(messages: Message[]) {
7
  const currentDate = format(new Date(), "MMMM d, yyyy");
@@ -62,13 +63,15 @@ Current Question: Where is it being hosted?`,
62
  },
63
  ];
64
 
65
- const webQuery = await generateFromDefaultEndpoint({
66
- messages: convQuery,
67
- preprompt: `You are tasked with generating web search queries. Give me an appropriate query to answer my question for google search. Answer with only the query. Today is ${currentDate}`,
68
- generateSettings: {
69
- max_new_tokens: 30,
70
- },
71
- });
 
 
72
 
73
  return webQuery.trim();
74
  }
 
2
  import { format } from "date-fns";
3
  import type { EndpointMessage } from "../../endpoints/endpoints";
4
  import { generateFromDefaultEndpoint } from "../../generateFromDefaultEndpoint";
5
+ import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
6
 
7
  export async function generateQuery(messages: Message[]) {
8
  const currentDate = format(new Date(), "MMMM d, yyyy");
 
63
  },
64
  ];
65
 
66
+ const webQuery = await getReturnFromGenerator(
67
+ generateFromDefaultEndpoint({
68
+ messages: convQuery,
69
+ preprompt: `You are tasked with generating web search queries. Give me an appropriate query to answer my question for google search. Answer with only the query. Today is ${currentDate}`,
70
+ generateSettings: {
71
+ max_new_tokens: 30,
72
+ },
73
+ })
74
+ );
75
 
76
  return webQuery.trim();
77
  }
src/lib/types/Message.ts CHANGED
@@ -10,6 +10,8 @@ export type Message = Partial<Timestamps> & {
10
  updates?: MessageUpdate[];
11
  webSearchId?: WebSearch["_id"]; // legacy version
12
  webSearch?: WebSearch;
 
 
13
  score?: -1 | 0 | 1;
14
  /**
15
  * Either contains the base64 encoded image data
 
10
  updates?: MessageUpdate[];
11
  webSearchId?: WebSearch["_id"]; // legacy version
12
  webSearch?: WebSearch;
13
+
14
+ reasoning?: string;
15
  score?: -1 | 0 | 1;
16
  /**
17
  * Either contains the base64 encoded image data
src/lib/types/MessageUpdate.ts CHANGED
@@ -8,7 +8,8 @@ export type MessageUpdate =
8
  | MessageWebSearchUpdate
9
  | MessageStreamUpdate
10
  | MessageFileUpdate
11
- | MessageFinalAnswerUpdate;
 
12
 
13
  export enum MessageUpdateType {
14
  Status = "status",
@@ -18,6 +19,7 @@ export enum MessageUpdateType {
18
  Stream = "stream",
19
  File = "file",
20
  FinalAnswer = "finalAnswer",
 
21
  }
22
 
23
  // Status
@@ -114,6 +116,25 @@ export interface MessageStreamUpdate {
114
  type: MessageUpdateType.Stream;
115
  token: string;
116
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  export interface MessageFileUpdate {
118
  type: MessageUpdateType.File;
119
  name: string;
 
8
  | MessageWebSearchUpdate
9
  | MessageStreamUpdate
10
  | MessageFileUpdate
11
+ | MessageFinalAnswerUpdate
12
+ | MessageReasoningUpdate;
13
 
14
  export enum MessageUpdateType {
15
  Status = "status",
 
19
  Stream = "stream",
20
  File = "file",
21
  FinalAnswer = "finalAnswer",
22
+ Reasoning = "reasoning",
23
  }
24
 
25
  // Status
 
116
  type: MessageUpdateType.Stream;
117
  token: string;
118
  }
119
+
120
+ export enum MessageReasoningUpdateType {
121
+ Stream = "stream",
122
+ Status = "status",
123
+ }
124
+
125
+ export type MessageReasoningUpdate = MessageReasoningStreamUpdate | MessageReasoningStatusUpdate;
126
+
127
+ export interface MessageReasoningStreamUpdate {
128
+ type: MessageUpdateType.Reasoning;
129
+ subtype: MessageReasoningUpdateType.Stream;
130
+ token: string;
131
+ }
132
+ export interface MessageReasoningStatusUpdate {
133
+ type: MessageUpdateType.Reasoning;
134
+ subtype: MessageReasoningUpdateType.Status;
135
+ status: string;
136
+ }
137
+
138
  export interface MessageFileUpdate {
139
  type: MessageUpdateType.File;
140
  name: string;
src/lib/utils/getReturnFromGenerator.ts ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ export async function getReturnFromGenerator<T, R>(generator: AsyncGenerator<T, R>): Promise<R> {
2
+ let result: IteratorResult<T, R>;
3
+ do {
4
+ result = await generator.next();
5
+ } while (!result.done); // Keep calling `next()` until `done` is true
6
+ return result.value; // Return the final value
7
+ }
src/routes/conversation/[id]/+page.svelte CHANGED
@@ -12,9 +12,9 @@
12
  import { webSearchParameters } from "$lib/stores/webSearchParameters";
13
  import type { Message } from "$lib/types/Message";
14
  import {
 
15
  MessageUpdateStatus,
16
  MessageUpdateType,
17
- type MessageUpdate,
18
  } from "$lib/types/MessageUpdate";
19
  import titleUpdate from "$lib/stores/titleUpdate";
20
  import file2base64 from "$lib/utils/file2base64";
@@ -215,8 +215,6 @@
215
 
216
  files = [];
217
 
218
- const messageUpdates: MessageUpdate[] = [];
219
-
220
  for await (const update of messageUpdatesIterator) {
221
  if ($isAborted) {
222
  messageUpdatesAbortController.abort();
@@ -229,7 +227,7 @@
229
  update.token = update.token.replaceAll("\0", "");
230
  }
231
 
232
- messageUpdates.push(update);
233
 
234
  if (update.type === MessageUpdateType.Stream && !$settings.disableStream) {
235
  messageToWriteTo.content += update.token;
@@ -239,7 +237,6 @@
239
  update.type === MessageUpdateType.WebSearch ||
240
  update.type === MessageUpdateType.Tool
241
  ) {
242
- messageToWriteTo.updates = [...(messageToWriteTo.updates ?? []), update];
243
  messages = [...messages];
244
  } else if (
245
  update.type === MessageUpdateType.Status &&
@@ -262,10 +259,18 @@
262
  { type: "hash", value: update.sha, mime: update.mime, name: update.name },
263
  ];
264
  messages = [...messages];
 
 
 
 
 
 
 
 
 
 
265
  }
266
  }
267
-
268
- messageToWriteTo.updates = messageUpdates;
269
  } catch (err) {
270
  if (err instanceof Error && err.message.includes("overloaded")) {
271
  $error = "Too much traffic, please try again.";
 
12
  import { webSearchParameters } from "$lib/stores/webSearchParameters";
13
  import type { Message } from "$lib/types/Message";
14
  import {
15
+ MessageReasoningUpdateType,
16
  MessageUpdateStatus,
17
  MessageUpdateType,
 
18
  } from "$lib/types/MessageUpdate";
19
  import titleUpdate from "$lib/stores/titleUpdate";
20
  import file2base64 from "$lib/utils/file2base64";
 
215
 
216
  files = [];
217
 
 
 
218
  for await (const update of messageUpdatesIterator) {
219
  if ($isAborted) {
220
  messageUpdatesAbortController.abort();
 
227
  update.token = update.token.replaceAll("\0", "");
228
  }
229
 
230
+ messageToWriteTo.updates = [...(messageToWriteTo.updates ?? []), update];
231
 
232
  if (update.type === MessageUpdateType.Stream && !$settings.disableStream) {
233
  messageToWriteTo.content += update.token;
 
237
  update.type === MessageUpdateType.WebSearch ||
238
  update.type === MessageUpdateType.Tool
239
  ) {
 
240
  messages = [...messages];
241
  } else if (
242
  update.type === MessageUpdateType.Status &&
 
259
  { type: "hash", value: update.sha, mime: update.mime, name: update.name },
260
  ];
261
  messages = [...messages];
262
+ } else if (update.type === MessageUpdateType.Reasoning) {
263
+ if (!messageToWriteTo.reasoning) {
264
+ messageToWriteTo.reasoning = "";
265
+ }
266
+ if (update.subtype === MessageReasoningUpdateType.Stream) {
267
+ messageToWriteTo.reasoning += update.token;
268
+ } else {
269
+ messageToWriteTo.updates = [...(messageToWriteTo.updates ?? []), update];
270
+ }
271
+ messages = [...messages];
272
  }
273
  }
 
 
274
  } catch (err) {
275
  if (err instanceof Error && err.message.includes("overloaded")) {
276
  $error = "Too much traffic, please try again.";
src/routes/conversation/[id]/+server.ts CHANGED
@@ -9,6 +9,7 @@ import { error } from "@sveltejs/kit";
9
  import { ObjectId } from "mongodb";
10
  import { z } from "zod";
11
  import {
 
12
  MessageUpdateStatus,
13
  MessageUpdateType,
14
  type MessageUpdate,
@@ -355,6 +356,12 @@ export async function POST({ request, locals, params, getClientAddress }) {
355
  Date.now() - (lastTokenTimestamp ?? promptedAt).getTime()
356
  );
357
  lastTokenTimestamp = new Date();
 
 
 
 
 
 
358
  }
359
 
360
  // Set the title
@@ -392,6 +399,10 @@ export async function POST({ request, locals, params, getClientAddress }) {
392
  !(
393
  event.type === MessageUpdateType.Status &&
394
  event.status === MessageUpdateStatus.KeepAlive
 
 
 
 
395
  )
396
  ) {
397
  messageToWriteTo?.updates?.push(event);
 
9
  import { ObjectId } from "mongodb";
10
  import { z } from "zod";
11
  import {
12
+ MessageReasoningUpdateType,
13
  MessageUpdateStatus,
14
  MessageUpdateType,
15
  type MessageUpdate,
 
356
  Date.now() - (lastTokenTimestamp ?? promptedAt).getTime()
357
  );
358
  lastTokenTimestamp = new Date();
359
+ } else if (
360
+ event.type === MessageUpdateType.Reasoning &&
361
+ event.subtype === MessageReasoningUpdateType.Stream
362
+ ) {
363
+ messageToWriteTo.reasoning ??= "";
364
+ messageToWriteTo.reasoning += event.token;
365
  }
366
 
367
  // Set the title
 
399
  !(
400
  event.type === MessageUpdateType.Status &&
401
  event.status === MessageUpdateStatus.KeepAlive
402
+ ) &&
403
+ !(
404
+ event.type === MessageUpdateType.Reasoning &&
405
+ event.subtype === MessageReasoningUpdateType.Stream
406
  )
407
  ) {
408
  messageToWriteTo?.updates?.push(event);