Spaces:

jdelavande
/

chat-ui-energy

Running on CPU Upgrade

App Files Files Community

nsarrazin HF Staff committed on Mar 8

Commit

6655689

unverified ·

1 Parent(s): 23b1143

feat(tasks): use tools for tasks if available (#1749)

Browse files

* feat(tasks): use tools for web search query if available

* feat(websearch): improve query generation with error handling and logging

* feat(tools): implement generic tool output extraction utility

This commit adds a new `getToolOutput` utility to standardize tool interaction across different tasks like title generation, reasoning summary, and web search query generation. The utility supports extracting tool outputs using a consistent approach, with fallback mechanisms and error handling.

Files changed (6) hide show

chart/env/prod.yaml +1 -0
src/lib/server/textGeneration/reasoning.ts +57 -24
src/lib/server/textGeneration/title.ts +40 -0
src/lib/server/textGeneration/tools.ts +1 -1
src/lib/server/tools/getToolOutput.ts +64 -0
src/lib/server/websearch/search/generateQuery.ts +32 -1

chart/env/prod.yaml CHANGED Viewed

@@ -380,6 +380,7 @@ envVars:
       },
       {
         "name": "meta-llama/Llama-3.1-8B-Instruct",
         "parameters": {
           "temperature": 0.6,
           "stop": ["<|endoftext|>", "<|eot_id|>"]

       },
       {
         "name": "meta-llama/Llama-3.1-8B-Instruct",
+        "tools": true,
         "parameters": {
           "temperature": 0.6,
           "stop": ["<|endoftext|>", "<|eot_id|>"]

src/lib/server/textGeneration/reasoning.ts CHANGED Viewed

@@ -1,36 +1,69 @@
 import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
 import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
-import { logger } from "../logger";
 export async function generateSummaryOfReasoning(buffer: string): Promise<string> {
-	// debug 5s delay
-	await new Promise((resolve) => setTimeout(resolve, 3000));
-	const summary = await getReturnFromGenerator(
-		generateFromDefaultEndpoint({
-			messages: [
 				{
-					from: "user",
-					content: buffer.slice(-200),
 				},
 			],
-			preprompt: `You are tasked with summarizing the latest reasoning steps. Never describe results of the reasoning, only the process. Remain vague in your summary.
             The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points.
             Example: "Thinking about life...", "Summarizing the results...", "Processing the input..."`,
-			generateSettings: {
-				max_new_tokens: 50,
-			},
-		})
-	)
-		.then((summary) => {
-			const parts = summary.split("...");
-			return parts[0] + "...";
-		})
-		.catch((e) => {
-			logger.error(e);
-			return "Reasoning...";
-		});
-	return summary;
 }

 import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
+import { smallModel } from "../models";
 import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
+import { getToolOutput } from "../tools/getToolOutput";
+import { logger } from "../logger";
+import type { Tool } from "$lib/types/Tool";
 export async function generateSummaryOfReasoning(buffer: string): Promise<string> {
+	let summary: string | undefined;
+	const messages = [
+		{
+			from: "user" as const,
+			content: buffer.slice(-200),
+		},
+	];
+	const preprompt = `You are tasked with submitting a summary of the latest reasoning steps into a tool. Never describe results of the reasoning, only the process. Remain vague in your summary.
+The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points.
+Example: "Thinking about life...", "Summarizing the results...", "Processing the input...". `;
+	if (smallModel.tools) {
+		const summaryTool = {
+			name: "summary",
+			description: "Submit a summary for the submitted text",
+			inputs: [
 				{
+					name: "summary",
+					type: "str",
+					description: "The short summary of the reasoning steps",
+					paramType: "required",
 				},
 			],
+		} as unknown as Tool;
+		const endpoint = await smallModel.getEndpoint();
+		summary = await getToolOutput({
+			messages,
+			preprompt,
+			tool: summaryTool,
+			endpoint,
+		});
+	}
+	if (!summary) {
+		summary = await getReturnFromGenerator(
+			generateFromDefaultEndpoint({
+				messages: [
+					{
+						from: "user",
+						content: buffer.slice(-200),
+					},
+				],
+				preprompt: `You are tasked with summarizing the latest reasoning steps. Never describe results of the reasoning, only the process. Remain vague in your summary.
             The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points.
             Example: "Thinking about life...", "Summarizing the results...", "Processing the input..."`,
+				generateSettings: {
+					max_new_tokens: 50,
+				},
+			})
+		);
+	}
+	if (!summary) {
+		return "Reasoning...";
+	}
+	const parts = summary.split("...");
+	return parts[0].slice(0, 100) + "...";
 }

src/lib/server/textGeneration/title.ts CHANGED Viewed

@@ -5,6 +5,9 @@ import { logger } from "$lib/server/logger";
 import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
 import type { Conversation } from "$lib/types/Conversation";
 import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
 export async function* generateTitleForConversation(
 	conv: Conversation
@@ -31,6 +34,43 @@ export async function generateTitle(prompt: string) {
 		return prompt.split(/\s+/g).slice(0, 5).join(" ");
 	}
 	const messages: Array<EndpointMessage> = [
 		{
 			from: "system",

 import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
 import type { Conversation } from "$lib/types/Conversation";
 import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
+import { smallModel } from "../models";
+import type { Tool } from "$lib/types/Tool";
+import { getToolOutput } from "../tools/getToolOutput";
 export async function* generateTitleForConversation(
 	conv: Conversation
 		return prompt.split(/\s+/g).slice(0, 5).join(" ");
 	}
+	if (smallModel.tools) {
+		const titleTool = {
+			name: "title",
+			description:
+				"Submit a title for the conversation so far. Do not try to answer the user question or the tool will fail.",
+			inputs: [
+				{
+					name: "title",
+					type: "str",
+					description:
+						"The title for the conversation. It should be a single short sentence of four words or less and start with a unicode emoji relevant to the conversation.",
+				},
+			],
+		} as unknown as Tool;
+		const endpoint = await smallModel.getEndpoint();
+		const title = await getToolOutput({
+			messages: [
+				{
+					from: "user" as const,
+					content: prompt,
+				},
+			],
+			preprompt:
+				"You are a summarization AI. You'll never answer a user's question directly, but instead summarize the user's request into a single short sentence.",
+			tool: titleTool,
+			endpoint,
+		});
+		if (title) {
+			if (!/\p{Emoji}/u.test(title.slice(0, 3))) {
+				return "💬 " + title;
+			}
+			return title;
+		}
+	}
 	const messages: Array<EndpointMessage> = [
 		{
 			from: "system",

src/lib/server/textGeneration/tools.ts CHANGED Viewed

@@ -260,7 +260,7 @@ export async function* runTools(
 	return toolResults.filter((result): result is ToolResult => result !== undefined);
 }
-function externalToToolCall(call: unknown, tools: Tool[]): ToolCall | undefined {
 	// Early return if invalid input
 	if (!isValidCallObject(call)) {
 		return undefined;

 	return toolResults.filter((result): result is ToolResult => result !== undefined);
 }
+export function externalToToolCall(call: unknown, tools: Tool[]): ToolCall | undefined {
 	// Early return if invalid input
 	if (!isValidCallObject(call)) {
 		return undefined;

src/lib/server/tools/getToolOutput.ts ADDED Viewed

	@@ -0,0 +1,64 @@

+import type { Tool } from "$lib/types/Tool";
+import { extractJson } from "./utils";
+import { externalToToolCall } from "../textGeneration/tools";
+import { logger } from "../logger";
+import type { Endpoint, EndpointMessage } from "../endpoints/endpoints";
+/** Arguments accepted by `getToolOutput`. */
+interface GetToolOutputOptions {
+	/** Messages forwarded to the endpoint. */
+	messages: EndpointMessage[];
+	/** The single tool offered to the endpoint. */
+	tool: Tool;
+	/** Optional preprompt; the "only use tool" instruction is appended to it. */
+	preprompt?: string;
+	/** Endpoint invoked to obtain the generation stream. */
+	endpoint: Endpoint;
+	/** Generation settings passed through to the endpoint. */
+	generateSettings?: {
+		max_new_tokens?: number;
+		[key: string]: unknown;
+	};
+}
+/**
+ * Ask `endpoint` to call `tool` and return the first parameter value of the
+ * resulting call.
+ *
+ * Tool calls are collected both from `output.token.toolCalls` and from JSON
+ * extracted out of `output.generated_text`. The first collected call whose
+ * name equals `tool.name` is used.
+ *
+ * @returns The first parameter value of the matching call, cast to `T` without
+ *   runtime validation, or `undefined` when no matching call with parameters
+ *   was produced or when anything threw (the error is logged as a warning).
+ */
+export async function getToolOutput<T = string>({
+	messages,
+	preprompt,
+	tool,
+	endpoint,
+	generateSettings = { max_new_tokens: 64 },
+}: GetToolOutputOptions): Promise<T | undefined> {
+	try {
+		const toolInstruction = `Only use tool ${tool.name}.`;
+		const stream = await endpoint({
+			messages,
+			// `preprompt` is optional: concatenating it blindly would send the
+			// literal text "undefined" to the model.
+			preprompt: preprompt ? `${preprompt}\n\n ${toolInstruction}` : toolInstruction,
+			tools: [tool],
+			generateSettings,
+		});
+		const calls = [];
+		for await (const output of stream) {
+			if (output.token.toolCalls) {
+				calls.push(...output.token.toolCalls);
+			}
+			if (output.generated_text) {
+				// Also accept calls that were emitted as JSON in the generated text.
+				const rawCalls = await extractJson(output.generated_text);
+				for (const rawCall of rawCalls) {
+					const parsedCall = externalToToolCall(rawCall, [tool]);
+					if (parsedCall !== undefined) {
+						calls.push(parsedCall);
+					}
+				}
+			}
+		}
+		// Find the tool call matching our tool
+		const toolCall = calls.find((call) => call.name === tool.name);
+		if (!toolCall?.parameters) {
+			return undefined;
+		}
+		// Get the first parameter value since most tools have a single main parameter
+		const firstParamValue = Object.values(toolCall.parameters)[0];
+		return firstParamValue as T;
+	} catch (error) {
+		logger.warn(error, "Error getting tool output");
+		return undefined;
+	}
+}

src/lib/server/websearch/search/generateQuery.ts CHANGED Viewed

@@ -3,9 +3,40 @@ import { format } from "date-fns";
 import type { EndpointMessage } from "../../endpoints/endpoints";
 import { generateFromDefaultEndpoint } from "../../generateFromDefaultEndpoint";
 import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
 export async function generateQuery(messages: Message[]) {
 	const currentDate = format(new Date(), "MMMM d, yyyy");
 	const userMessages = messages.filter(({ from }) => from === "user");
 	const previousUserMessages = userMessages.slice(0, -1);
@@ -66,7 +97,7 @@ Current Question: Where is it being hosted?`,
 	const webQuery = await getReturnFromGenerator(
 		generateFromDefaultEndpoint({
 			messages: convQuery,
-			preprompt: `You are tasked with generating web search queries. Give me an appropriate query to answer my question for google search. Answer with only the query. Today is ${currentDate}`,
 			generateSettings: {
 				max_new_tokens: 30,
 			},

 import type { EndpointMessage } from "../../endpoints/endpoints";
 import { generateFromDefaultEndpoint } from "../../generateFromDefaultEndpoint";
 import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
+import { smallModel } from "$lib/server/models";
+import type { Tool } from "$lib/types/Tool";
+import { getToolOutput } from "$lib/server/tools/getToolOutput";
 export async function generateQuery(messages: Message[]) {
 	const currentDate = format(new Date(), "MMMM d, yyyy");
+	if (smallModel.tools) {
+		const webSearchTool = {
+			name: "web_search",
+			description: "Search the web for information",
+			inputs: [
+				{
+					name: "query",
+					type: "str",
+					description: "The query to search the web for",
+					paramType: "required",
+				},
+			],
+		} as unknown as Tool;
+		const endpoint = await smallModel.getEndpoint();
+		const query = await getToolOutput({
+			messages,
+			preprompt: `The user wants you to search the web for information. Give a relevant google search query to answer the question. Answer with only the query. Today is ${currentDate}`,
+			tool: webSearchTool,
+			endpoint,
+		});
+		if (query) {
+			return query;
+		}
+	}
 	const userMessages = messages.filter(({ from }) => from === "user");
 	const previousUserMessages = userMessages.slice(0, -1);
 	const webQuery = await getReturnFromGenerator(
 		generateFromDefaultEndpoint({
 			messages: convQuery,
+			preprompt: `The user wants you to search the web for information. Give a relevant google search query to answer the question. Answer with only the query. Today is ${currentDate}`,
 			generateSettings: {
 				max_new_tokens: 30,
 			},