Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
feat(tasks): use tools for tasks if available (#1749)
Browse files
* feat(tasks): use tools for web search query if available
* feat(websearch): improve query generation with error handling and logging
* feat(tools): implement generic tool output extraction utility
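This commit adds a new `getToolOutput` utility to standardize tool interaction across tasks such as title generation, reasoning summary, and web search query generation. The utility extracts a tool's output in a consistent way: it collects tool calls from the response stream, falls back to parsing JSON tool calls out of the generated text, and logs a warning and returns `undefined` if an error occurs, so that callers can fall back to plain text generation.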
chart/env/prod.yaml
CHANGED
@@ -380,6 +380,7 @@ envVars:
|
|
380 |
},
|
381 |
{
|
382 |
"name": "meta-llama/Llama-3.1-8B-Instruct",
|
|
|
383 |
"parameters": {
|
384 |
"temperature": 0.6,
|
385 |
"stop": ["<|endoftext|>", "<|eot_id|>"]
|
|
|
380 |
},
|
381 |
{
|
382 |
"name": "meta-llama/Llama-3.1-8B-Instruct",
|
383 |
+
"tools": true,
|
384 |
"parameters": {
|
385 |
"temperature": 0.6,
|
386 |
"stop": ["<|endoftext|>", "<|eot_id|>"]
|
src/lib/server/textGeneration/reasoning.ts
CHANGED
@@ -1,36 +1,69 @@
|
|
1 |
import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
|
2 |
-
|
3 |
import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
|
4 |
-
import {
|
|
|
5 |
|
6 |
export async function generateSummaryOfReasoning(buffer: string): Promise<string> {
|
7 |
-
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
13 |
{
|
14 |
-
|
15 |
-
|
|
|
|
|
16 |
},
|
17 |
],
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points.
|
20 |
Example: "Thinking about life...", "Summarizing the results...", "Processing the input..."`,
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
logger.error(e);
|
32 |
-
return "Reasoning...";
|
33 |
-
});
|
34 |
|
35 |
-
|
|
|
36 |
}
|
|
|
1 |
import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
|
2 |
+
import { smallModel } from "../models";
|
3 |
import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
|
4 |
+
import { getToolOutput } from "../tools/getToolOutput";
|
5 |
+
import type { Tool } from "$lib/types/Tool";
|
6 |
|
7 |
export async function generateSummaryOfReasoning(buffer: string): Promise<string> {
|
8 |
+
let summary: string | undefined;
|
9 |
+
|
10 |
+
const messages = [
|
11 |
+
{
|
12 |
+
from: "user" as const,
|
13 |
+
content: buffer.slice(-200),
|
14 |
+
},
|
15 |
+
];
|
16 |
+
|
17 |
+
const preprompt = `You are tasked with submitting a summary of the latest reasoning steps into a tool. Never describe results of the reasoning, only the process. Remain vague in your summary.
|
18 |
+
The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points.
|
19 |
+
Example: "Thinking about life...", "Summarizing the results...", "Processing the input...". `;
|
20 |
|
21 |
+
if (smallModel.tools) {
|
22 |
+
const summaryTool = {
|
23 |
+
name: "summary",
|
24 |
+
description: "Submit a summary for the submitted text",
|
25 |
+
inputs: [
|
26 |
{
|
27 |
+
name: "summary",
|
28 |
+
type: "str",
|
29 |
+
description: "The short summary of the reasoning steps",
|
30 |
+
paramType: "required",
|
31 |
},
|
32 |
],
|
33 |
+
} as unknown as Tool;
|
34 |
+
|
35 |
+
const endpoint = await smallModel.getEndpoint();
|
36 |
+
summary = await getToolOutput({
|
37 |
+
messages,
|
38 |
+
preprompt,
|
39 |
+
tool: summaryTool,
|
40 |
+
endpoint,
|
41 |
+
});
|
42 |
+
}
|
43 |
+
|
44 |
+
if (!summary) {
|
45 |
+
summary = await getReturnFromGenerator(
|
46 |
+
generateFromDefaultEndpoint({
|
47 |
+
messages: [
|
48 |
+
{
|
49 |
+
from: "user",
|
50 |
+
content: buffer.slice(-200),
|
51 |
+
},
|
52 |
+
],
|
53 |
+
preprompt: `You are tasked with summarizing the latest reasoning steps. Never describe results of the reasoning, only the process. Remain vague in your summary.
|
54 |
The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points.
|
55 |
Example: "Thinking about life...", "Summarizing the results...", "Processing the input..."`,
|
56 |
+
generateSettings: {
|
57 |
+
max_new_tokens: 50,
|
58 |
+
},
|
59 |
+
})
|
60 |
+
);
|
61 |
+
}
|
62 |
+
|
63 |
+
if (!summary) {
|
64 |
+
return "Reasoning...";
|
65 |
+
}
|
|
|
|
|
|
|
66 |
|
67 |
+
const parts = summary.split("...");
|
68 |
+
return parts[0].slice(0, 100) + "...";
|
69 |
}
|
src/lib/server/textGeneration/title.ts
CHANGED
@@ -5,6 +5,9 @@ import { logger } from "$lib/server/logger";
|
|
5 |
import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
|
6 |
import type { Conversation } from "$lib/types/Conversation";
|
7 |
import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
|
|
|
|
|
|
|
8 |
|
9 |
export async function* generateTitleForConversation(
|
10 |
conv: Conversation
|
@@ -31,6 +34,43 @@ export async function generateTitle(prompt: string) {
|
|
31 |
return prompt.split(/\s+/g).slice(0, 5).join(" ");
|
32 |
}
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
const messages: Array<EndpointMessage> = [
|
35 |
{
|
36 |
from: "system",
|
|
|
5 |
import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
|
6 |
import type { Conversation } from "$lib/types/Conversation";
|
7 |
import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
|
8 |
+
import { smallModel } from "../models";
|
9 |
+
import type { Tool } from "$lib/types/Tool";
|
10 |
+
import { getToolOutput } from "../tools/getToolOutput";
|
11 |
|
12 |
export async function* generateTitleForConversation(
|
13 |
conv: Conversation
|
|
|
34 |
return prompt.split(/\s+/g).slice(0, 5).join(" ");
|
35 |
}
|
36 |
|
37 |
+
if (smallModel.tools) {
|
38 |
+
const titleTool = {
|
39 |
+
name: "title",
|
40 |
+
description:
|
41 |
+
"Submit a title for the conversation so far. Do not try to answer the user question or the tool will fail.",
|
42 |
+
inputs: [
|
43 |
+
{
|
44 |
+
name: "title",
|
45 |
+
type: "str",
|
46 |
+
description:
|
47 |
+
"The title for the conversation. It should be a single short sentence of four words or less and start with a unicode emoji relevant to the conversation.",
|
48 |
+
},
|
49 |
+
],
|
50 |
+
} as unknown as Tool;
|
51 |
+
|
52 |
+
const endpoint = await smallModel.getEndpoint();
|
53 |
+
const title = await getToolOutput({
|
54 |
+
messages: [
|
55 |
+
{
|
56 |
+
from: "user" as const,
|
57 |
+
content: prompt,
|
58 |
+
},
|
59 |
+
],
|
60 |
+
preprompt:
|
61 |
+
"You are a summarization AI. You'll never answer a user's question directly, but instead summarize the user's request into a single short sentence.",
|
62 |
+
tool: titleTool,
|
63 |
+
endpoint,
|
64 |
+
});
|
65 |
+
|
66 |
+
if (title) {
|
67 |
+
if (!/\p{Emoji}/u.test(title.slice(0, 3))) {
|
68 |
+
return "💬 " + title;
|
69 |
+
}
|
70 |
+
return title;
|
71 |
+
}
|
72 |
+
}
|
73 |
+
|
74 |
const messages: Array<EndpointMessage> = [
|
75 |
{
|
76 |
from: "system",
|
src/lib/server/textGeneration/tools.ts
CHANGED
@@ -260,7 +260,7 @@ export async function* runTools(
|
|
260 |
return toolResults.filter((result): result is ToolResult => result !== undefined);
|
261 |
}
|
262 |
|
263 |
-
function externalToToolCall(call: unknown, tools: Tool[]): ToolCall | undefined {
|
264 |
// Early return if invalid input
|
265 |
if (!isValidCallObject(call)) {
|
266 |
return undefined;
|
|
|
260 |
return toolResults.filter((result): result is ToolResult => result !== undefined);
|
261 |
}
|
262 |
|
263 |
+
export function externalToToolCall(call: unknown, tools: Tool[]): ToolCall | undefined {
|
264 |
// Early return if invalid input
|
265 |
if (!isValidCallObject(call)) {
|
266 |
return undefined;
|
src/lib/server/tools/getToolOutput.ts
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import type { Tool } from "$lib/types/Tool";
|
2 |
+
import { extractJson } from "./utils";
|
3 |
+
import { externalToToolCall } from "../textGeneration/tools";
|
4 |
+
import { logger } from "../logger";
|
5 |
+
import type { Endpoint, EndpointMessage } from "../endpoints/endpoints";
|
6 |
+
|
7 |
+
interface GetToolOutputOptions {
|
8 |
+
messages: EndpointMessage[];
|
9 |
+
tool: Tool;
|
10 |
+
preprompt?: string;
|
11 |
+
endpoint: Endpoint;
|
12 |
+
generateSettings?: {
|
13 |
+
max_new_tokens?: number;
|
14 |
+
[key: string]: unknown;
|
15 |
+
};
|
16 |
+
}
|
17 |
+
|
18 |
+
export async function getToolOutput<T = string>({
|
19 |
+
messages,
|
20 |
+
preprompt,
|
21 |
+
tool,
|
22 |
+
endpoint,
|
23 |
+
generateSettings = { max_new_tokens: 64 },
|
24 |
+
}: GetToolOutputOptions): Promise<T | undefined> {
|
25 |
+
try {
|
26 |
+
const stream = await endpoint({
|
27 |
+
messages,
|
28 |
+
preprompt: preprompt + `\n\n Only use tool ${tool.name}.`,
|
29 |
+
tools: [tool],
|
30 |
+
generateSettings,
|
31 |
+
});
|
32 |
+
|
33 |
+
const calls = [];
|
34 |
+
|
35 |
+
for await (const output of stream) {
|
36 |
+
if (output.token.toolCalls) {
|
37 |
+
calls.push(...output.token.toolCalls);
|
38 |
+
}
|
39 |
+
if (output.generated_text) {
|
40 |
+
const extractedCalls = await extractJson(output.generated_text).then((calls) =>
|
41 |
+
calls.map((call) => externalToToolCall(call, [tool])).filter((call) => call !== undefined)
|
42 |
+
);
|
43 |
+
calls.push(...extractedCalls);
|
44 |
+
}
|
45 |
+
}
|
46 |
+
|
47 |
+
if (calls.length > 0) {
|
48 |
+
// Find the tool call matching our tool
|
49 |
+
const toolCall = calls.find((call) => call.name === tool.name);
|
50 |
+
|
51 |
+
// If we found a matching call and it has parameters
|
52 |
+
if (toolCall?.parameters) {
|
53 |
+
// Get the first parameter value since most tools have a single main parameter
|
54 |
+
const firstParamValue = Object.values(toolCall.parameters)[0];
|
55 |
+
return firstParamValue as T;
|
56 |
+
}
|
57 |
+
}
|
58 |
+
|
59 |
+
return undefined;
|
60 |
+
} catch (error) {
|
61 |
+
logger.warn(error, "Error getting tool output");
|
62 |
+
return undefined;
|
63 |
+
}
|
64 |
+
}
|
src/lib/server/websearch/search/generateQuery.ts
CHANGED
@@ -3,9 +3,40 @@ import { format } from "date-fns";
|
|
3 |
import type { EndpointMessage } from "../../endpoints/endpoints";
|
4 |
import { generateFromDefaultEndpoint } from "../../generateFromDefaultEndpoint";
|
5 |
import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
|
|
|
|
|
|
|
6 |
|
7 |
export async function generateQuery(messages: Message[]) {
|
8 |
const currentDate = format(new Date(), "MMMM d, yyyy");
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
const userMessages = messages.filter(({ from }) => from === "user");
|
10 |
const previousUserMessages = userMessages.slice(0, -1);
|
11 |
|
@@ -66,7 +97,7 @@ Current Question: Where is it being hosted?`,
|
|
66 |
const webQuery = await getReturnFromGenerator(
|
67 |
generateFromDefaultEndpoint({
|
68 |
messages: convQuery,
|
69 |
-
preprompt: `
|
70 |
generateSettings: {
|
71 |
max_new_tokens: 30,
|
72 |
},
|
|
|
3 |
import type { EndpointMessage } from "../../endpoints/endpoints";
|
4 |
import { generateFromDefaultEndpoint } from "../../generateFromDefaultEndpoint";
|
5 |
import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
|
6 |
+
import { smallModel } from "$lib/server/models";
|
7 |
+
import type { Tool } from "$lib/types/Tool";
|
8 |
+
import { getToolOutput } from "$lib/server/tools/getToolOutput";
|
9 |
|
10 |
export async function generateQuery(messages: Message[]) {
|
11 |
const currentDate = format(new Date(), "MMMM d, yyyy");
|
12 |
+
|
13 |
+
if (smallModel.tools) {
|
14 |
+
const webSearchTool = {
|
15 |
+
name: "web_search",
|
16 |
+
description: "Search the web for information",
|
17 |
+
inputs: [
|
18 |
+
{
|
19 |
+
name: "query",
|
20 |
+
type: "str",
|
21 |
+
description: "The query to search the web for",
|
22 |
+
paramType: "required",
|
23 |
+
},
|
24 |
+
],
|
25 |
+
} as unknown as Tool;
|
26 |
+
|
27 |
+
const endpoint = await smallModel.getEndpoint();
|
28 |
+
const query = await getToolOutput({
|
29 |
+
messages,
|
30 |
+
preprompt: `The user wants you to search the web for information. Give a relevant google search query to answer the question. Answer with only the query. Today is ${currentDate}`,
|
31 |
+
tool: webSearchTool,
|
32 |
+
endpoint,
|
33 |
+
});
|
34 |
+
|
35 |
+
if (query) {
|
36 |
+
return query;
|
37 |
+
}
|
38 |
+
}
|
39 |
+
|
40 |
const userMessages = messages.filter(({ from }) => from === "user");
|
41 |
const previousUserMessages = userMessages.slice(0, -1);
|
42 |
|
|
|
97 |
const webQuery = await getReturnFromGenerator(
|
98 |
generateFromDefaultEndpoint({
|
99 |
messages: convQuery,
|
100 |
+
preprompt: `The user wants you to search the web for information. Give a relevant google search query to answer the question. Answer with only the query. Today is ${currentDate}`,
|
101 |
generateSettings: {
|
102 |
max_new_tokens: 30,
|
103 |
},
|