fix: improve tasks performance
chart/env/prod.yaml
CHANGED
@@ -164,7 +164,6 @@ envVars:
       },
       {
         "name": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
-        "tokenizer": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
         "modelUrl": "https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
         "websiteUrl": "https://www.nvidia.com/",
         "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/nvidia-logo.png",
@@ -297,7 +296,6 @@ envVars:
         "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/nous-logo.png",
         "websiteUrl": "https://nousresearch.com/",
         "modelUrl": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B",
-        "tokenizer": "NousResearch/Hermes-3-Llama-3.1-8B",
         "promptExamples": [
           {
             "title": "Write an email from bullet list",
@@ -321,7 +319,6 @@ envVars:
       },
       {
         "name": "mistralai/Mistral-Nemo-Instruct-2407",
-        "tokenizer": "mistralai/Mistral-Nemo-Instruct-2407",
         "displayName": "mistralai/Mistral-Nemo-Instruct-2407",
         "description": "A small model with good capabilities in language understanding and commonsense reasoning.",
         "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/mistral-logo.png",
@@ -351,7 +348,6 @@ envVars:
       },
       {
         "name": "microsoft/Phi-3.5-mini-instruct",
-        "tokenizer": "microsoft/Phi-3.5-mini-instruct",
         "description": "One of the best small models (3.8B parameters), super fast for simple tasks.",
         "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/microsoft-logo.png",
         "modelUrl": "https://huggingface.co/microsoft/Phi-3.5-mini-instruct",
@@ -382,12 +378,11 @@ envVars:
         "name": "meta-llama/Llama-3.1-8B-Instruct",
         "tools": true,
         "unlisted": true,
-        "
-
-
-
-
-        ]
+        "parameters": {
+          "stop": ["<|eot_id|>", "<|im_end|>"],
+          "temperature": 0.1,
+          "max_new_tokens": 256
+        }
       }
     ]
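Two changes land in the prod config: the per-model "tokenizer" entries are dropped (each one simply repeated the model name), and the unlisted meta-llama/Llama-3.1-8B-Instruct task model gains an explicit "parameters" block with stop sequences, a low temperature, and a tight token budget, in line with the commit's goal of faster task generations. Below is a minimal TypeScript sketch, not chat-ui's actual code, of how such a per-model block can serve as defaults that request-level settings override.

// Illustrative sketch (not chat-ui's implementation): a per-model
// "parameters" block acting as defaults for generation settings.
interface GenerationParameters {
    stop?: string[];
    temperature?: number;
    max_new_tokens?: number;
}

const modelDefaults: GenerationParameters = {
    stop: ["<|eot_id|>", "<|im_end|>"],
    temperature: 0.1,
    max_new_tokens: 256,
};

// Later values win, so callers can still tighten limits per request.
function mergeSettings(
    defaults: GenerationParameters,
    overrides: GenerationParameters = {}
): GenerationParameters {
    return { ...defaults, ...overrides };
}

console.log(mergeSettings(modelDefaults, { max_new_tokens: 30 }));
// -> { stop: [...], temperature: 0.1, max_new_tokens: 30 }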
src/lib/server/textGeneration/reasoning.ts
CHANGED
@@ -15,8 +15,7 @@ export async function generateSummaryOfReasoning(buffer: string): Promise<string>
     ];

     const preprompt = `You are tasked with submitting a summary of the latest reasoning steps into a tool. Never describe results of the reasoning, only the process. Remain vague in your summary.
-    The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points.
-    Example: "Thinking about life...", "Summarizing the results...", "Processing the input...". `;
+    The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points. The reasoning follows: \n`;

     if (smallModel.tools) {
         const summaryTool = {
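The prompt now ends with an explicit "The reasoning follows: \n" marker instead of a list of example outputs, giving the model a clear boundary between the instructions and the buffered reasoning text. A rough sketch of the assembled prompt, using placeholder content:

// Rough sketch with placeholder strings: the trailing marker makes the
// concatenated prompt read as instructions followed by the raw buffer.
const preprompt = "Summarize the latest reasoning steps. The reasoning follows: \n";
const buffer = "First I checked the input length, then...";
const assembled = preprompt + buffer;
// => "Summarize the latest reasoning steps. The reasoning follows:
//     First I checked the input length, then..."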
src/lib/server/textGeneration/title.ts
CHANGED
@@ -1,6 +1,5 @@
 import { env } from "$env/dynamic/private";
 import { generateFromDefaultEndpoint } from "$lib/server/generateFromDefaultEndpoint";
-import type { EndpointMessage } from "../endpoints/endpoints";
 import { logger } from "$lib/server/logger";
 import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
 import type { Conversation } from "$lib/types/Conversation";
@@ -58,7 +57,7 @@ export async function generateTitle(prompt: string) {
         },
     ],
     preprompt:
-        "You are a summarization AI. You'll never answer a user's question directly, but instead summarize the user's request into a single short sentence.",
+        "You are a summarization AI. You'll never answer a user's question directly, but instead summarize the user's request into a single short sentence. Here is the user message: \n",
     tool: titleTool,
     endpoint,
 });
@@ -71,34 +70,9 @@ export async function generateTitle(prompt: string) {
     }
 }

-    const messages: Array<EndpointMessage> = [
-        {
-            from: "system",
-            content:
-                "You are a summarization AI. You'll never answer a user's question directly, but instead summarize the user's request into a single short sentence of four words or less. Always start your answer with an emoji relevant to the summary",
-        },
-        { from: "user", content: "Who is the president of Gabon?" },
-        { from: "assistant", content: "🇬🇦 President of Gabon" },
-        { from: "user", content: "Who is Julien Chaumond?" },
-        { from: "assistant", content: "🧑 Julien Chaumond" },
-        { from: "user", content: "what is 1 + 1?" },
-        { from: "assistant", content: "🔢 Simple math operation" },
-        { from: "user", content: "What are the latest news?" },
-        { from: "assistant", content: "📰 Latest news" },
-        { from: "user", content: "How to make a great cheesecake?" },
-        { from: "assistant", content: "🍰 Cheesecake recipe" },
-        { from: "user", content: "what is your favorite movie? do a short answer." },
-        { from: "assistant", content: "🎥 Favorite movie" },
-        { from: "user", content: "Explain the concept of artificial intelligence in one sentence" },
-        { from: "assistant", content: "🤖 AI definition" },
-        { from: "user", content: "Draw a cute cat" },
-        { from: "assistant", content: "🐱 Cute cat drawing" },
-        { from: "user", content: prompt },
-    ];
-
     return await getReturnFromGenerator(
         generateFromDefaultEndpoint({
-            messages,
+            messages: [{ from: "user", content: prompt }],
             preprompt:
                 "You are a summarization AI. Summarize the user's request into a single short sentence of four words or less. Do not try to answer it, only summarize the user's query. Always start your answer with an emoji relevant to the summary",
             generateSettings: {
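Both title paths now rely on the preprompt plus the raw user prompt: the tool path appends "Here is the user message: \n", and the fallback path replaces a roughly twenty-message few-shot transcript with a single { from: "user", content: prompt } message. Assuming the few-shot examples dominated the prompt length, this trims each title request by several hundred tokens, which fits the commit's stated performance goal; the now-unused EndpointMessage import goes with them.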
src/lib/server/tools/getToolOutput.ts
CHANGED
@@ -42,6 +42,10 @@ export async function getToolOutput<T = string>({
             );
             calls.push(...extractedCalls);
         }
+
+        if (calls.length > 0) {
+            break;
+        }
     }

     if (calls.length > 0) {
@@ -52,7 +56,9 @@ export async function getToolOutput<T = string>({
         if (toolCall?.parameters) {
             // Get the first parameter value since most tools have a single main parameter
             const firstParamValue = Object.values(toolCall.parameters)[0];
-
+            if (typeof firstParamValue === "string") {
+                return firstParamValue as T;
+            }
         }
     }

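The first hunk stops scanning generation chunks as soon as one of them yields a parsed tool call, rather than draining the whole stream; the second returns the first parameter only after confirming it is a string. A self-contained sketch of that early-exit pattern, with illustrative names that are not chat-ui's own:

// Illustrative early-exit sketch: stop consuming the stream once a chunk
// produces at least one parsed tool call.
interface ToolCall {
    name: string;
    parameters: Record<string, unknown>;
}

async function collectFirstCalls(
    chunks: AsyncIterable<string>,
    parse: (chunk: string) => ToolCall[]
): Promise<ToolCall[]> {
    const calls: ToolCall[] = [];
    for await (const chunk of chunks) {
        calls.push(...parse(chunk));
        if (calls.length > 0) {
            break; // skip the rest of the stream; we already have a call
        }
    }
    return calls;
}

// Guarded extraction mirroring the second hunk: only a string first
// parameter is treated as usable output.
function firstStringParam(call: ToolCall | undefined): string | undefined {
    if (!call?.parameters) return undefined;
    const first = Object.values(call.parameters)[0];
    return typeof first === "string" ? first : undefined;
}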
src/lib/server/websearch/search/generateQuery.ts
CHANGED
@@ -59,7 +59,7 @@ export async function generateQuery(messages: Message[]) {
     const webQuery = await getReturnFromGenerator(
         generateFromDefaultEndpoint({
             messages: convQuery,
-            preprompt: `The user wants you to search the web for information. Give a relevant google search query to answer the question. Answer with only the query. Today is ${currentDate}`,
+            preprompt: `The user wants you to search the web for information. Give a relevant google search query to answer the question. Answer with only the query. Today is ${currentDate}. The conversation follows: \n`,
             generateSettings: {
                 max_new_tokens: 30,
             },
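This mirrors the reasoning.ts change above: the trailing "The conversation follows: \n" marks where the instructions stop and the concatenated conversation begins, making it less likely that the model treats the instructions themselves as part of the conversation it is summarizing into a query.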