nsarrazin HF Staff committed on
Commit
28b6d44
·
1 Parent(s): 1ce36bb

fix: improve tasks performance

Browse files
chart/env/prod.yaml CHANGED
@@ -164,7 +164,6 @@ envVars:
164
  },
165
  {
166
  "name": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
167
- "tokenizer": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
168
  "modelUrl": "https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
169
  "websiteUrl": "https://www.nvidia.com/",
170
  "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/nvidia-logo.png",
@@ -297,7 +296,6 @@ envVars:
297
  "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/nous-logo.png",
298
  "websiteUrl": "https://nousresearch.com/",
299
  "modelUrl": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B",
300
- "tokenizer": "NousResearch/Hermes-3-Llama-3.1-8B",
301
  "promptExamples": [
302
  {
303
  "title": "Write an email from bullet list",
@@ -321,7 +319,6 @@ envVars:
321
  },
322
  {
323
  "name": "mistralai/Mistral-Nemo-Instruct-2407",
324
- "tokenizer": "mistralai/Mistral-Nemo-Instruct-2407",
325
  "displayName": "mistralai/Mistral-Nemo-Instruct-2407",
326
  "description": "A small model with good capabilities in language understanding and commonsense reasoning.",
327
  "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/mistral-logo.png",
@@ -351,7 +348,6 @@ envVars:
351
  },
352
  {
353
  "name": "microsoft/Phi-3.5-mini-instruct",
354
- "tokenizer": "microsoft/Phi-3.5-mini-instruct",
355
  "description": "One of the best small models (3.8B parameters), super fast for simple tasks.",
356
  "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/microsoft-logo.png",
357
  "modelUrl": "https://huggingface.co/microsoft/Phi-3.5-mini-instruct",
@@ -382,12 +378,11 @@ envVars:
382
  "name": "meta-llama/Llama-3.1-8B-Instruct",
383
  "tools": true,
384
  "unlisted": true,
385
- "endpoints": [
386
- {
387
- "type": "openai",
388
- "baseURL": "https://internal.api-inference.huggingface.co/models/meta-llama/Llama-3.1-8B-Instruct/v1"
389
- }
390
- ]
391
  }
392
  ]
393
 
 
164
  },
165
  {
166
  "name": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
 
167
  "modelUrl": "https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
168
  "websiteUrl": "https://www.nvidia.com/",
169
  "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/nvidia-logo.png",
 
296
  "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/nous-logo.png",
297
  "websiteUrl": "https://nousresearch.com/",
298
  "modelUrl": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B",
 
299
  "promptExamples": [
300
  {
301
  "title": "Write an email from bullet list",
 
319
  },
320
  {
321
  "name": "mistralai/Mistral-Nemo-Instruct-2407",
 
322
  "displayName": "mistralai/Mistral-Nemo-Instruct-2407",
323
  "description": "A small model with good capabilities in language understanding and commonsense reasoning.",
324
  "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/mistral-logo.png",
 
348
  },
349
  {
350
  "name": "microsoft/Phi-3.5-mini-instruct",
 
351
  "description": "One of the best small models (3.8B parameters), super fast for simple tasks.",
352
  "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/microsoft-logo.png",
353
  "modelUrl": "https://huggingface.co/microsoft/Phi-3.5-mini-instruct",
 
378
  "name": "meta-llama/Llama-3.1-8B-Instruct",
379
  "tools": true,
380
  "unlisted": true,
381
+ "parameters": {
382
+ "stop": ["<|eot_id|>", "<|im_end|>"],
383
+ "temperature": 0.1,
384
+ "max_new_tokens": 256
385
+ }
 
386
  }
387
  ]
388
 
src/lib/server/textGeneration/reasoning.ts CHANGED
@@ -15,8 +15,7 @@ export async function generateSummaryOfReasoning(buffer: string): Promise<string
15
  ];
16
 
17
  const preprompt = `You are tasked with submitting a summary of the latest reasoning steps into a tool. Never describe results of the reasoning, only the process. Remain vague in your summary.
18
- The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points.
19
- Example: "Thinking about life...", "Summarizing the results...", "Processing the input...". `;
20
 
21
  if (smallModel.tools) {
22
  const summaryTool = {
 
15
  ];
16
 
17
  const preprompt = `You are tasked with submitting a summary of the latest reasoning steps into a tool. Never describe results of the reasoning, only the process. Remain vague in your summary.
18
+ The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points. The reasoning follows: \n`;
 
19
 
20
  if (smallModel.tools) {
21
  const summaryTool = {
src/lib/server/textGeneration/title.ts CHANGED
@@ -1,6 +1,5 @@
1
  import { env } from "$env/dynamic/private";
2
  import { generateFromDefaultEndpoint } from "$lib/server/generateFromDefaultEndpoint";
3
- import type { EndpointMessage } from "../endpoints/endpoints";
4
  import { logger } from "$lib/server/logger";
5
  import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
6
  import type { Conversation } from "$lib/types/Conversation";
@@ -58,7 +57,7 @@ export async function generateTitle(prompt: string) {
58
  },
59
  ],
60
  preprompt:
61
- "You are a summarization AI. You'll never answer a user's question directly, but instead summarize the user's request into a single short sentence.",
62
  tool: titleTool,
63
  endpoint,
64
  });
@@ -71,34 +70,9 @@ export async function generateTitle(prompt: string) {
71
  }
72
  }
73
 
74
- const messages: Array<EndpointMessage> = [
75
- {
76
- from: "system",
77
- content:
78
- "You are a summarization AI. You'll never answer a user's question directly, but instead summarize the user's request into a single short sentence of four words or less. Always start your answer with an emoji relevant to the summary",
79
- },
80
- { from: "user", content: "Who is the president of Gabon?" },
81
- { from: "assistant", content: "🇬🇦 President of Gabon" },
82
- { from: "user", content: "Who is Julien Chaumond?" },
83
- { from: "assistant", content: "🧑 Julien Chaumond" },
84
- { from: "user", content: "what is 1 + 1?" },
85
- { from: "assistant", content: "🔢 Simple math operation" },
86
- { from: "user", content: "What are the latest news?" },
87
- { from: "assistant", content: "📰 Latest news" },
88
- { from: "user", content: "How to make a great cheesecake?" },
89
- { from: "assistant", content: "🍰 Cheesecake recipe" },
90
- { from: "user", content: "what is your favorite movie? do a short answer." },
91
- { from: "assistant", content: "🎥 Favorite movie" },
92
- { from: "user", content: "Explain the concept of artificial intelligence in one sentence" },
93
- { from: "assistant", content: "🤖 AI definition" },
94
- { from: "user", content: "Draw a cute cat" },
95
- { from: "assistant", content: "🐱 Cute cat drawing" },
96
- { from: "user", content: prompt },
97
- ];
98
-
99
  return await getReturnFromGenerator(
100
  generateFromDefaultEndpoint({
101
- messages,
102
  preprompt:
103
  "You are a summarization AI. Summarize the user's request into a single short sentence of four words or less. Do not try to answer it, only summarize the user's query. Always start your answer with an emoji relevant to the summary",
104
  generateSettings: {
 
1
  import { env } from "$env/dynamic/private";
2
  import { generateFromDefaultEndpoint } from "$lib/server/generateFromDefaultEndpoint";
 
3
  import { logger } from "$lib/server/logger";
4
  import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
5
  import type { Conversation } from "$lib/types/Conversation";
 
57
  },
58
  ],
59
  preprompt:
60
+ "You are a summarization AI. You'll never answer a user's question directly, but instead summarize the user's request into a single short sentence. Here is the user message: \n",
61
  tool: titleTool,
62
  endpoint,
63
  });
 
70
  }
71
  }
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  return await getReturnFromGenerator(
74
  generateFromDefaultEndpoint({
75
+ messages: [{ from: "user", content: prompt }],
76
  preprompt:
77
  "You are a summarization AI. Summarize the user's request into a single short sentence of four words or less. Do not try to answer it, only summarize the user's query. Always start your answer with an emoji relevant to the summary",
78
  generateSettings: {
src/lib/server/tools/getToolOutput.ts CHANGED
@@ -42,6 +42,10 @@ export async function getToolOutput<T = string>({
42
  );
43
  calls.push(...extractedCalls);
44
  }
 
 
 
 
45
  }
46
 
47
  if (calls.length > 0) {
@@ -52,7 +56,9 @@ export async function getToolOutput<T = string>({
52
  if (toolCall?.parameters) {
53
  // Get the first parameter value since most tools have a single main parameter
54
  const firstParamValue = Object.values(toolCall.parameters)[0];
55
- return firstParamValue as T;
 
 
56
  }
57
  }
58
 
 
42
  );
43
  calls.push(...extractedCalls);
44
  }
45
+
46
+ if (calls.length > 0) {
47
+ break;
48
+ }
49
  }
50
 
51
  if (calls.length > 0) {
 
56
  if (toolCall?.parameters) {
57
  // Get the first parameter value since most tools have a single main parameter
58
  const firstParamValue = Object.values(toolCall.parameters)[0];
59
+ if (typeof firstParamValue === "string") {
60
+ return firstParamValue as T;
61
+ }
62
  }
63
  }
64
 
src/lib/server/websearch/search/generateQuery.ts CHANGED
@@ -59,7 +59,7 @@ export async function generateQuery(messages: Message[]) {
59
  const webQuery = await getReturnFromGenerator(
60
  generateFromDefaultEndpoint({
61
  messages: convQuery,
62
- preprompt: `The user wants you to search the web for information. Give a relevant google search query to answer the question. Answer with only the query. Today is ${currentDate}`,
63
  generateSettings: {
64
  max_new_tokens: 30,
65
  },
 
59
  const webQuery = await getReturnFromGenerator(
60
  generateFromDefaultEndpoint({
61
  messages: convQuery,
62
+ preprompt: `The user wants you to search the web for information. Give a relevant google search query to answer the question. Answer with only the query. Today is ${currentDate}. The conversation follows: \n`,
63
  generateSettings: {
64
  max_new_tokens: 30,
65
  },