Spaces:

jdelavande
/

chat-ui-energy

Running on CPU Upgrade

nsarrazin HF Staff committed on Mar 12

Commit

d37f9ca

unverified ·

1 Parent(s): 185c2ff

feat: improve task model tool calling (#1755)

* feat: improve task model tool calling

* feat(prod): add tokenizer for Hermes-3-Llama-3.1-8B to task configuration

Files changed (3) hide show

chart/env/prod.yaml CHANGED Viewed

@@ -375,11 +375,17 @@ envVars:
         ]
       },
       {
-        "name": "meta-llama/Llama-3.1-8B-Instruct",
-        "tools": true,
         "unlisted": true,
         "parameters": {
-          "stop": ["<|eot_id|>", "<|im_end|>"],
           "temperature": 0.1,
           "max_new_tokens": 256
         }
@@ -456,7 +462,7 @@ envVars:
   PUBLIC_APP_DISCLAIMER: 1
   PUBLIC_PLAUSIBLE_SCRIPT_URL: "/js/script.js"
   REQUIRE_FEATURED_ASSISTANTS: "true"
-  TASK_MODEL: "meta-llama/Llama-3.1-8B-Instruct"
   TEXT_EMBEDDING_MODELS: >
     [{
       "name": "bge-base-en-v1-5-sxa",

         ]
       },
       {
+        "name": "internal/task",
+        "tokenizer" : "NousResearch/Hermes-3-Llama-3.1-8B",
         "unlisted": true,
+        "tools" : true,
+        "endpoints": [
+          {
+            "type": "openai",
+            "baseURL": "https://internal.api-inference.huggingface.co/models/NousResearch/Hermes-3-Llama-3.1-8B/v1"
+          }
+        ],
         "parameters": {
           "temperature": 0.1,
           "max_new_tokens": 256
         }
   PUBLIC_APP_DISCLAIMER: 1
   PUBLIC_PLAUSIBLE_SCRIPT_URL: "/js/script.js"
   REQUIRE_FEATURED_ASSISTANTS: "true"
+  TASK_MODEL: "internal/task"
   TEXT_EMBEDDING_MODELS: >
     [{
       "name": "bge-base-en-v1-5-sxa",

src/lib/server/textGeneration/reasoning.ts CHANGED Viewed

@@ -15,7 +15,7 @@ export async function generateSummaryOfReasoning(buffer: string): Promise<string
 	];
 	const preprompt = `You are tasked with submitting a summary of the latest reasoning steps into a tool. Never describe results of the reasoning, only the process. Remain vague in your summary.
-The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points. The reasoning follows: \n`;
 	if (smallModel.tools) {
 		const summaryTool = {
@@ -25,7 +25,8 @@ The text might be incomplete, try your best to summarize it in one very short se
 				{
 					name: "summary",
 					type: "str",
-					description: "The short summary of the reasoning steps",
 					paramType: "required",
 				},
 			],

 	];
 	const preprompt = `You are tasked with submitting a summary of the latest reasoning steps into a tool. Never describe results of the reasoning, only the process. Remain vague in your summary.
+The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points. The sentence must be very short, ideally 5 words or less.`;
 	if (smallModel.tools) {
 		const summaryTool = {
 				{
 					name: "summary",
 					type: "str",
+					description:
+						"The short summary of the reasoning steps. 5 words or less. Must start with a gerund.",
 					paramType: "required",
 				},
 			],

src/lib/server/textGeneration/title.ts CHANGED Viewed

@@ -43,7 +43,7 @@ export async function generateTitle(prompt: string) {
 					name: "title",
 					type: "str",
 					description:
-						"The title for the conversation. It should be a single short sentence of four words or less and start with a unicode emoji relevant to the conversation.",
 				},
 			],
 		} as unknown as Tool;
@@ -57,7 +57,7 @@ export async function generateTitle(prompt: string) {
 				},
 			],
 			preprompt:
-				"You are a summarization AI. You'll never answer a user's question directly, but instead summarize the user's request into a single short sentence. Here is the user message: \n",
 			tool: titleTool,
 			endpoint,
 		});
@@ -76,7 +76,7 @@ export async function generateTitle(prompt: string) {
 			preprompt:
 				"You are a summarization AI. Summarize the user's request into a single short sentence of four words or less. Do not try to answer it, only summarize the user's query. Always start your answer with an emoji relevant to the summary",
 			generateSettings: {
-				max_new_tokens: 15,
 			},
 		})
 	)

 					name: "title",
 					type: "str",
 					description:
+						"The title for the conversation. It should be 5 words or less and start with a unicode emoji relevant to the query.",
 				},
 			],
 		} as unknown as Tool;
 				},
 			],
 			preprompt:
+				"The task is to generate conversation titles based on text snippets. You'll never answer the provided question directly, but instead summarize the user's request into a short title.",
 			tool: titleTool,
 			endpoint,
 		});
 			preprompt:
 				"You are a summarization AI. Summarize the user's request into a single short sentence of four words or less. Do not try to answer it, only summarize the user's query. Always start your answer with an emoji relevant to the summary",
 			generateSettings: {
+				max_new_tokens: 30,
 			},
 		})
 	)