nsarrazin (HF Staff) committed
Commit 55ff303 · Parent(s): 6655689

feat(model): configure Llama-3.1-8B-Instruct with OpenAI-compatible endpoint

chart/env/prod.yaml CHANGED
@@ -381,11 +381,13 @@ envVars:
       {
         "name": "meta-llama/Llama-3.1-8B-Instruct",
         "tools": true,
-        "parameters": {
-          "temperature": 0.6,
-          "stop": ["<|endoftext|>", "<|eot_id|>"]
-        },
-        "unlisted": true
+        "unlisted": true,
+        "endpoints": [
+          {
+            "type": "openai",
+            "baseURL": "https://internal.api-inference.huggingface.co/models/meta-llama/Llama-3.1-8B-Instruct/v1"
+          }
+        ]
       }
     ]
 
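
With the `endpoints` entry above, chat-ui routes this model through its `openai` endpoint type instead of the default inference path. As a rough illustration only, a request against that `baseURL` with any OpenAI-compatible client could look like the sketch below; the `openai` npm package and the `HF_TOKEN` variable are assumptions made for the example, not part of this commit.

// Sketch: exercising the OpenAI-compatible endpoint configured above.
// Assumption: the endpoint accepts a Hugging Face token as the API key (HF_TOKEN);
// chat-ui wires its own credentials internally.
import OpenAI from "openai";

const client = new OpenAI({
	baseURL:
		"https://internal.api-inference.huggingface.co/models/meta-llama/Llama-3.1-8B-Instruct/v1",
	apiKey: process.env.HF_TOKEN ?? "", // hypothetical auth for the sketch
});

const completion = await client.chat.completions.create({
	model: "meta-llama/Llama-3.1-8B-Instruct",
	messages: [{ role: "user", content: "Say hello in one sentence." }],
});

console.log(completion.choices[0].message.content);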
 
src/lib/server/websearch/search/generateQuery.ts CHANGED
@@ -43,44 +43,6 @@ export async function generateQuery(messages: Message[]) {
 	const lastMessage = userMessages.slice(-1)[0];
 
 	const convQuery: Array<EndpointMessage> = [
-		{
-			from: "user",
-			content: `Previous Questions:
-- Who is the president of France?
-
-Current Question: What about Mexico?
-`,
-		},
-		{
-			from: "assistant",
-			content: "President of Mexico",
-		},
-		{
-			from: "user",
-			content: `Previous questions:
-- When is the next formula 1 grand prix?
-
-Current Question: Where is it being hosted?`,
-		},
-		{
-			from: "assistant",
-			content: "location of next formula 1 grand prix",
-		},
-		{
-			from: "user",
-			content: "Current Question: What type of printhead does the Epson F2270 DTG printer use?",
-		},
-		{
-			from: "assistant",
-			content: "Epson F2270 DTG printer printhead",
-		},
-		{ from: "user", content: "What were the news yesterday?" },
-		{
-			from: "assistant",
-			content: `news ${format(new Date(Date.now() - 864e5), "MMMM d, yyyy")}`,
-		},
-		{ from: "user", content: "What is the current weather in Paris?" },
-		{ from: "assistant", content: `weather in Paris ${currentDate}` },
 		{
 			from: "user",
 			content:
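
After this removal, `convQuery` no longer carries the few-shot question/query pairs and starts directly at the remaining user message, whose content is truncated in the hunk above. A minimal sketch of the reduced shape follows; the `EndpointMessage` type is simplified locally and the instruction string is a hypothetical placeholder, not the real text from the file.

// Sketch of the trimmed prompt: a single instruction message instead of few-shot examples.
// The instruction text below is a placeholder; the actual content is cut off in the diff.
import { format } from "date-fns";

type EndpointMessage = { from: "user" | "assistant"; content: string };

const currentDate = format(new Date(), "MMMM d, yyyy");

const convQuery: Array<EndpointMessage> = [
	{
		from: "user",
		content: `Current date: ${currentDate}. Rewrite the conversation's last question as a concise web search query.`,
	},
];

console.log(convQuery);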