Spaces:
Running
on
CPU Upgrade
feat: add sources for websearch (#1551)
Browse files* feat: playwright, spatial parsing, markdown for web search
Co-authored-by: Aaditya Sahay <[email protected]>
* feat: choose multiple clusters if necessary (#2)
* chore: resolve linting failures
* feat: improve parsing performance and error messages
* feat: inline citations
* feat: adjust inline citation prompt, less intrusive tokens
* feat: add sources to message when using websearch
* fix: clean up packages
* fix: packages
* fix: packages lol
* fix: make websearch citation work better with tools
* fix: use single brackets for sources, only render source element if a matching source is available
* fix: bad import
---------
Co-authored-by: Liam Dyer <[email protected]>
Co-authored-by: Aaditya Sahay <[email protected]>
Co-authored-by: Aaditya Sahay <[email protected]>
@@ -13434,4 +13434,4 @@
|
|
13434 |
}
|
13435 |
}
|
13436 |
}
|
13437 |
-
}
|
|
|
13434 |
}
|
13435 |
}
|
13436 |
}
|
13437 |
+
}
|
@@ -37,6 +37,28 @@
|
|
37 |
import DOMPurify from "isomorphic-dompurify";
|
38 |
import { enhance } from "$app/forms";
|
39 |
import { browser } from "$app/environment";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
function sanitizeMd(md: string) {
|
42 |
let ret = md
|
@@ -114,7 +136,7 @@
|
|
114 |
})
|
115 |
);
|
116 |
|
117 |
-
$: tokens = marked.lexer(sanitizeMd(message.content
|
118 |
|
119 |
$: emptyLoad =
|
120 |
!message.content && (webSearchIsDone || (searchUpdates && searchUpdates.length === 0));
|
|
|
37 |
import DOMPurify from "isomorphic-dompurify";
|
38 |
import { enhance } from "$app/forms";
|
39 |
import { browser } from "$app/environment";
|
40 |
+
import type { WebSearchSource } from "$lib/types/WebSearch";
|
41 |
+
|
42 |
+
/**
 * Replaces inline citation markers such as `[1]` in a markdown string with
 * superscript HTML links pointing at the matching web search source.
 *
 * Markers are 1-based: `[n]` refers to `webSearchSources[n - 1]`. A marker
 * that has no matching source is left in the text unchanged.
 *
 * @param md - Markdown text that may contain `[n]` citation markers.
 * @param webSearchSources - Sources addressed by citation index; defaults to an empty list.
 * @returns The markdown with every resolvable marker replaced by
 *   ` <sup><a href="…">n</a></sup>` (note the leading space).
 */
function addInlineCitations(md: string, webSearchSources: WebSearchSource[] = []): string {
	// Inline styles cannot express :hover, so only the always-applied declarations are kept.
	const linkStyle = "color: rgb(59, 130, 246); text-decoration: none;";

	// The link value originates outside this function, so it is escaped before being
	// placed inside a double-quoted HTML attribute; otherwise a `"` in the link could
	// terminate the attribute and inject arbitrary markup.
	const escapeAttr = (value: string): string =>
		value
			.replace(/&/g, "&amp;")
			.replace(/"/g, "&quot;")
			.replace(/'/g, "&#39;")
			.replace(/</g, "&lt;")
			.replace(/>/g, "&gt;");

	return md.replace(/\[(\d+)\]/g, (match: string, digits: string) => {
		// The capture group already holds the digits; no need to re-scan the match.
		const index = Number(digits);
		const source = webSearchSources[index - 1];

		// Unknown index (including [0] and out-of-range values): keep the original text.
		if (!source) {
			return match;
		}

		const href = escapeAttr(String(source.link));
		return ` <sup><a href="${href}" target="_blank" rel="noreferrer" style="${linkStyle}">${index}</a></sup>`;
	});
}
|
62 |
|
63 |
function sanitizeMd(md: string) {
|
64 |
let ret = md
|
|
|
136 |
})
|
137 |
);
|
138 |
|
139 |
+
$: tokens = marked.lexer(addInlineCitations(sanitizeMd(message.content), webSearchSources));
|
140 |
|
141 |
$: emptyLoad =
|
142 |
!message.content && (webSearchIsDone || (searchUpdates && searchUpdates.length === 0));
|
@@ -17,7 +17,7 @@ export async function preprocessMessages(
|
|
17 |
|
18 |
function addWebSearchContext(messages: Message[], webSearch: Message["webSearch"]) {
|
19 |
const webSearchContext = webSearch?.contextSources
|
20 |
-
.map(({ context }) => context.trim())
|
21 |
.join("\n\n----------\n\n");
|
22 |
|
23 |
// No web search context available, skip
|
@@ -35,7 +35,7 @@ function addWebSearchContext(messages: Message[], webSearch: Message["webSearch"
|
|
35 |
const finalMessage = {
|
36 |
...messages[messages.length - 1],
|
37 |
content: `I searched the web using the query: ${webSearch.searchQuery}.
|
38 |
-
Today is ${currentDate} and here are the results:
|
39 |
=====================
|
40 |
${webSearchContext}
|
41 |
=====================
|
|
|
17 |
|
18 |
function addWebSearchContext(messages: Message[], webSearch: Message["webSearch"]) {
|
19 |
const webSearchContext = webSearch?.contextSources
|
20 |
+
.map(({ context }, idx) => `Source [${idx + 1}]\n${context.trim()}`)
|
21 |
.join("\n\n----------\n\n");
|
22 |
|
23 |
// No web search context available, skip
|
|
|
35 |
const finalMessage = {
|
36 |
...messages[messages.length - 1],
|
37 |
content: `I searched the web using the query: ${webSearch.searchQuery}.
|
38 |
+
Today is ${currentDate} and here are the results. When answering the question, if you use a source, cite its index inline like this: [1], [2], etc.
|
39 |
=====================
|
40 |
${webSearchContext}
|
41 |
=====================
|
@@ -25,12 +25,21 @@ const websearch: ConfigTool = {
|
|
25 |
showOutput: false,
|
26 |
async *call({ query }, { conv, assistant, messages }) {
|
27 |
const webSearchToolResults = yield* runWebSearch(conv, messages, assistant?.rag, String(query));
|
28 |
-
|
29 |
-
|
30 |
-
.
|
|
|
31 |
|
32 |
return {
|
33 |
-
outputs: [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
display: false,
|
35 |
};
|
36 |
},
|
|
|
25 |
showOutput: false,
|
26 |
async *call({ query }, { conv, assistant, messages }) {
|
27 |
const webSearchToolResults = yield* runWebSearch(conv, messages, assistant?.rag, String(query));
|
28 |
+
|
29 |
+
const webSearchContext = webSearchToolResults?.contextSources
|
30 |
+
.map(({ context }, idx) => `Source [${idx + 1}]\n${context.trim()}`)
|
31 |
+
.join("\n\n----------\n\n");
|
32 |
|
33 |
return {
|
34 |
+
outputs: [
|
35 |
+
{
|
36 |
+
websearch: webSearchContext,
|
37 |
+
},
|
38 |
+
{
|
39 |
+
instructions:
|
40 |
+
"When answering the question, if you use sources from the websearch results above, cite each index inline individually wrapped like: [1], [2] etc.",
|
41 |
+
},
|
42 |
+
],
|
43 |
display: false,
|
44 |
};
|
45 |
},
|