nsarrazin HF Staff Liam Dyer Aaditya Sahay Aaditya Sahay committed on
Commit
69dee14
·
unverified ·
1 Parent(s): 451ffc4

feat: add sources for websearch (#1551)

Browse files

* feat: playwright, spatial parsing, markdown for web search

Co-authored-by: Aaditya Sahay <[email protected]>

* feat: choose multiple clusters if necessary (#2)

* chore: resolve linting failures

* feat: improve parsing performance and error messages

* feat: inline citations

* feat: adjust inline citation prompt, less intrusive tokens

* feat: add sources to message when using websearch

* fix: clean up packages

* fix: packages

* fix: packages lol

* fix: make websearch citation work better with tools

* fix: use single brackets for sources, only render source element if a matching source is available

* fix: bad import

---------

Co-authored-by: Liam Dyer <[email protected]>
Co-authored-by: Aaditya Sahay <[email protected]>
Co-authored-by: Aaditya Sahay <[email protected]>

package-lock.json CHANGED
@@ -13434,4 +13434,4 @@
13434
  }
13435
  }
13436
  }
13437
- }
 
13434
  }
13435
  }
13436
  }
13437
+ }
src/lib/components/chat/ChatMessage.svelte CHANGED
@@ -37,6 +37,28 @@
37
  import DOMPurify from "isomorphic-dompurify";
38
  import { enhance } from "$app/forms";
39
  import { browser } from "$app/environment";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  function sanitizeMd(md: string) {
42
  let ret = md
@@ -114,7 +136,7 @@
114
  })
115
  );
116
 
117
- $: tokens = marked.lexer(sanitizeMd(message.content ?? ""));
118
 
119
  $: emptyLoad =
120
  !message.content && (webSearchIsDone || (searchUpdates && searchUpdates.length === 0));
 
37
  import DOMPurify from "isomorphic-dompurify";
38
  import { enhance } from "$app/forms";
39
  import { browser } from "$app/environment";
40
+ import type { WebSearchSource } from "$lib/types/WebSearch";
41
+
42
+ function addInlineCitations(md: string, webSearchSources: WebSearchSource[] = []): string {
43
+ const linkStyle =
44
+ "color: rgb(59, 130, 246); text-decoration: none; hover:text-decoration: underline;";
45
+
46
+ return md.replace(/\[(\d+)\]/g, (match: string) => {
47
+ const indices: number[] = (match.match(/\d+/g) || []).map(Number);
48
+ const links: string = indices
49
+ .map((index: number) => {
50
+ const source = webSearchSources[index - 1];
51
+ if (source) {
52
+ return `<a href="${source.link}" target="_blank" rel="noreferrer" style="${linkStyle}">${index}</a>`;
53
+ }
54
+ return "";
55
+ })
56
+ .filter(Boolean)
57
+ .join(", ");
58
+
59
+ return links ? ` <sup>${links}</sup>` : match;
60
+ });
61
+ }
62
 
63
  function sanitizeMd(md: string) {
64
  let ret = md
 
136
  })
137
  );
138
 
139
+ $: tokens = marked.lexer(addInlineCitations(sanitizeMd(message.content), webSearchSources));
140
 
141
  $: emptyLoad =
142
  !message.content && (webSearchIsDone || (searchUpdates && searchUpdates.length === 0));
src/lib/server/endpoints/preprocessMessages.ts CHANGED
@@ -17,7 +17,7 @@ export async function preprocessMessages(
17
 
18
  function addWebSearchContext(messages: Message[], webSearch: Message["webSearch"]) {
19
  const webSearchContext = webSearch?.contextSources
20
- .map(({ context }) => context.trim())
21
  .join("\n\n----------\n\n");
22
 
23
  // No web search context available, skip
@@ -35,7 +35,7 @@ function addWebSearchContext(messages: Message[], webSearch: Message["webSearch"
35
  const finalMessage = {
36
  ...messages[messages.length - 1],
37
  content: `I searched the web using the query: ${webSearch.searchQuery}.
38
- Today is ${currentDate} and here are the results:
39
  =====================
40
  ${webSearchContext}
41
  =====================
 
17
 
18
  function addWebSearchContext(messages: Message[], webSearch: Message["webSearch"]) {
19
  const webSearchContext = webSearch?.contextSources
20
+ .map(({ context }, idx) => `Source [${idx + 1}]\n${context.trim()}`)
21
  .join("\n\n----------\n\n");
22
 
23
  // No web search context available, skip
 
35
  const finalMessage = {
36
  ...messages[messages.length - 1],
37
  content: `I searched the web using the query: ${webSearch.searchQuery}.
38
+ Today is ${currentDate} and here are the results. When answering the question, if you use a source, cite its index inline like this: [1], [2], etc.
39
  =====================
40
  ${webSearchContext}
41
  =====================
src/lib/server/tools/web/search.ts CHANGED
@@ -25,12 +25,21 @@ const websearch: ConfigTool = {
25
  showOutput: false,
26
  async *call({ query }, { conv, assistant, messages }) {
27
  const webSearchToolResults = yield* runWebSearch(conv, messages, assistant?.rag, String(query));
28
- const chunks = webSearchToolResults?.contextSources
29
- .map(({ context }) => context)
30
- .join("\n------------\n");
 
31
 
32
  return {
33
- outputs: [{ websearch: chunks }],
 
 
 
 
 
 
 
 
34
  display: false,
35
  };
36
  },
 
25
  showOutput: false,
26
  async *call({ query }, { conv, assistant, messages }) {
27
  const webSearchToolResults = yield* runWebSearch(conv, messages, assistant?.rag, String(query));
28
+
29
+ const webSearchContext = webSearchToolResults?.contextSources
30
+ .map(({ context }, idx) => `Source [${idx + 1}]\n${context.trim()}`)
31
+ .join("\n\n----------\n\n");
32
 
33
  return {
34
+ outputs: [
35
+ {
36
+ websearch: webSearchContext,
37
+ },
38
+ {
39
+ instructions:
40
+ "When answering the question, if you use sources from the websearch results above, cite each index inline individually wrapped like: [1], [2] etc.",
41
+ },
42
+ ],
43
  display: false,
44
  };
45
  },