nsarrazin HF Staff committed on
Commit
1bcd186
·
unverified ·
1 Parent(s): 97ebd13

feat: add MarkdownRenderer component & fix latex formatting (#1569)

Browse files

* feat: add extra latex delimiters

* fix: imports & afterUpdate

* feat: create MarkdownRenderer and fix latex formatting

* fix: remove extra whitespace

* fix: remove unnecessary `extraLatexDelimiters`

* fix: make sure links go to new page

package-lock.json CHANGED
@@ -35,9 +35,9 @@
35
  "jsdom": "^22.0.0",
36
  "json5": "^2.2.3",
37
  "jsonpath": "^1.1.1",
 
38
  "lint-staged": "^15.2.7",
39
  "marked": "^12.0.1",
40
- "marked-katex-extension": "^5.0.1",
41
  "mongodb": "^5.8.0",
42
  "nanoid": "^4.0.2",
43
  "openid-client": "^5.4.2",
@@ -69,6 +69,7 @@
69
  "@types/js-yaml": "^4.0.9",
70
  "@types/jsdom": "^21.1.1",
71
  "@types/jsonpath": "^0.2.4",
 
72
  "@types/mime-types": "^2.1.4",
73
  "@types/minimist": "^1.2.5",
74
  "@types/node": "^22.1.0",
@@ -4302,7 +4303,9 @@
4302
  "node_modules/@types/katex": {
4303
  "version": "0.16.7",
4304
  "resolved": "https://registry.npmjs.org/@types/katex/-/katex-0.16.7.tgz",
4305
- "integrity": "sha512-HMwFiRujE5PjrgwHQ25+bsLJgowjGjm5Z8FVSf0N6PwgJrwxH0QxzHYDcKsTfV3wva0vzrpqMTJS2jXPr5BMEQ=="
 
 
4306
  },
4307
  "node_modules/@types/mime": {
4308
  "version": "1.3.5",
@@ -5764,7 +5767,7 @@
5764
  "version": "8.3.0",
5765
  "resolved": "https://registry.npmjs.org/commander/-/commander-8.3.0.tgz",
5766
  "integrity": "sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww==",
5767
- "peer": true,
5768
  "engines": {
5769
  "node": ">= 12"
5770
  }
@@ -8351,7 +8354,7 @@
8351
  "https://opencollective.com/katex",
8352
  "https://github.com/sponsors/katex"
8353
  ],
8354
- "peer": true,
8355
  "dependencies": {
8356
  "commander": "^8.3.0"
8357
  },
@@ -8816,18 +8819,6 @@
8816
  "node": ">= 18"
8817
  }
8818
  },
8819
- "node_modules/marked-katex-extension": {
8820
- "version": "5.1.2",
8821
- "resolved": "https://registry.npmjs.org/marked-katex-extension/-/marked-katex-extension-5.1.2.tgz",
8822
- "integrity": "sha512-jRtacvDAPULKBWArDno0IGpzzpUw12yb8OaEsv3dTlvcIr21+mF9kD+Bxo2m/ErX/2ZIml6zFVMnpxCpqx3stw==",
8823
- "dependencies": {
8824
- "@types/katex": "^0.16.7"
8825
- },
8826
- "peerDependencies": {
8827
- "katex": ">=0.16 <0.17",
8828
- "marked": ">=4 <15"
8829
- }
8830
- },
8831
  "node_modules/mdn-data": {
8832
  "version": "2.0.30",
8833
  "resolved": "https://registry.npmjs.org/mdn-data/-/mdn-data-2.0.30.tgz",
 
35
  "jsdom": "^22.0.0",
36
  "json5": "^2.2.3",
37
  "jsonpath": "^1.1.1",
38
+ "katex": "^0.16.11",
39
  "lint-staged": "^15.2.7",
40
  "marked": "^12.0.1",
 
41
  "mongodb": "^5.8.0",
42
  "nanoid": "^4.0.2",
43
  "openid-client": "^5.4.2",
 
69
  "@types/js-yaml": "^4.0.9",
70
  "@types/jsdom": "^21.1.1",
71
  "@types/jsonpath": "^0.2.4",
72
+ "@types/katex": "^0.16.7",
73
  "@types/mime-types": "^2.1.4",
74
  "@types/minimist": "^1.2.5",
75
  "@types/node": "^22.1.0",
 
4303
  "node_modules/@types/katex": {
4304
  "version": "0.16.7",
4305
  "resolved": "https://registry.npmjs.org/@types/katex/-/katex-0.16.7.tgz",
4306
+ "integrity": "sha512-HMwFiRujE5PjrgwHQ25+bsLJgowjGjm5Z8FVSf0N6PwgJrwxH0QxzHYDcKsTfV3wva0vzrpqMTJS2jXPr5BMEQ==",
4307
+ "dev": true,
4308
+ "license": "MIT"
4309
  },
4310
  "node_modules/@types/mime": {
4311
  "version": "1.3.5",
 
5767
  "version": "8.3.0",
5768
  "resolved": "https://registry.npmjs.org/commander/-/commander-8.3.0.tgz",
5769
  "integrity": "sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww==",
5770
+ "license": "MIT",
5771
  "engines": {
5772
  "node": ">= 12"
5773
  }
 
8354
  "https://opencollective.com/katex",
8355
  "https://github.com/sponsors/katex"
8356
  ],
8357
+ "license": "MIT",
8358
  "dependencies": {
8359
  "commander": "^8.3.0"
8360
  },
 
8819
  "node": ">= 18"
8820
  }
8821
  },
 
 
 
 
 
 
 
 
 
 
 
 
8822
  "node_modules/mdn-data": {
8823
  "version": "2.0.30",
8824
  "resolved": "https://registry.npmjs.org/mdn-data/-/mdn-data-2.0.30.tgz",
package.json CHANGED
@@ -28,6 +28,7 @@
28
  "@types/js-yaml": "^4.0.9",
29
  "@types/jsdom": "^21.1.1",
30
  "@types/jsonpath": "^0.2.4",
 
31
  "@types/mime-types": "^2.1.4",
32
  "@types/minimist": "^1.2.5",
33
  "@types/node": "^22.1.0",
@@ -87,9 +88,9 @@
87
  "jsdom": "^22.0.0",
88
  "json5": "^2.2.3",
89
  "jsonpath": "^1.1.1",
 
90
  "lint-staged": "^15.2.7",
91
  "marked": "^12.0.1",
92
- "marked-katex-extension": "^5.0.1",
93
  "mongodb": "^5.8.0",
94
  "nanoid": "^4.0.2",
95
  "openid-client": "^5.4.2",
 
28
  "@types/js-yaml": "^4.0.9",
29
  "@types/jsdom": "^21.1.1",
30
  "@types/jsonpath": "^0.2.4",
31
+ "@types/katex": "^0.16.7",
32
  "@types/mime-types": "^2.1.4",
33
  "@types/minimist": "^1.2.5",
34
  "@types/node": "^22.1.0",
 
88
  "jsdom": "^22.0.0",
89
  "json5": "^2.2.3",
90
  "jsonpath": "^1.1.1",
91
+ "katex": "^0.16.11",
92
  "lint-staged": "^15.2.7",
93
  "marked": "^12.0.1",
 
94
  "mongodb": "^5.8.0",
95
  "nanoid": "^4.0.2",
96
  "openid-client": "^5.4.2",
src/lib/components/chat/ChatMessage.svelte CHANGED
@@ -1,12 +1,9 @@
1
  <script lang="ts">
2
- import { marked, type MarkedOptions } from "marked";
3
- import markedKatex from "marked-katex-extension";
4
  import type { Message } from "$lib/types/Message";
5
  import { afterUpdate, createEventDispatcher, tick } from "svelte";
6
  import { deepestChild } from "$lib/utils/deepestChild";
7
  import { page } from "$app/stores";
8
 
9
- import CodeBlock from "../CodeBlock.svelte";
10
  import CopyToClipBoardBtn from "../CopyToClipBoardBtn.svelte";
11
  import IconLoading from "../icons/IconLoading.svelte";
12
  import CarbonRotate360 from "~icons/carbon/rotate-360";
@@ -17,7 +14,6 @@
17
  import CarbonPen from "~icons/carbon/pen";
18
  import CarbonChevronLeft from "~icons/carbon/chevron-left";
19
  import CarbonChevronRight from "~icons/carbon/chevron-right";
20
- import { PUBLIC_SEP_TOKEN } from "$lib/constants/publicSepToken";
21
  import type { Model } from "$lib/types/Model";
22
  import UploadedFile from "./UploadedFile.svelte";
23
 
@@ -34,56 +30,9 @@
34
  import { useConvTreeStore } from "$lib/stores/convTree";
35
  import ToolUpdate from "./ToolUpdate.svelte";
36
  import { useSettingsStore } from "$lib/stores/settings";
37
- import DOMPurify from "isomorphic-dompurify";
38
  import { enhance } from "$app/forms";
39
  import { browser } from "$app/environment";
40
- import type { WebSearchSource } from "$lib/types/WebSearch";
41
-
42
- function addInlineCitations(md: string, webSearchSources: WebSearchSource[] = []): string {
43
- const linkStyle =
44
- "color: rgb(59, 130, 246); text-decoration: none; hover:text-decoration: underline;";
45
-
46
- return md.replace(/\[(\d+)\]/g, (match: string) => {
47
- const indices: number[] = (match.match(/\d+/g) || []).map(Number);
48
- const links: string = indices
49
- .map((index: number) => {
50
- if (index === 0) return false;
51
- const source = webSearchSources[index - 1];
52
- if (source) {
53
- return `<a href="${source.link}" target="_blank" rel="noreferrer" style="${linkStyle}">${index}</a>`;
54
- }
55
- return "";
56
- })
57
- .filter(Boolean)
58
- .join(", ");
59
-
60
- return links ? ` <sup>${links}</sup>` : match;
61
- });
62
- }
63
-
64
- function sanitizeMd(md: string) {
65
- let ret = md
66
- .replace(/<\|[a-z]*$/, "")
67
- .replace(/<\|[a-z]+\|$/, "")
68
- .replace(/<$/, "")
69
- .replaceAll(PUBLIC_SEP_TOKEN, " ")
70
- .replaceAll(/<\|[a-z]+\|>/g, " ")
71
- .replaceAll(/<br\s?\/?>/gi, "\n")
72
- .replaceAll("<", "&lt;")
73
- .trim();
74
-
75
- for (const stop of [...(model.parameters?.stop ?? []), "<|endoftext|>"]) {
76
- if (ret.endsWith(stop)) {
77
- ret = ret.slice(0, -stop.length).trim();
78
- }
79
- }
80
-
81
- return ret;
82
- }
83
-
84
- function unsanitizeMd(md: string) {
85
- return md.replaceAll("&lt;", "<");
86
- }
87
 
88
  export let model: Model;
89
  export let id: Message["id"];
@@ -108,36 +57,6 @@
108
  let isCopied = false;
109
 
110
  let initialized = false;
111
- const renderer = new marked.Renderer();
112
- // For code blocks with simple backticks
113
- renderer.codespan = (code) => {
114
- // Unsanitize double-sanitized code
115
- return `<code>${code.replaceAll("&amp;", "&")}</code>`;
116
- };
117
-
118
- renderer.link = (href, title, text) => {
119
- return `<a href="${href?.replace(/>$/, "")}" target="_blank" rel="noreferrer">${text}</a>`;
120
- };
121
-
122
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
123
- const { extensions, ...defaults } = marked.getDefaults() as MarkedOptions & {
124
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
125
- extensions: any;
126
- };
127
- const options: MarkedOptions = {
128
- ...defaults,
129
- gfm: true,
130
- breaks: true,
131
- renderer,
132
- };
133
-
134
- marked.use(
135
- markedKatex({
136
- throwOnError: false,
137
- })
138
- );
139
-
140
- $: tokens = marked.lexer(addInlineCitations(sanitizeMd(message.content), webSearchSources));
141
 
142
  $: emptyLoad =
143
  !message.content && (webSearchIsDone || (searchUpdates && searchUpdates.length === 0));
@@ -258,7 +177,7 @@
258
 
259
  {#if message.from === "assistant"}
260
  <div
261
- class="group relative -mb-4 flex items-start justify-start gap-4 pb-4 leading-relaxed"
262
  id="message-assistant-{message.id}"
263
  role="presentation"
264
  on:click={() => (isTapped = !isTapped)}
@@ -289,7 +208,7 @@
289
  {/if}
290
  {#if searchUpdates && searchUpdates.length > 0}
291
  <OpenWebSearchResults
292
- classNames={tokens.length ? "mb-3.5" : ""}
293
  webSearchMessages={searchUpdates}
294
  />
295
  {/if}
@@ -304,23 +223,11 @@
304
  {/each}
305
  {/if}
306
 
307
- <div
308
- class="prose max-w-none dark:prose-invert max-sm:prose-sm prose-headings:font-semibold prose-h1:text-lg prose-h2:text-base prose-h3:text-base prose-pre:bg-gray-800 dark:prose-pre:bg-gray-900"
309
- bind:this={contentEl}
310
- >
311
  {#if isLast && loading && $settings.disableStream}
312
  <IconLoading classNames="loading inline ml-2 first:ml-0" />
313
  {/if}
314
- {#each tokens as token}
315
- {#if token.type === "code"}
316
- <CodeBlock lang={token.lang} code={unsanitizeMd(token.text)} />
317
- {:else}
318
- {#await marked.parse(token.raw, options) then parsed}
319
- <!-- eslint-disable-next-line svelte/no-at-html-tags -->
320
- {@html DOMPurify.sanitize(parsed)}
321
- {/await}
322
- {/if}
323
- {/each}
324
  </div>
325
 
326
  <!-- Web Search sources -->
@@ -425,7 +332,7 @@
425
  {/if}
426
  {#if message.from === "user"}
427
  <div
428
- class="group relative w-full items-start justify-start gap-4 max-sm:text-sm"
429
  id="message-user-{message.id}"
430
  role="presentation"
431
  on:click={() => (isTapped = !isTapped)}
 
1
  <script lang="ts">
 
 
2
  import type { Message } from "$lib/types/Message";
3
  import { afterUpdate, createEventDispatcher, tick } from "svelte";
4
  import { deepestChild } from "$lib/utils/deepestChild";
5
  import { page } from "$app/stores";
6
 
 
7
  import CopyToClipBoardBtn from "../CopyToClipBoardBtn.svelte";
8
  import IconLoading from "../icons/IconLoading.svelte";
9
  import CarbonRotate360 from "~icons/carbon/rotate-360";
 
14
  import CarbonPen from "~icons/carbon/pen";
15
  import CarbonChevronLeft from "~icons/carbon/chevron-left";
16
  import CarbonChevronRight from "~icons/carbon/chevron-right";
 
17
  import type { Model } from "$lib/types/Model";
18
  import UploadedFile from "./UploadedFile.svelte";
19
 
 
30
  import { useConvTreeStore } from "$lib/stores/convTree";
31
  import ToolUpdate from "./ToolUpdate.svelte";
32
  import { useSettingsStore } from "$lib/stores/settings";
 
33
  import { enhance } from "$app/forms";
34
  import { browser } from "$app/environment";
35
+ import MarkdownRenderer from "./MarkdownRenderer.svelte";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  export let model: Model;
38
  export let id: Message["id"];
 
57
  let isCopied = false;
58
 
59
  let initialized = false;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  $: emptyLoad =
62
  !message.content && (webSearchIsDone || (searchUpdates && searchUpdates.length === 0));
 
177
 
178
  {#if message.from === "assistant"}
179
  <div
180
+ class="message-assistant group relative -mb-4 flex items-start justify-start gap-4 pb-4 leading-relaxed"
181
  id="message-assistant-{message.id}"
182
  role="presentation"
183
  on:click={() => (isTapped = !isTapped)}
 
208
  {/if}
209
  {#if searchUpdates && searchUpdates.length > 0}
210
  <OpenWebSearchResults
211
+ classNames={message.content.length ? "mb-3.5" : ""}
212
  webSearchMessages={searchUpdates}
213
  />
214
  {/if}
 
223
  {/each}
224
  {/if}
225
 
226
+ <div bind:this={contentEl}>
 
 
 
227
  {#if isLast && loading && $settings.disableStream}
228
  <IconLoading classNames="loading inline ml-2 first:ml-0" />
229
  {/if}
230
+ <MarkdownRenderer content={message.content} sources={webSearchSources} />
 
 
 
 
 
 
 
 
 
231
  </div>
232
 
233
  <!-- Web Search sources -->
 
332
  {/if}
333
  {#if message.from === "user"}
334
  <div
335
+ class="message-user group relative w-full items-start justify-start gap-4 max-sm:text-sm"
336
  id="message-user-{message.id}"
337
  role="presentation"
338
  on:click={() => (isTapped = !isTapped)}
src/lib/components/chat/MarkdownRenderer.svelte ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <script lang="ts">
2
+ import type { WebSearchSource } from "$lib/types/WebSearch";
3
+ import katex from "katex";
4
+ import DOMPurify from "isomorphic-dompurify";
5
+ import { marked, type MarkedOptions } from "marked";
6
+ import CodeBlock from "../CodeBlock.svelte";
7
+
8
+ export let content: string;
9
+ export let sources: WebSearchSource[] = [];
10
+
11
+ function addInlineCitations(md: string, webSearchSources: WebSearchSource[] = []): string {
12
+ const linkStyle =
13
+ "color: rgb(59, 130, 246); text-decoration: none; hover:text-decoration: underline;";
14
+
15
+ return md.replace(/\[(\d+)\]/g, (match: string) => {
16
+ const indices: number[] = (match.match(/\d+/g) || []).map(Number);
17
+ const links: string = indices
18
+ .map((index: number) => {
19
+ if (index === 0) return false;
20
+ const source = webSearchSources[index - 1];
21
+ if (source) {
22
+ return `<a href="${source.link}" target="_blank" rel="noreferrer" style="${linkStyle}">${index}</a>`;
23
+ }
24
+ return "";
25
+ })
26
+ .filter(Boolean)
27
+ .join(", ");
28
+
29
+ return links ? ` <sup>${links}</sup>` : match;
30
+ });
31
+ }
32
+
33
+ const renderer = new marked.Renderer();
34
+
35
+ // For code blocks with simple backticks
36
+ renderer.codespan = (code) => {
37
+ // Unsanitize double-sanitized code
38
+ return `<code>${code.replaceAll("&amp;", "&")}</code>`;
39
+ };
40
+
41
+ renderer.link = (href, title, text) => {
42
+ return `<a href="${href?.replace(/>$/, "")}" target="_blank" rel="noreferrer">${text}</a>`;
43
+ };
44
+
45
+ const options: MarkedOptions = {
46
+ gfm: true,
47
+ // breaks: true,
48
+ renderer,
49
+ };
50
+
51
+ $: tokens = marked.lexer(addInlineCitations(content, sources));
52
+
53
+ function processLatex(parsed: string) {
54
+ const delimiters = [
55
+ { left: "$$", right: "$$", display: true },
56
+ { left: "$", right: "$", display: false },
57
+ { left: "\\(", right: "\\)", display: false },
58
+ { left: "\\[", right: "\\]", display: true },
59
+ ];
60
+
61
+ for (const { left, right, display } of delimiters) {
62
+ // Escape special regex characters in the delimiters
63
+ const escapedLeft = left.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
64
+ const escapedRight = right.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
65
+
66
+ // Create regex pattern that matches content between delimiters
67
+ const pattern = new RegExp(`${escapedLeft}([^]*?)${escapedRight}`, "g");
68
+
69
+ parsed = parsed.replace(pattern, (match, latex) => {
70
+ try {
71
+ // Remove the delimiters from the latex content
72
+ const cleanLatex = latex.trim();
73
+ const rendered = katex.renderToString(cleanLatex, { displayMode: display });
74
+
75
+ // For display mode, wrap in centered paragraph
76
+ if (display) {
77
+ return `<p style="width:100%;text-align:center;">${rendered}</p>`;
78
+ }
79
+ return rendered;
80
+ } catch (error) {
81
+ console.error("KaTeX error:", error);
82
+ return match; // Return original on error
83
+ }
84
+ });
85
+ }
86
+ return parsed;
87
+ }
88
+
89
+ DOMPurify.addHook("afterSanitizeAttributes", (node) => {
90
+ if (node.tagName === "A") {
91
+ node.setAttribute("rel", "noreferrer");
92
+ node.setAttribute("target", "_blank");
93
+ }
94
+ });
95
+ </script>
96
+
97
+ <div
98
+ class="prose max-w-none dark:prose-invert max-sm:prose-sm prose-headings:font-semibold prose-h1:text-lg prose-h2:text-base prose-h3:text-base prose-pre:bg-gray-800 dark:prose-pre:bg-gray-900"
99
+ >
100
+ {#each tokens as token}
101
+ {#if token.type === "code"}
102
+ <CodeBlock lang={token.lang} code={token.text} />
103
+ {:else}
104
+ {@const parsed = marked.parse(processLatex(token.raw), options)}
105
+ {#await parsed then parsed}
106
+ <!-- eslint-disable-next-line svelte/no-at-html-tags -->
107
+ {@html DOMPurify.sanitize(parsed)}
108
+ {/await}
109
+ {/if}
110
+ {/each}
111
+ </div>