calycekr nsarrazin HF Staff committed on
Commit
cbd723d
·
unverified ·
1 Parent(s): a2e30c2

feat: add KaTeX support for block and inline math rendering in Markdown (#1643)

Browse files

* feat: add KaTeX support for block and inline math rendering in Markdown

* fix: Reintroduce escapeHTML to prevent raw HTML injection

- Properly escape user-generated HTML to avoid DOM injection vulnerabilities.
- Ensures KaTeX rendering remains unaffected while blocking malicious content.

* feat: add the KaTeX mhchem extension to enable the \ce and \pu functions from the mhchem package.

- https://github.com/KaTeX/KaTeX/tree/main/contrib/mhchem

---------

Co-authored-by: Nathan Sarrazin <[email protected]>

src/lib/components/chat/MarkdownRenderer.svelte CHANGED
@@ -1,13 +1,144 @@
1
  <script lang="ts">
2
  import type { WebSearchSource } from "$lib/types/WebSearch";
3
  import katex from "katex";
 
4
  import DOMPurify from "isomorphic-dompurify";
5
  import { Marked } from "marked";
 
6
  import CodeBlock from "../CodeBlock.svelte";
7
 
8
  export let content: string;
9
  export let sources: WebSearchSource[] = [];
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  function addInlineCitations(md: string, webSearchSources: WebSearchSource[] = []): string {
12
  const linkStyle =
13
  "color: rgb(59, 130, 246); text-decoration: none; hover:text-decoration: underline;";
@@ -30,63 +161,14 @@
30
  });
31
  }
32
 
33
- function escapeHTML(content: string) {
34
- return content.replace(
35
- /[<>&\n]/g,
36
- (x) =>
37
- ({
38
- "<": "&lt;",
39
- ">": "&gt;",
40
- "&": "&amp;",
41
- }[x] || x)
42
- );
43
- }
44
-
45
- function processLatex(parsed: string) {
46
- const delimiters = [
47
- { left: "$$", right: "$$", display: true },
48
- { left: "$", right: "$", display: false },
49
- { left: "( ", right: " )", display: false },
50
- { left: "[ ", right: " ]", display: true },
51
- ];
52
-
53
- for (const { left, right, display } of delimiters) {
54
- // Escape special regex characters in the delimiters
55
- const escapedLeft = left.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
56
- const escapedRight = right.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
57
-
58
- // Create regex pattern that matches content between delimiters
59
- const pattern = new RegExp(`(?<!\\w)${escapedLeft}([^]*?)${escapedRight}(?!\\w)`, "g");
60
-
61
- parsed = parsed.replace(pattern, (match, latex) => {
62
- try {
63
- // Remove the delimiters from the latex content
64
- const cleanLatex = latex.trim();
65
- const rendered = katex.renderToString(cleanLatex, { displayMode: display });
66
-
67
- // For display mode, wrap in centered paragraph
68
- if (display) {
69
- return `<p style="width:100%;text-align:center;">${rendered}</p>`;
70
- }
71
- return rendered;
72
- } catch (error) {
73
- console.error("KaTeX error:", error);
74
- return match; // Return original on error
75
- }
76
- });
77
- }
78
- return parsed;
79
- }
80
-
81
  const marked = new Marked({
82
  hooks: {
83
- preprocess: (md) => addInlineCitations(escapeHTML(md), sources),
84
- postprocess: (html) => {
85
- return DOMPurify.sanitize(processLatex(html));
86
- },
87
  },
 
88
  renderer: {
89
- codespan: (code) => `<code>${code.replaceAll("&amp;", "&")}</code>`,
90
  link: (href, title, text) =>
91
  `<a href="${href?.replace(/>$/, "")}" target="_blank" rel="noreferrer">${text}</a>`,
92
  },
 
1
  <script lang="ts">
2
  import type { WebSearchSource } from "$lib/types/WebSearch";
3
  import katex from "katex";
4
+ import "katex/dist/contrib/mhchem.mjs";
5
  import DOMPurify from "isomorphic-dompurify";
6
  import { Marked } from "marked";
7
+ import type { Tokens, TokenizerExtension, RendererExtension } from "marked";
8
  import CodeBlock from "../CodeBlock.svelte";
9
 
10
  export let content: string;
11
  export let sources: WebSearchSource[] = [];
12
 
13
/**
 * Token produced by the block-level KaTeX tokenizer for display math
 * written as `$$ ... $$` or `\[ ... \]`.
 */
interface katexBlockToken extends Tokens.Generic {
  /** Discriminator for this token kind. */
  type: "katexBlock";
  /** Block math is always rendered in KaTeX display mode. */
  displayMode: true;
  /** The LaTeX source found between the delimiters. */
  text: string;
  /** The complete matched source, delimiters included. */
  raw: string;
}
19
+
20
/**
 * Token produced by the inline-level KaTeX tokenizer for math written as
 * `$...$` or `\(...\)`.
 */
interface katexInlineToken extends Tokens.Generic {
  /** Discriminator for this token kind. */
  type: "katexInline";
  /** Inline math is never rendered in KaTeX display mode. */
  displayMode: false;
  /** The LaTeX source found between the delimiters. */
  text: string;
  /** The complete matched source, delimiters included. */
  raw: string;
}
26
+
27
/**
 * Marked block-level extension for display math.
 *
 * Recognises `$$ ... $$` and `\[ ... \]` at the start of the remaining
 * source and renders the enclosed LaTeX with KaTeX in display mode.
 */
export const katexBlockExtension: TokenizerExtension & RendererExtension = {
  name: "katexBlock",
  level: "block",

  /**
   * Index of the first `$$` or `\[` in `src`, or -1 when neither occurs.
   */
  start(src: string): number | undefined {
    return src.search(/(\${2}|\\\[)/);
  },

  /**
   * Tries each delimiter pair in turn against the start of `src` and
   * returns a token for the first one that matches.
   */
  tokenizer(src: string): katexBlockToken | undefined {
    const delimiterRules = [
      /^\${2}([\s\S]+?)\${2}/, // $$ ... $$
      /^\\\[([\s\S]+?)\\\]/, // \[ ... \]
    ];

    for (const rule of delimiterRules) {
      const found = rule.exec(src);
      if (found) {
        return {
          type: "katexBlock",
          raw: found[0],
          text: found[1].trim(),
          displayMode: true,
        };
      }
    }

    return undefined;
  },

  /**
   * Renders a `katexBlock` token to an HTML string; any other token type
   * is declined by returning `undefined`.
   */
  renderer(token) {
    if (token.type !== "katexBlock") {
      return undefined;
    }

    // throwOnError: false is passed so KaTeX does not throw on invalid LaTeX.
    const renderOptions = {
      throwOnError: false,
      displayMode: token.displayMode,
    };
    return katex.renderToString(token.text, renderOptions);
  },
};
77
+
78
/**
 * Marked inline-level extension for inline math.
 *
 * Recognises `$...$` and `\(...\)` at the start of the remaining source and
 * renders the enclosed LaTeX with KaTeX in inline (non-display) mode.
 */
const katexInlineExtension: TokenizerExtension & RendererExtension = {
  name: "katexInline",
  level: "inline",

  /**
   * Index of the first `$` or `\(` in `src`, or -1 when neither occurs.
   */
  start(src: string): number | undefined {
    const match = src.match(/(\$|\\\()/);
    return match ? match.index : -1;
  },

  /**
   * Produces a `katexInline` token when `src` begins with a complete
   * `$...$` or `\(...\)` span; otherwise returns `undefined`.
   */
  tokenizer(src: string): katexInlineToken | undefined {
    // 1) $...$
    // The closing `$` must not be immediately followed by a digit. Without
    // this guard, plain currency text such as "$5 and $10" is tokenized as
    // the formula "5 and". This mirrors Pandoc's `tex_math_dollars` rule.
    const rule1 = /^\$([^$]+?)\$(?!\d)/;
    const match1 = rule1.exec(src);
    if (match1) {
      const token: katexInlineToken = {
        type: "katexInline",
        raw: match1[0],
        text: match1[1].trim(),
        displayMode: false,
      };
      return token;
    }

    // 2) \(...\)
    const rule2 = /^\\\(([\s\S]+?)\\\)/;
    const match2 = rule2.exec(src);
    if (match2) {
      const token: katexInlineToken = {
        type: "katexInline",
        raw: match2[0],
        text: match2[1].trim(),
        displayMode: false,
      };
      return token;
    }

    return undefined;
  },

  /**
   * Renders a `katexInline` token to an HTML string; any other token type
   * is declined by returning `undefined`.
   */
  renderer(token) {
    if (token.type === "katexInline") {
      // throwOnError: false is passed so KaTeX does not throw on invalid LaTeX.
      return katex.renderToString(token.text, {
        throwOnError: false,
        displayMode: token.displayMode,
      });
    }
    return undefined;
  },
};
127
+
128
/**
 * Escapes the characters `<`, `>`, `&`, `"` and `'` in `content` by
 * replacing each with its HTML entity, so the text can be emitted as markup
 * without being interpreted as HTML.
 */
function escapeHTML(content: string) {
  const entityFor: Record<string, string> = {
    "<": "&lt;",
    ">": "&gt;",
    "&": "&amp;",
    "'": "&#39;",
    '"': "&quot;",
  };

  return content.replace(/[<>&"']/g, (ch) => entityFor[ch] || ch);
}
141
+
142
  function addInlineCitations(md: string, webSearchSources: WebSearchSource[] = []): string {
143
  const linkStyle =
144
  "color: rgb(59, 130, 246); text-decoration: none; hover:text-decoration: underline;";
 
161
  });
162
  }
163
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  const marked = new Marked({
165
  hooks: {
166
+ preprocess: (md) => addInlineCitations(md, sources),
167
+ postprocess: (html) => DOMPurify.sanitize(html),
 
 
168
  },
169
+ extensions: [katexBlockExtension, katexInlineExtension],
170
  renderer: {
171
+ html: (html) => escapeHTML(html),
172
  link: (href, title, text) =>
173
  `<a href="${href?.replace(/>$/, "")}" target="_blank" rel="noreferrer">${text}</a>`,
174
  },