import katex from "katex";
import "katex/dist/contrib/mhchem.mjs";
import { Marked } from "marked";
import type { Tokens, TokenizerExtension, RendererExtension } from "marked";
import type { WebSearchSource } from "$lib/types/WebSearch";
import hljs from "highlight.js";

/** Token emitted by the block-level math tokenizer (`$$ ... $$` or `\[ ... \]`). */
interface KatexBlockToken extends Tokens.Generic {
  type: "katexBlock";
  raw: string;
  text: string;
  displayMode: true;
}

/** Token emitted by the inline math tokenizer (`$ ... $` or `\( ... \)`). */
interface KatexInlineToken extends Tokens.Generic {
  type: "katexInline";
  raw: string;
  text: string;
  displayMode: false;
}

// Delimiter rules, tried in order. All are anchored at the start of the remaining
// source and carry no `g` flag, so sharing the RegExp objects between calls is safe.
const KATEX_BLOCK_RULES: readonly RegExp[] = [
  /^\${2}([\s\S]+?)\${2}/, // $$ ... $$
  /^\\\[([\s\S]+?)\\\]/, // \[ ... \]
];
const KATEX_INLINE_RULES: readonly RegExp[] = [
  /^\$([^$]+?)\$/, // $...$
  /^\\\(([\s\S]+?)\\\)/, // \(...\)
];

/** Returns the first successful match of `rules` against `src`, or `undefined` if none match. */
function firstMatch(src: string, rules: readonly RegExp[]): RegExpExecArray | undefined {
  for (const rule of rules) {
    const match = rule.exec(src);
    if (match) return match;
  }
  return undefined;
}

/**
 * Renders a TeX string to HTML with KaTeX.
 * `throwOnError` is disabled, so the call is made with KaTeX's non-throwing error mode.
 */
function renderKatex(text: string, displayMode: boolean): string {
  return katex.renderToString(text, { throwOnError: false, displayMode });
}

/**
 * marked extension for display math delimited by `$$ ... $$` or `\[ ... \]`.
 */
export const katexBlockExtension: TokenizerExtension & RendererExtension = {
  name: "katexBlock",
  level: "block",
  /** Index of the next possible block-math opener in `src`, or -1 when there is none. */
  start(src: string): number | undefined {
    return src.search(/(\${2}|\\\[)/);
  },
  /** Builds a block math token when `src` begins with a complete delimited expression. */
  tokenizer(src: string): KatexBlockToken | undefined {
    const match = firstMatch(src, KATEX_BLOCK_RULES);
    if (!match) return undefined;
    return {
      type: "katexBlock",
      raw: match[0],
      text: (match[1] ?? "").trim(),
      displayMode: true,
    };
  },
  renderer(token) {
    return token.type === "katexBlock" ? renderKatex(token.text, token.displayMode) : undefined;
  },
};

/**
 * marked extension for inline math delimited by `$...$` or `\(...\)`.
 */
const katexInlineExtension: TokenizerExtension & RendererExtension = {
  name: "katexInline",
  level: "inline",
  /** Index of the next possible inline-math opener in `src`, or -1 when there is none. */
  start(src: string): number | undefined {
    return src.search(/(\$|\\\()/);
  },
  /** Builds an inline math token when `src` begins with a complete delimited expression. */
  tokenizer(src: string): KatexInlineToken | undefined {
    const match = firstMatch(src, KATEX_INLINE_RULES);
    if (!match) return undefined;
    return {
      type: "katexInline",
      raw: match[0],
      text: (match[1] ?? "").trim(),
      displayMode: false,
    };
  },
  renderer(token) {
    return token.type === "katexInline" ? renderKatex(token.text, token.displayMode) : undefined;
  },
};

/** Replacement entity for every character matched by the regex in `escapeHTML`. */
const HTML_ESCAPES: Readonly<Record<string, string>> = {
  "<": "&lt;",
  ">": "&gt;",
  "&": "&amp;",
  "'": "&#39;",
  '"': "&quot;",
};

/**
 * Escapes the five HTML-significant characters (`<`, `>`, `&`, `'`, `"`) so that
 * `content` can be placed in element content or a quoted attribute value.
 *
 * @param content - Text to escape.
 * @returns `content` with each special character replaced by its entity.
 */
function escapeHTML(content: string): string {
  return content.replace(/[<>&"']/g, (ch) => HTML_ESCAPES[ch] ?? ch);
}

/**
 * Replaces bracketed citation markers such as `[2]` with a superscript link to the
 * corresponding web-search source.
 *
 * Markers are 1-based: `[n]` refers to `webSearchSources[n - 1]`. A marker is left
 * untouched when it is `[0]` or when no source exists at that position.
 *
 * @param md - Text (the rendered HTML when used as the marked `postprocess` hook).
 * @param webSearchSources - Sources that citation numbers refer to.
 * @returns `md` with every resolvable marker replaced.
 */
function addInlineCitations(md: string, webSearchSources: WebSearchSource[] = []): string {
  const linkStyle =
    "color: rgb(59, 130, 246); text-decoration: none; hover:text-decoration: underline;";

  return md.replace(/\[(\d+)\]/g, (match: string, digits: string) => {
    const index = Number(digits);
    if (index === 0) return match;

    const source = webSearchSources[index - 1];
    if (!source) return match;

    // NOTE(review): assumes WebSearchSource exposes its URL as `link` — confirm
    // against $lib/types/WebSearch.
    const href = escapeHTML(String(source.link));
    const anchor = `<a href="${href}" target="_blank" rel="noreferrer" style="${linkStyle}">${index}</a>`;
    return ` <sup>${anchor}</sup>`;
  });
}

/**
 * Creates a marked instance configured with the KaTeX extensions, GFM and line
 * breaks, escaped raw HTML, links that open in a new tab, and inline citation
 * post-processing bound to `sources`.
 */
function createMarkedInstance(sources: WebSearchSource[]): Marked {
  return new Marked({
    hooks: {
      postprocess: (html) => addInlineCitations(html, sources),
    },
    extensions: [katexBlockExtension, katexInlineExtension],
    renderer: {
      // `text` is passed through as-is; only the URL is escaped for the attribute.
      link: (href, _title, text) =>
        `<a href="${escapeHTML(href ?? "")}" target="_blank" rel="noreferrer">${text}</a>`,
      // Raw HTML found in the markdown is shown as text instead of being injected.
      html: (html) => escapeHTML(html),
    },
    gfm: true,
    breaks: true,
  });
}

/** A fenced or indented code block, with highlighted HTML and the original text. */
type CodeToken = {
  type: "code";
  lang: string;
  code: string;
  rawCode: string;
};

/** Any non-code top-level token, rendered to HTML. */
type TextToken = {
  type: "text";
  html: string | Promise<string>;
};

/**
 * Builds a `CodeToken` by highlighting `text` with highlight.js.
 * Auto-detection is restricted to the aliases registered for `lang` when that
 * language is known; otherwise no subset is passed.
 */
function highlightCode(text: string, lang: string | undefined): CodeToken {
  const language = lang ?? "";
  return {
    type: "code",
    lang: language,
    code: hljs.highlightAuto(text, hljs.getLanguage(language)?.aliases).value,
    rawCode: text,
  };
}

/**
 * Lexes `content` into top-level markdown tokens and renders each one: code tokens
 * are syntax-highlighted, every other token is rendered to HTML from its raw source.
 *
 * @param content - Markdown source.
 * @param sources - Web-search sources used to resolve `[n]` citation markers.
 * @returns One output token per top-level token, in document order, with `html` resolved.
 */
export async function processTokens(
  content: string,
  sources: WebSearchSource[]
): Promise<Token[]> {
  const marked = createMarkedInstance(sources);
  const tokens = marked.lexer(content);

  return Promise.all(
    tokens.map(async (token): Promise<Token> => {
      if (token.type === "code") {
        return highlightCode(token.text, token.lang);
      }
      return { type: "text", html: await marked.parse(token.raw) };
    })
  );
}

/**
 * Synchronous counterpart of `processTokens`. The `html` of a text token is
 * whatever `marked.parse` returns and is not awaited here.
 *
 * @param content - Markdown source.
 * @param sources - Web-search sources used to resolve `[n]` citation markers.
 * @returns One output token per top-level token, in document order.
 */
export function processTokensSync(content: string, sources: WebSearchSource[]): Token[] {
  const marked = createMarkedInstance(sources);
  const tokens = marked.lexer(content);

  return tokens.map((token): Token => {
    if (token.type === "code") {
      return highlightCode(token.text, token.lang);
    }
    return { type: "text", html: marked.parse(token.raw) };
  });
}

export type Token = CodeToken | TextToken;