Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Add metrics for models, tools, websearch (#1186)
Browse files
* Add custom metrics for messages and conversations
* lint
* Add metrics for
- model health
- tools
- websearch
* Add time window & age buckets to summaries
* Increase max age for tool use duration
---------
Co-authored-by: antoniora <[email protected]>
- src/lib/server/metrics.ts +142 -1
- src/lib/server/textGeneration/tools.ts +18 -0
- src/lib/server/websearch/embed/embed.ts +5 -0
- src/lib/server/websearch/runWebSearch.ts +3 -0
- src/lib/server/websearch/scrape/scrape.ts +8 -0
- src/routes/conversation/+server.ts +3 -0
- src/routes/conversation/[id]/+server.ts +30 -0
src/lib/server/metrics.ts
CHANGED
@@ -1,10 +1,39 @@
|
|
1 |
-
import { collectDefaultMetrics, Registry } from "prom-client";
|
2 |
import express from "express";
|
3 |
import { logger } from "$lib/server/logger";
|
4 |
import { env } from "$env/dynamic/private";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
export class MetricsServer {
|
7 |
private static instance: MetricsServer;
|
|
|
8 |
|
9 |
private constructor() {
|
10 |
const app = express();
|
@@ -17,6 +46,114 @@ export class MetricsServer {
|
|
17 |
const register = new Registry();
|
18 |
collectDefaultMetrics({ register });
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
app.get("/metrics", (req, res) => {
|
21 |
register.metrics().then((metrics) => {
|
22 |
res.set("Content-Type", "text/plain");
|
@@ -40,4 +177,8 @@ export class MetricsServer {
|
|
40 |
|
41 |
return MetricsServer.instance;
|
42 |
}
|
|
|
|
|
|
|
|
|
43 |
}
|
|
|
1 |
+
import { collectDefaultMetrics, Registry, Counter, Summary } from "prom-client";
|
2 |
import express from "express";
|
3 |
import { logger } from "$lib/server/logger";
|
4 |
import { env } from "$env/dynamic/private";
|
5 |
+
import type { Model } from "$lib/types/Model";
|
6 |
+
import type { Tool } from "$lib/types/Tool";
|
7 |
+
|
8 |
+
/** Label set of per-model metrics: the `model` label is filled with a model id at the call sites. */
type ModelLabels = Record<"model", Model["id"]>;

/** Label set of per-tool metrics: the `tool` label is filled with the called tool's name. */
type ToolLabels = Record<"tool", Tool["name"]>;

/**
 * Typed handles for every custom Prometheus metric registered by `MetricsServer`.
 *
 * The type argument of prom-client's `Counter` / `Summary` is the union of label
 * *names* the metric accepts (not the label values), so each entry below names
 * its labels literally. Metrics without a type argument are unlabelled.
 * All durations are recorded in milliseconds (`Date.now()` differences).
 */
interface Metrics {
	/** Per-model usage and generation-timing metrics, labelled by `model`. */
	model: {
		conversationsTotal: Counter<keyof ModelLabels>;
		messagesTotal: Counter<keyof ModelLabels>;
		tokenCountTotal: Counter<keyof ModelLabels>;
		timePerOutputToken: Summary<keyof ModelLabels>;
		timeToFirstToken: Summary<keyof ModelLabels>;
		latency: Summary<keyof ModelLabels>;
	};

	/** Web search metrics; none of these carry labels. */
	webSearch: {
		requestCount: Counter;
		pageFetchCount: Counter;
		pageFetchCountError: Counter;
		pageFetchDuration: Summary;
		embeddingDuration: Summary;
	};

	/** Tool-calling metrics. */
	tool: {
		toolUseCount: Counter<keyof ToolLabels>;
		toolUseCountError: Counter<keyof ToolLabels>;
		toolUseDuration: Summary<keyof ToolLabels>;
		// Registered with labelNames ["model"] and observed with { model: ... },
		// so it is labelled by model rather than by tool.
		timeToChooseTools: Summary<keyof ModelLabels>;
	};
}
|
33 |
|
34 |
export class MetricsServer {
|
35 |
private static instance: MetricsServer;
|
36 |
+
private metrics: Metrics;
|
37 |
|
38 |
private constructor() {
|
39 |
const app = express();
|
|
|
46 |
const register = new Registry();
|
47 |
collectDefaultMetrics({ register });
|
48 |
|
49 |
+
this.metrics = {
|
50 |
+
model: {
|
51 |
+
conversationsTotal: new Counter({
|
52 |
+
name: "model_conversations_total",
|
53 |
+
help: "Total number of conversations",
|
54 |
+
labelNames: ["model"],
|
55 |
+
registers: [register],
|
56 |
+
}),
|
57 |
+
messagesTotal: new Counter({
|
58 |
+
name: "model_messages_total",
|
59 |
+
help: "Total number of messages",
|
60 |
+
labelNames: ["model"],
|
61 |
+
registers: [register],
|
62 |
+
}),
|
63 |
+
tokenCountTotal: new Counter({
|
64 |
+
name: "model_token_count_total",
|
65 |
+
help: "Total number of tokens",
|
66 |
+
labelNames: ["model"],
|
67 |
+
registers: [register],
|
68 |
+
}),
|
69 |
+
timePerOutputToken: new Summary({
|
70 |
+
name: "model_time_per_output_token_ms",
|
71 |
+
help: "Time per output token in ms",
|
72 |
+
labelNames: ["model"],
|
73 |
+
registers: [register],
|
74 |
+
maxAgeSeconds: 5 * 60,
|
75 |
+
ageBuckets: 5,
|
76 |
+
}),
|
77 |
+
timeToFirstToken: new Summary({
|
78 |
+
name: "model_time_to_first_token_ms",
|
79 |
+
help: "Time to first token",
|
80 |
+
labelNames: ["model"],
|
81 |
+
registers: [register],
|
82 |
+
maxAgeSeconds: 5 * 60,
|
83 |
+
ageBuckets: 5,
|
84 |
+
}),
|
85 |
+
latency: new Summary({
|
86 |
+
name: "model_latency_ms",
|
87 |
+
help: "Total latency until end of answer",
|
88 |
+
labelNames: ["model"],
|
89 |
+
registers: [register],
|
90 |
+
maxAgeSeconds: 5 * 60,
|
91 |
+
ageBuckets: 5,
|
92 |
+
}),
|
93 |
+
},
|
94 |
+
webSearch: {
|
95 |
+
requestCount: new Counter({
|
96 |
+
name: "web_search_request_count",
|
97 |
+
help: "Total number of web search requests",
|
98 |
+
registers: [register],
|
99 |
+
}),
|
100 |
+
pageFetchCount: new Counter({
|
101 |
+
name: "web_search_page_fetch_count",
|
102 |
+
help: "Total number of web search page fetches",
|
103 |
+
registers: [register],
|
104 |
+
}),
|
105 |
+
pageFetchCountError: new Counter({
|
106 |
+
name: "web_search_page_fetch_count_error",
|
107 |
+
help: "Total number of web search page fetch errors",
|
108 |
+
registers: [register],
|
109 |
+
}),
|
110 |
+
pageFetchDuration: new Summary({
|
111 |
+
name: "web_search_page_fetch_duration_ms",
|
112 |
+
help: "Web search page fetch duration",
|
113 |
+
registers: [register],
|
114 |
+
maxAgeSeconds: 5 * 60,
|
115 |
+
ageBuckets: 5,
|
116 |
+
}),
|
117 |
+
embeddingDuration: new Summary({
|
118 |
+
name: "web_search_embedding_duration_ms",
|
119 |
+
help: "Web search embedding duration",
|
120 |
+
registers: [register],
|
121 |
+
maxAgeSeconds: 5 * 60,
|
122 |
+
ageBuckets: 5,
|
123 |
+
}),
|
124 |
+
},
|
125 |
+
tool: {
|
126 |
+
toolUseCount: new Counter({
|
127 |
+
name: "tool_use_count",
|
128 |
+
help: "Total number of tool uses",
|
129 |
+
labelNames: ["tool"],
|
130 |
+
registers: [register],
|
131 |
+
}),
|
132 |
+
toolUseCountError: new Counter({
|
133 |
+
name: "tool_use_count_error",
|
134 |
+
help: "Total number of tool use errors",
|
135 |
+
labelNames: ["tool"],
|
136 |
+
registers: [register],
|
137 |
+
}),
|
138 |
+
toolUseDuration: new Summary({
|
139 |
+
name: "tool_use_duration_ms",
|
140 |
+
help: "Tool use duration",
|
141 |
+
labelNames: ["tool"],
|
142 |
+
registers: [register],
|
143 |
+
maxAgeSeconds: 30 * 60, // longer duration since we use this to give feedback to the user
|
144 |
+
ageBuckets: 5,
|
145 |
+
}),
|
146 |
+
timeToChooseTools: new Summary({
|
147 |
+
name: "time_to_choose_tools_ms",
|
148 |
+
help: "Time to choose tools",
|
149 |
+
labelNames: ["model"],
|
150 |
+
registers: [register],
|
151 |
+
maxAgeSeconds: 5 * 60,
|
152 |
+
ageBuckets: 5,
|
153 |
+
}),
|
154 |
+
},
|
155 |
+
};
|
156 |
+
|
157 |
app.get("/metrics", (req, res) => {
|
158 |
register.metrics().then((metrics) => {
|
159 |
res.set("Content-Type", "text/plain");
|
|
|
177 |
|
178 |
return MetricsServer.instance;
|
179 |
}
|
180 |
+
|
181 |
+
public static getMetrics(): Metrics {
|
182 |
+
return MetricsServer.getInstance().metrics;
|
183 |
+
}
|
184 |
}
|
src/lib/server/textGeneration/tools.ts
CHANGED
@@ -18,6 +18,7 @@ import { logger } from "../logger";
|
|
18 |
import { toolHasName } from "../tools/utils";
|
19 |
import type { MessageFile } from "$lib/types/Message";
|
20 |
import { mergeAsyncGenerators } from "$lib/utils/mergeAsyncGenerators";
|
|
|
21 |
|
22 |
function makeFilesPrompt(files: MessageFile[], fileMessageIndex: number): string {
|
23 |
if (files.length === 0) {
|
@@ -62,6 +63,9 @@ async function* runTool(
|
|
62 |
// Special case for directly_answer tool where we ignore
|
63 |
if (toolHasName(directlyAnswer.name, tool)) return;
|
64 |
|
|
|
|
|
|
|
65 |
yield {
|
66 |
type: MessageUpdateType.Tool,
|
67 |
subtype: MessageToolUpdateType.Call,
|
@@ -92,8 +96,14 @@ async function* runTool(
|
|
92 |
};
|
93 |
}
|
94 |
|
|
|
|
|
|
|
|
|
|
|
95 |
return { ...toolResult, call } as ToolResult;
|
96 |
} catch (e) {
|
|
|
97 |
yield {
|
98 |
type: MessageUpdateType.Tool,
|
99 |
subtype: MessageToolUpdateType.Error,
|
@@ -102,6 +112,7 @@ async function* runTool(
|
|
102 |
};
|
103 |
}
|
104 |
} catch (cause) {
|
|
|
105 |
console.error(Error(`Failed while running tool ${call.name}`), { cause });
|
106 |
return {
|
107 |
call,
|
@@ -126,6 +137,8 @@ export async function* runTools(
|
|
126 |
};
|
127 |
});
|
128 |
|
|
|
|
|
129 |
// do the function calling bits here
|
130 |
for await (const output of await endpoint({
|
131 |
messages: messagesWithFilesPrompt,
|
@@ -163,6 +176,11 @@ export async function* runTools(
|
|
163 |
}
|
164 |
}
|
165 |
|
|
|
|
|
|
|
|
|
|
|
166 |
const toolContext: BackendToolContext = { conv, messages, preprompt, assistant };
|
167 |
const toolResults: (ToolResult | undefined)[] = yield* mergeAsyncGenerators(
|
168 |
calls.map((call) => runTool(toolContext, tools, call))
|
|
|
18 |
import { toolHasName } from "../tools/utils";
|
19 |
import type { MessageFile } from "$lib/types/Message";
|
20 |
import { mergeAsyncGenerators } from "$lib/utils/mergeAsyncGenerators";
|
21 |
+
import { MetricsServer } from "../metrics";
|
22 |
|
23 |
function makeFilesPrompt(files: MessageFile[], fileMessageIndex: number): string {
|
24 |
if (files.length === 0) {
|
|
|
63 |
// Special case for directly_answer tool where we ignore
|
64 |
if (toolHasName(directlyAnswer.name, tool)) return;
|
65 |
|
66 |
+
const startTime = Date.now();
|
67 |
+
MetricsServer.getMetrics().tool.toolUseCount.inc({ tool: call.name });
|
68 |
+
|
69 |
yield {
|
70 |
type: MessageUpdateType.Tool,
|
71 |
subtype: MessageToolUpdateType.Call,
|
|
|
96 |
};
|
97 |
}
|
98 |
|
99 |
+
MetricsServer.getMetrics().tool.toolUseDuration.observe(
|
100 |
+
{ tool: call.name },
|
101 |
+
Date.now() - startTime
|
102 |
+
);
|
103 |
+
|
104 |
return { ...toolResult, call } as ToolResult;
|
105 |
} catch (e) {
|
106 |
+
MetricsServer.getMetrics().tool.toolUseCountError.inc({ tool: call.name });
|
107 |
yield {
|
108 |
type: MessageUpdateType.Tool,
|
109 |
subtype: MessageToolUpdateType.Error,
|
|
|
112 |
};
|
113 |
}
|
114 |
} catch (cause) {
|
115 |
+
MetricsServer.getMetrics().tool.toolUseCountError.inc({ tool: call.name });
|
116 |
console.error(Error(`Failed while running tool ${call.name}`), { cause });
|
117 |
return {
|
118 |
call,
|
|
|
137 |
};
|
138 |
});
|
139 |
|
140 |
+
const pickToolStartTime = Date.now();
|
141 |
+
|
142 |
// do the function calling bits here
|
143 |
for await (const output of await endpoint({
|
144 |
messages: messagesWithFilesPrompt,
|
|
|
176 |
}
|
177 |
}
|
178 |
|
179 |
+
MetricsServer.getMetrics().tool.timeToChooseTools.observe(
|
180 |
+
{ model: conv.model },
|
181 |
+
Date.now() - pickToolStartTime
|
182 |
+
);
|
183 |
+
|
184 |
const toolContext: BackendToolContext = { conv, messages, preprompt, assistant };
|
185 |
const toolResults: (ToolResult | undefined)[] = yield* mergeAsyncGenerators(
|
186 |
calls.map((call) => runTool(toolContext, tools, call))
|
src/lib/server/websearch/embed/embed.ts
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import type { WebSearchScrapedSource, WebSearchUsedSource } from "$lib/types/WebSearch";
|
2 |
import type { EmbeddingBackendModel } from "../../embeddingModels";
|
3 |
import { getSentenceSimilarity, innerProduct } from "../../sentenceSimilarity";
|
@@ -14,6 +15,8 @@ export async function findContextSources(
|
|
14 |
prompt: string,
|
15 |
embeddingModel: EmbeddingBackendModel
|
16 |
) {
|
|
|
|
|
17 |
const sourcesMarkdownElems = sources.map((source) => flattenTree(source.page.markdownTree));
|
18 |
const markdownElems = sourcesMarkdownElems.flat();
|
19 |
|
@@ -76,5 +79,7 @@ export async function findContextSources(
|
|
76 |
})
|
77 |
.filter((contextSource) => contextSource.context.length > 0);
|
78 |
|
|
|
|
|
79 |
return contextSources;
|
80 |
}
|
|
|
1 |
+
import { MetricsServer } from "$lib/server/metrics";
|
2 |
import type { WebSearchScrapedSource, WebSearchUsedSource } from "$lib/types/WebSearch";
|
3 |
import type { EmbeddingBackendModel } from "../../embeddingModels";
|
4 |
import { getSentenceSimilarity, innerProduct } from "../../sentenceSimilarity";
|
|
|
15 |
prompt: string,
|
16 |
embeddingModel: EmbeddingBackendModel
|
17 |
) {
|
18 |
+
const startTime = Date.now();
|
19 |
+
|
20 |
const sourcesMarkdownElems = sources.map((source) => flattenTree(source.page.markdownTree));
|
21 |
const markdownElems = sourcesMarkdownElems.flat();
|
22 |
|
|
|
79 |
})
|
80 |
.filter((contextSource) => contextSource.context.length > 0);
|
81 |
|
82 |
+
MetricsServer.getMetrics().webSearch.embeddingDuration.observe(Date.now() - startTime);
|
83 |
+
|
84 |
return contextSources;
|
85 |
}
|
src/lib/server/websearch/runWebSearch.ts
CHANGED
@@ -17,6 +17,7 @@ import {
|
|
17 |
makeSourcesUpdate,
|
18 |
} from "./update";
|
19 |
import { mergeAsyncGenerators } from "$lib/utils/mergeAsyncGenerators";
|
|
|
20 |
|
21 |
const MAX_N_PAGES_TO_SCRAPE = 8 as const;
|
22 |
const MAX_N_PAGES_TO_EMBED = 5 as const;
|
@@ -31,6 +32,8 @@ export async function* runWebSearch(
|
|
31 |
const createdAt = new Date();
|
32 |
const updatedAt = new Date();
|
33 |
|
|
|
|
|
34 |
try {
|
35 |
const embeddingModel =
|
36 |
embeddingModels.find((m) => m.id === conv.embeddingModel) ?? defaultEmbeddingModel;
|
|
|
17 |
makeSourcesUpdate,
|
18 |
} from "./update";
|
19 |
import { mergeAsyncGenerators } from "$lib/utils/mergeAsyncGenerators";
|
20 |
+
import { MetricsServer } from "../metrics";
|
21 |
|
22 |
const MAX_N_PAGES_TO_SCRAPE = 8 as const;
|
23 |
const MAX_N_PAGES_TO_EMBED = 5 as const;
|
|
|
32 |
const createdAt = new Date();
|
33 |
const updatedAt = new Date();
|
34 |
|
35 |
+
MetricsServer.getMetrics().webSearch.requestCount.inc();
|
36 |
+
|
37 |
try {
|
38 |
const embeddingModel =
|
39 |
embeddingModels.find((m) => m.id === conv.embeddingModel) ?? defaultEmbeddingModel;
|
src/lib/server/websearch/scrape/scrape.ts
CHANGED
@@ -6,16 +6,24 @@ import { spatialParser } from "./parser";
|
|
6 |
import { htmlToMarkdownTree } from "../markdown/tree";
|
7 |
import { timeout } from "$lib/utils/timeout";
|
8 |
import { makeErrorUpdate, makeGeneralUpdate } from "../update";
|
|
|
9 |
|
10 |
export const scrape = (maxCharsPerElem: number) =>
|
11 |
async function* (
|
12 |
source: WebSearchSource
|
13 |
): AsyncGenerator<MessageWebSearchUpdate, WebSearchScrapedSource | undefined, undefined> {
|
14 |
try {
|
|
|
|
|
|
|
15 |
const page = await scrapeUrl(source.link, maxCharsPerElem);
|
|
|
|
|
|
|
16 |
yield makeGeneralUpdate({ message: "Browsing webpage", args: [source.link] });
|
17 |
return { ...source, page };
|
18 |
} catch (e) {
|
|
|
19 |
const message = e instanceof Error ? e.message : String(e);
|
20 |
yield makeErrorUpdate({ message: "Failed to parse webpage", args: [message, source.link] });
|
21 |
}
|
|
|
6 |
import { htmlToMarkdownTree } from "../markdown/tree";
|
7 |
import { timeout } from "$lib/utils/timeout";
|
8 |
import { makeErrorUpdate, makeGeneralUpdate } from "../update";
|
9 |
+
import { MetricsServer } from "$lib/server/metrics";
|
10 |
|
11 |
export const scrape = (maxCharsPerElem: number) =>
|
12 |
async function* (
|
13 |
source: WebSearchSource
|
14 |
): AsyncGenerator<MessageWebSearchUpdate, WebSearchScrapedSource | undefined, undefined> {
|
15 |
try {
|
16 |
+
const startTime = Date.now();
|
17 |
+
MetricsServer.getMetrics().webSearch.pageFetchCount.inc();
|
18 |
+
|
19 |
const page = await scrapeUrl(source.link, maxCharsPerElem);
|
20 |
+
|
21 |
+
MetricsServer.getMetrics().webSearch.pageFetchDuration.observe(Date.now() - startTime);
|
22 |
+
|
23 |
yield makeGeneralUpdate({ message: "Browsing webpage", args: [source.link] });
|
24 |
return { ...source, page };
|
25 |
} catch (e) {
|
26 |
+
MetricsServer.getMetrics().webSearch.pageFetchCountError.inc();
|
27 |
const message = e instanceof Error ? e.message : String(e);
|
28 |
yield makeErrorUpdate({ message: "Failed to parse webpage", args: [message, source.link] });
|
29 |
}
|
src/routes/conversation/+server.ts
CHANGED
@@ -10,6 +10,7 @@ import { defaultEmbeddingModel } from "$lib/server/embeddingModels";
|
|
10 |
import { v4 } from "uuid";
|
11 |
import { authCondition } from "$lib/server/auth";
|
12 |
import { usageLimits } from "$lib/server/usageLimits";
|
|
|
13 |
|
14 |
export const POST: RequestHandler = async ({ locals, request }) => {
|
15 |
const body = await request.text();
|
@@ -115,6 +116,8 @@ export const POST: RequestHandler = async ({ locals, request }) => {
|
|
115 |
...(values.fromShare ? { meta: { fromShareId: values.fromShare } } : {}),
|
116 |
});
|
117 |
|
|
|
|
|
118 |
return new Response(
|
119 |
JSON.stringify({
|
120 |
conversationId: res.insertedId.toString(),
|
|
|
10 |
import { v4 } from "uuid";
|
11 |
import { authCondition } from "$lib/server/auth";
|
12 |
import { usageLimits } from "$lib/server/usageLimits";
|
13 |
+
import { MetricsServer } from "$lib/server/metrics";
|
14 |
|
15 |
export const POST: RequestHandler = async ({ locals, request }) => {
|
16 |
const body = await request.text();
|
|
|
116 |
...(values.fromShare ? { meta: { fromShareId: values.fromShare } } : {}),
|
117 |
});
|
118 |
|
119 |
+
MetricsServer.getMetrics().model.conversationsTotal.inc({ model: values.model });
|
120 |
+
|
121 |
return new Response(
|
122 |
JSON.stringify({
|
123 |
conversationId: res.insertedId.toString(),
|
src/routes/conversation/[id]/+server.ts
CHANGED
@@ -21,6 +21,7 @@ import { buildSubtree } from "$lib/utils/tree/buildSubtree.js";
|
|
21 |
import { addChildren } from "$lib/utils/tree/addChildren.js";
|
22 |
import { addSibling } from "$lib/utils/tree/addSibling.js";
|
23 |
import { usageLimits } from "$lib/server/usageLimits";
|
|
|
24 |
import { textGeneration } from "$lib/server/textGeneration";
|
25 |
import type { TextGenerationContext } from "$lib/server/textGeneration/types";
|
26 |
|
@@ -293,6 +294,8 @@ export async function POST({ request, locals, params, getClientAddress }) {
|
|
293 |
|
294 |
let doneStreaming = false;
|
295 |
|
|
|
|
|
296 |
// we now build the stream
|
297 |
const stream = new ReadableStream({
|
298 |
async start(controller) {
|
@@ -306,6 +309,25 @@ export async function POST({ request, locals, params, getClientAddress }) {
|
|
306 |
if (event.type === MessageUpdateType.Stream) {
|
307 |
if (event.token === "") return;
|
308 |
messageToWriteTo.content += event.token;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
309 |
}
|
310 |
|
311 |
// Set the title
|
@@ -321,6 +343,12 @@ export async function POST({ request, locals, params, getClientAddress }) {
|
|
321 |
else if (event.type === MessageUpdateType.FinalAnswer) {
|
322 |
messageToWriteTo.interrupted = event.interrupted;
|
323 |
messageToWriteTo.content = initialMessageContent + event.text;
|
|
|
|
|
|
|
|
|
|
|
|
|
324 |
}
|
325 |
|
326 |
// Add file
|
@@ -428,6 +456,8 @@ export async function POST({ request, locals, params, getClientAddress }) {
|
|
428 |
);
|
429 |
}
|
430 |
|
|
|
|
|
431 |
// Todo: maybe we should wait for the message to be saved before ending the response - in case of errors
|
432 |
return new Response(stream, {
|
433 |
headers: {
|
|
|
21 |
import { addChildren } from "$lib/utils/tree/addChildren.js";
|
22 |
import { addSibling } from "$lib/utils/tree/addSibling.js";
|
23 |
import { usageLimits } from "$lib/server/usageLimits";
|
24 |
+
import { MetricsServer } from "$lib/server/metrics";
|
25 |
import { textGeneration } from "$lib/server/textGeneration";
|
26 |
import type { TextGenerationContext } from "$lib/server/textGeneration/types";
|
27 |
|
|
|
294 |
|
295 |
let doneStreaming = false;
|
296 |
|
297 |
+
let lastTokenTimestamp: undefined | Date = undefined;
|
298 |
+
|
299 |
// we now build the stream
|
300 |
const stream = new ReadableStream({
|
301 |
async start(controller) {
|
|
|
309 |
if (event.type === MessageUpdateType.Stream) {
|
310 |
if (event.token === "") return;
|
311 |
messageToWriteTo.content += event.token;
|
312 |
+
|
313 |
+
// add to token total
|
314 |
+
MetricsServer.getMetrics().model.tokenCountTotal.inc({ model: model?.id });
|
315 |
+
|
316 |
+
// if this is the first token, add to time to first token
|
317 |
+
if (!lastTokenTimestamp) {
|
318 |
+
MetricsServer.getMetrics().model.timeToFirstToken.observe(
|
319 |
+
{ model: model?.id },
|
320 |
+
Date.now() - promptedAt.getTime()
|
321 |
+
);
|
322 |
+
lastTokenTimestamp = new Date();
|
323 |
+
}
|
324 |
+
|
325 |
+
// add to time per token
|
326 |
+
MetricsServer.getMetrics().model.timePerOutputToken.observe(
|
327 |
+
{ model: model?.id },
|
328 |
+
Date.now() - (lastTokenTimestamp ?? promptedAt).getTime()
|
329 |
+
);
|
330 |
+
lastTokenTimestamp = new Date();
|
331 |
}
|
332 |
|
333 |
// Set the title
|
|
|
343 |
else if (event.type === MessageUpdateType.FinalAnswer) {
|
344 |
messageToWriteTo.interrupted = event.interrupted;
|
345 |
messageToWriteTo.content = initialMessageContent + event.text;
|
346 |
+
|
347 |
+
// add to latency
|
348 |
+
MetricsServer.getMetrics().model.latency.observe(
|
349 |
+
{ model: model?.id },
|
350 |
+
Date.now() - promptedAt.getTime()
|
351 |
+
);
|
352 |
}
|
353 |
|
354 |
// Add file
|
|
|
456 |
);
|
457 |
}
|
458 |
|
459 |
+
const metrics = MetricsServer.getMetrics();
|
460 |
+
metrics.model.messagesTotal.inc({ model: model?.id });
|
461 |
// Todo: maybe we should wait for the message to be saved before ending the response - in case of errors
|
462 |
return new Response(stream, {
|
463 |
headers: {
|