Spaces:
Running
Running
feat(migrations): clean up empty conversations from db (#1561)
Browse files* feat(migrations): clean up empty conversations from db
* fix(tests): better test coverage, fix bug with sessions
* feat: better migration
* fix: stop test server after running tests
* fix: migration code
* fix: refactor to take into account large size of session collection
scripts/setupTest.ts
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import { vi } from "vitest";
|
2 |
import dotenv from "dotenv";
|
3 |
import { resolve } from "path";
|
4 |
import fs from "fs";
|
@@ -41,3 +41,9 @@ vi.mock("$env/dynamic/private", async () => {
|
|
41 |
},
|
42 |
};
|
43 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { vi, afterAll } from "vitest";
|
2 |
import dotenv from "dotenv";
|
3 |
import { resolve } from "path";
|
4 |
import fs from "fs";
|
|
|
41 |
},
|
42 |
};
|
43 |
});
|
44 |
+
|
45 |
+
afterAll(async () => {
|
46 |
+
if (mongoServer) {
|
47 |
+
await mongoServer.stop();
|
48 |
+
}
|
49 |
+
});
|
src/lib/migrations/routines/09-delete-empty-conversations.spec.ts
ADDED
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import type { Session } from "$lib/types/Session";
|
2 |
+
import type { User } from "$lib/types/User";
|
3 |
+
import type { Conversation } from "$lib/types/Conversation";
|
4 |
+
import { ObjectId } from "mongodb";
|
5 |
+
import { deleteConversations } from "./09-delete-empty-conversations";
|
6 |
+
import { afterAll, afterEach, beforeAll, describe, expect, test } from "vitest";
|
7 |
+
import { collections } from "$lib/server/database";
|
8 |
+
|
9 |
+
type Message = Conversation["messages"][number];
|
10 |
+
|
11 |
+
const userData = {
|
12 |
+
_id: new ObjectId(),
|
13 |
+
createdAt: new Date(),
|
14 |
+
updatedAt: new Date(),
|
15 |
+
username: "new-username",
|
16 |
+
name: "name",
|
17 |
+
avatarUrl: "https://example.com/avatar.png",
|
18 |
+
hfUserId: "9999999999",
|
19 |
+
} satisfies User;
|
20 |
+
Object.freeze(userData);
|
21 |
+
|
22 |
+
const sessionForUser = {
|
23 |
+
_id: new ObjectId(),
|
24 |
+
createdAt: new Date(),
|
25 |
+
updatedAt: new Date(),
|
26 |
+
userId: userData._id,
|
27 |
+
sessionId: "session-id-9999999999",
|
28 |
+
expiresAt: new Date(Date.now() + 1000 * 60 * 60 * 24),
|
29 |
+
} satisfies Session;
|
30 |
+
Object.freeze(sessionForUser);
|
31 |
+
|
32 |
+
const userMessage = {
|
33 |
+
from: "user",
|
34 |
+
id: "user-message-id",
|
35 |
+
content: "Hello, how are you?",
|
36 |
+
} satisfies Message;
|
37 |
+
|
38 |
+
const assistantMessage = {
|
39 |
+
from: "assistant",
|
40 |
+
id: "assistant-message-id",
|
41 |
+
content: "I'm fine, thank you!",
|
42 |
+
} satisfies Message;
|
43 |
+
|
44 |
+
const systemMessage = {
|
45 |
+
from: "system",
|
46 |
+
id: "system-message-id",
|
47 |
+
content: "This is a system message",
|
48 |
+
} satisfies Message;
|
49 |
+
|
50 |
+
const conversationBase = {
|
51 |
+
_id: new ObjectId(),
|
52 |
+
createdAt: new Date(Date.now() - 7 * 24 * 60 * 60 * 1000),
|
53 |
+
updatedAt: new Date(Date.now() - 7 * 24 * 60 * 60 * 1000),
|
54 |
+
model: "model-id",
|
55 |
+
embeddingModel: "embedding-model-id",
|
56 |
+
title: "title",
|
57 |
+
messages: [],
|
58 |
+
} satisfies Conversation;
|
59 |
+
|
60 |
+
describe.sequential("Deleting discarded conversations", async () => {
|
61 |
+
test("a conversation with no messages should get deleted", async () => {
|
62 |
+
await collections.conversations.insertOne({
|
63 |
+
...conversationBase,
|
64 |
+
sessionId: sessionForUser.sessionId,
|
65 |
+
});
|
66 |
+
|
67 |
+
const result = await deleteConversations(collections);
|
68 |
+
|
69 |
+
expect(result).toBe(1);
|
70 |
+
});
|
71 |
+
test("a conversation with no messages that is less than 1 hour old should not get deleted", async () => {
|
72 |
+
await collections.conversations.insertOne({
|
73 |
+
...conversationBase,
|
74 |
+
sessionId: sessionForUser.sessionId,
|
75 |
+
createdAt: new Date(Date.now() - 30 * 60 * 1000),
|
76 |
+
});
|
77 |
+
|
78 |
+
const result = await deleteConversations(collections);
|
79 |
+
|
80 |
+
expect(result).toBe(0);
|
81 |
+
});
|
82 |
+
test("a conversation with only system messages should get deleted", async () => {
|
83 |
+
await collections.conversations.insertOne({
|
84 |
+
...conversationBase,
|
85 |
+
sessionId: sessionForUser.sessionId,
|
86 |
+
messages: [systemMessage],
|
87 |
+
});
|
88 |
+
|
89 |
+
const result = await deleteConversations(collections);
|
90 |
+
|
91 |
+
expect(result).toBe(1);
|
92 |
+
});
|
93 |
+
test("a conversation with a user message should not get deleted", async () => {
|
94 |
+
await collections.conversations.insertOne({
|
95 |
+
...conversationBase,
|
96 |
+
sessionId: sessionForUser.sessionId,
|
97 |
+
messages: [userMessage],
|
98 |
+
});
|
99 |
+
|
100 |
+
const result = await deleteConversations(collections);
|
101 |
+
|
102 |
+
expect(result).toBe(0);
|
103 |
+
});
|
104 |
+
test("a conversation with an assistant message should not get deleted", async () => {
|
105 |
+
await collections.conversations.insertOne({
|
106 |
+
...conversationBase,
|
107 |
+
sessionId: sessionForUser.sessionId,
|
108 |
+
messages: [assistantMessage],
|
109 |
+
});
|
110 |
+
|
111 |
+
const result = await deleteConversations(collections);
|
112 |
+
|
113 |
+
expect(result).toBe(0);
|
114 |
+
});
|
115 |
+
test("a conversation with a mix of messages should not get deleted", async () => {
|
116 |
+
await collections.conversations.insertOne({
|
117 |
+
...conversationBase,
|
118 |
+
sessionId: sessionForUser.sessionId,
|
119 |
+
messages: [systemMessage, userMessage, assistantMessage, userMessage, assistantMessage],
|
120 |
+
});
|
121 |
+
|
122 |
+
const result = await deleteConversations(collections);
|
123 |
+
|
124 |
+
expect(result).toBe(0);
|
125 |
+
});
|
126 |
+
test("a conversation with a userId and no sessionId should not get deleted", async () => {
|
127 |
+
await collections.conversations.insertOne({
|
128 |
+
...conversationBase,
|
129 |
+
messages: [userMessage, assistantMessage],
|
130 |
+
userId: userData._id,
|
131 |
+
});
|
132 |
+
|
133 |
+
const result = await deleteConversations(collections);
|
134 |
+
|
135 |
+
expect(result).toBe(0);
|
136 |
+
});
|
137 |
+
test("a conversation with no userId or sessionId should get deleted", async () => {
|
138 |
+
await collections.conversations.insertOne({
|
139 |
+
...conversationBase,
|
140 |
+
messages: [userMessage, assistantMessage],
|
141 |
+
});
|
142 |
+
|
143 |
+
const result = await deleteConversations(collections);
|
144 |
+
|
145 |
+
expect(result).toBe(1);
|
146 |
+
});
|
147 |
+
test("a conversation with a sessionId that exists should not get deleted", async () => {
|
148 |
+
await collections.conversations.insertOne({
|
149 |
+
...conversationBase,
|
150 |
+
messages: [userMessage, assistantMessage],
|
151 |
+
sessionId: sessionForUser.sessionId,
|
152 |
+
});
|
153 |
+
|
154 |
+
const result = await deleteConversations(collections);
|
155 |
+
|
156 |
+
expect(result).toBe(0);
|
157 |
+
});
|
158 |
+
test("a conversation with a userId and a sessionId that doesn't exist should NOT get deleted", async () => {
|
159 |
+
await collections.conversations.insertOne({
|
160 |
+
...conversationBase,
|
161 |
+
userId: userData._id,
|
162 |
+
messages: [userMessage, assistantMessage],
|
163 |
+
sessionId: new ObjectId().toString(),
|
164 |
+
});
|
165 |
+
|
166 |
+
const result = await deleteConversations(collections);
|
167 |
+
|
168 |
+
expect(result).toBe(0);
|
169 |
+
});
|
170 |
+
test("a conversation with only a sessionId that doesn't exist, should get deleted", async () => {
|
171 |
+
await collections.conversations.insertOne({
|
172 |
+
...conversationBase,
|
173 |
+
messages: [userMessage, assistantMessage],
|
174 |
+
sessionId: new ObjectId().toString(),
|
175 |
+
});
|
176 |
+
|
177 |
+
const result = await deleteConversations(collections);
|
178 |
+
|
179 |
+
expect(result).toBe(1);
|
180 |
+
});
|
181 |
+
test("many conversations should get deleted", async () => {
|
182 |
+
const conversations = Array.from({ length: 10010 }, () => ({
|
183 |
+
...conversationBase,
|
184 |
+
_id: new ObjectId(),
|
185 |
+
}));
|
186 |
+
|
187 |
+
await collections.conversations.insertMany(conversations);
|
188 |
+
|
189 |
+
const result = await deleteConversations(collections);
|
190 |
+
|
191 |
+
expect(result).toBe(10010);
|
192 |
+
});
|
193 |
+
});
|
194 |
+
|
195 |
+
beforeAll(async () => {
|
196 |
+
await collections.users.insertOne(userData);
|
197 |
+
await collections.sessions.insertOne(sessionForUser);
|
198 |
+
});
|
199 |
+
|
200 |
+
afterAll(async () => {
|
201 |
+
await collections.users.deleteOne({
|
202 |
+
_id: userData._id,
|
203 |
+
});
|
204 |
+
await collections.sessions.deleteOne({
|
205 |
+
_id: sessionForUser._id,
|
206 |
+
});
|
207 |
+
await collections.conversations.deleteMany({});
|
208 |
+
});
|
209 |
+
|
210 |
+
afterEach(async () => {
|
211 |
+
await collections.conversations.deleteMany({
|
212 |
+
_id: { $in: [conversationBase._id] },
|
213 |
+
});
|
214 |
+
});
|
src/lib/migrations/routines/09-delete-empty-conversations.ts
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import type { Migration } from ".";
|
2 |
+
import { collections } from "$lib/server/database";
|
3 |
+
import { Collection, FindCursor, ObjectId } from "mongodb";
|
4 |
+
import { logger } from "$lib/server/logger";
|
5 |
+
import type { Conversation } from "$lib/types/Conversation";
|
6 |
+
|
7 |
+
const BATCH_SIZE = 1000;
|
8 |
+
const DELETE_THRESHOLD_MS = 60 * 60 * 1000;
|
9 |
+
|
10 |
+
async function deleteBatch(conversations: Collection<Conversation>, ids: ObjectId[]) {
|
11 |
+
if (ids.length === 0) return 0;
|
12 |
+
const deleteResult = await conversations.deleteMany({ _id: { $in: ids } });
|
13 |
+
return deleteResult.deletedCount;
|
14 |
+
}
|
15 |
+
|
16 |
+
async function processCursor<T>(
|
17 |
+
cursor: FindCursor<T>,
|
18 |
+
processBatchFn: (batch: T[]) => Promise<void>
|
19 |
+
) {
|
20 |
+
let batch = [];
|
21 |
+
while (await cursor.hasNext()) {
|
22 |
+
const doc = await cursor.next();
|
23 |
+
if (doc) {
|
24 |
+
batch.push(doc);
|
25 |
+
}
|
26 |
+
if (batch.length >= BATCH_SIZE) {
|
27 |
+
await processBatchFn(batch);
|
28 |
+
batch = [];
|
29 |
+
}
|
30 |
+
}
|
31 |
+
if (batch.length > 0) {
|
32 |
+
await processBatchFn(batch);
|
33 |
+
}
|
34 |
+
}
|
35 |
+
|
36 |
+
export async function deleteConversations(
|
37 |
+
collections: typeof import("$lib/server/database").collections
|
38 |
+
) {
|
39 |
+
let deleteCount = 0;
|
40 |
+
const { conversations, sessions } = collections;
|
41 |
+
|
42 |
+
// First criteria: Delete conversations with no user/assistant messages older than 1 hour
|
43 |
+
const emptyConvCursor = conversations
|
44 |
+
.find({
|
45 |
+
"messages.from": { $not: { $in: ["user", "assistant"] } },
|
46 |
+
createdAt: { $lt: new Date(Date.now() - DELETE_THRESHOLD_MS) },
|
47 |
+
})
|
48 |
+
.batchSize(BATCH_SIZE);
|
49 |
+
|
50 |
+
await processCursor(emptyConvCursor, async (batch) => {
|
51 |
+
const ids = batch.map((doc) => doc._id);
|
52 |
+
deleteCount += await deleteBatch(conversations, ids);
|
53 |
+
});
|
54 |
+
|
55 |
+
// Second criteria: Process conversations without users in batches and check sessions
|
56 |
+
const noUserCursor = conversations.find({ userId: { $exists: false } }).batchSize(BATCH_SIZE);
|
57 |
+
|
58 |
+
await processCursor(noUserCursor, async (batch) => {
|
59 |
+
const sessionIds = [
|
60 |
+
...new Set(batch.map((conv) => conv.sessionId).filter((id): id is string => !!id)),
|
61 |
+
];
|
62 |
+
|
63 |
+
const existingSessions = await sessions.find({ sessionId: { $in: sessionIds } }).toArray();
|
64 |
+
const validSessionIds = new Set(existingSessions.map((s) => s.sessionId));
|
65 |
+
|
66 |
+
const invalidConvs = batch.filter(
|
67 |
+
(conv) => !conv.sessionId || !validSessionIds.has(conv.sessionId)
|
68 |
+
);
|
69 |
+
const idsToDelete = invalidConvs.map((conv) => conv._id);
|
70 |
+
deleteCount += await deleteBatch(conversations, idsToDelete);
|
71 |
+
});
|
72 |
+
|
73 |
+
logger.info(`[MIGRATIONS] Deleted ${deleteCount} conversations in total.`);
|
74 |
+
return deleteCount;
|
75 |
+
}
|
76 |
+
|
77 |
+
const deleteEmptyConversations: Migration = {
|
78 |
+
_id: new ObjectId("000000000000000000000009"),
|
79 |
+
name: "Delete conversations with no user or assistant messages or valid sessions",
|
80 |
+
up: async () => {
|
81 |
+
await deleteConversations(collections);
|
82 |
+
return true;
|
83 |
+
},
|
84 |
+
runEveryTime: true,
|
85 |
+
runForHuggingChat: "only",
|
86 |
+
};
|
87 |
+
|
88 |
+
export default deleteEmptyConversations;
|
src/lib/migrations/routines/index.ts
CHANGED
@@ -9,7 +9,7 @@ import updateMessageFiles from "./05-update-message-files";
|
|
9 |
import trimMessageUpdates from "./06-trim-message-updates";
|
10 |
import resetTools from "./07-reset-tools-in-settings";
|
11 |
import updateFeaturedToReview from "./08-update-featured-to-review";
|
12 |
-
|
13 |
export interface Migration {
|
14 |
_id: ObjectId;
|
15 |
name: string;
|
@@ -29,4 +29,5 @@ export const migrations: Migration[] = [
|
|
29 |
trimMessageUpdates,
|
30 |
resetTools,
|
31 |
updateFeaturedToReview,
|
|
|
32 |
];
|
|
|
9 |
import trimMessageUpdates from "./06-trim-message-updates";
|
10 |
import resetTools from "./07-reset-tools-in-settings";
|
11 |
import updateFeaturedToReview from "./08-update-featured-to-review";
|
12 |
+
import deleteEmptyConversations from "./09-delete-empty-conversations";
|
13 |
export interface Migration {
|
14 |
_id: ObjectId;
|
15 |
name: string;
|
|
|
29 |
trimMessageUpdates,
|
30 |
resetTools,
|
31 |
updateFeaturedToReview,
|
32 |
+
deleteEmptyConversations,
|
33 |
];
|
src/lib/server/database.ts
CHANGED
@@ -228,6 +228,20 @@ export class Database {
|
|
228 |
tools.createIndex({ createdById: 1, userCount: -1 }).catch((e) => logger.error(e));
|
229 |
tools.createIndex({ userCount: 1 }).catch((e) => logger.error(e));
|
230 |
tools.createIndex({ last24HoursCount: 1 }).catch((e) => logger.error(e));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
231 |
}
|
232 |
}
|
233 |
|
|
|
228 |
tools.createIndex({ createdById: 1, userCount: -1 }).catch((e) => logger.error(e));
|
229 |
tools.createIndex({ userCount: 1 }).catch((e) => logger.error(e));
|
230 |
tools.createIndex({ last24HoursCount: 1 }).catch((e) => logger.error(e));
|
231 |
+
|
232 |
+
conversations
|
233 |
+
.createIndex({
|
234 |
+
"messages.from": 1,
|
235 |
+
createdAt: 1,
|
236 |
+
})
|
237 |
+
.catch((e) => logger.error(e));
|
238 |
+
|
239 |
+
conversations
|
240 |
+
.createIndex({
|
241 |
+
userId: 1,
|
242 |
+
sessionId: 1,
|
243 |
+
})
|
244 |
+
.catch((e) => logger.error(e));
|
245 |
}
|
246 |
}
|
247 |
|
vite.config.ts
CHANGED
@@ -41,5 +41,6 @@ export default defineConfig({
|
|
41 |
setupFiles: ["./scripts/setupTest.ts"],
|
42 |
deps: { inline: ["@sveltejs/kit"] },
|
43 |
globals: true,
|
|
|
44 |
},
|
45 |
});
|
|
|
41 |
setupFiles: ["./scripts/setupTest.ts"],
|
42 |
deps: { inline: ["@sveltejs/kit"] },
|
43 |
globals: true,
|
44 |
+
testTimeout: 10000,
|
45 |
},
|
46 |
});
|