Quazim0t0 committed on
Commit
6e7f801
·
verified ·
1 Parent(s): 366c4b0

Delete database_schema.py

Browse files
Files changed (1) hide show
  1. database_schema.py +0 -483
database_schema.py DELETED
@@ -1,483 +0,0 @@
1
- """
2
- Database schema for Dynamic Highscores system.
3
-
4
- This module defines the SQLite database schema for the Dynamic Highscores system,
5
- which integrates benchmark selection, model evaluation, and leaderboard functionality.
6
- """
7
-
8
- import sqlite3
9
- import os
10
- import json
11
- import threading
12
- from datetime import datetime, timedelta
13
- import pandas as pd
14
-
15
class ThreadLocalDB:
    """Thread-local SQLite connection manager.

    Each thread that uses an instance lazily gets its own ``sqlite3``
    connection (and one cached cursor) to ``db_path``. Rows are returned as
    ``sqlite3.Row`` objects so callers can index them by column name.
    """

    def __init__(self, db_path):
        """Remember the database path; no connection is opened yet.

        Args:
            db_path: Path passed verbatim to ``sqlite3.connect``.
        """
        self.db_path = db_path
        # The storage is created per instance rather than on the class:
        # with class-level storage, two managers pointing at different
        # database files would hand out the same connection within a thread.
        self._thread_local = threading.local()

    def get_connection(self):
        """Return the calling thread's connection, opening it on first use."""
        conn = getattr(self._thread_local, 'conn', None)
        if conn is None:
            conn = sqlite3.connect(self.db_path)
            conn.row_factory = sqlite3.Row
            self._thread_local.conn = conn
        return conn

    def get_cursor(self):
        """Return the calling thread's cached cursor, creating it on first use."""
        cursor = getattr(self._thread_local, 'cursor', None)
        if cursor is None:
            cursor = self.get_connection().cursor()
            self._thread_local.cursor = cursor
        return cursor

    def close(self):
        """Close the calling thread's cursor and connection, if any.

        Only the current thread's resources are touched. A later call to
        ``get_connection`` or ``get_cursor`` opens fresh ones.
        """
        cursor = getattr(self._thread_local, 'cursor', None)
        if cursor is not None:
            cursor.close()
            self._thread_local.cursor = None

        conn = getattr(self._thread_local, 'conn', None)
        if conn is not None:
            conn.close()
            self._thread_local.conn = None
46
-
47
class DynamicHighscoresDB:
    """Database manager for the Dynamic Highscores system.

    Wraps a SQLite database holding users, benchmarks, models, evaluations
    and the evaluation queue. All access goes through a ``ThreadLocalDB`` so
    each thread works on its own connection.
    """

    def __init__(self, db_path="dynamic_highscores.db"):
        """Open (lazily) the database at ``db_path`` and ensure the tables exist."""
        self.db_path = db_path
        self.thread_local_db = ThreadLocalDB(db_path)
        self.create_tables()

    def get_conn(self):
        """Return the calling thread's database connection."""
        return self.thread_local_db.get_connection()

    def get_cursor(self):
        """Return the calling thread's database cursor."""
        return self.thread_local_db.get_cursor()

    def close(self):
        """Close the calling thread's database connection."""
        self.thread_local_db.close()

    def create_tables(self):
        """Create every table used by the system if it does not exist yet."""
        cursor = self.get_cursor()
        conn = self.get_conn()

        # Users table - stores user information
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS users (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                username TEXT UNIQUE NOT NULL,
                hf_user_id TEXT UNIQUE NOT NULL,
                is_admin BOOLEAN DEFAULT 0,
                last_submission_date TEXT,
                created_at TEXT DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        # Benchmarks table - stores information about available benchmarks
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS benchmarks (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                dataset_id TEXT NOT NULL,
                description TEXT,
                metrics TEXT, -- JSON string of metrics
                created_at TEXT DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        # Models table - stores information about submitted models
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS models (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                hf_model_id TEXT NOT NULL,
                user_id INTEGER NOT NULL,
                tag TEXT NOT NULL, -- One of: Merge, Agent, Reasoning, Coding, etc.
                parameters TEXT, -- Number of parameters (can be NULL)
                description TEXT,
                created_at TEXT DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (user_id) REFERENCES users (id),
                UNIQUE (hf_model_id, user_id)
            )
        ''')

        # Evaluations table - stores evaluation results
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS evaluations (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                model_id INTEGER NOT NULL,
                benchmark_id INTEGER NOT NULL,
                status TEXT NOT NULL, -- pending, running, completed, failed
                results TEXT, -- JSON string of results
                score REAL, -- Overall score (can be NULL)
                submitted_at TEXT DEFAULT CURRENT_TIMESTAMP,
                started_at TEXT,
                completed_at TEXT,
                FOREIGN KEY (model_id) REFERENCES models (id),
                FOREIGN KEY (benchmark_id) REFERENCES benchmarks (id)
            )
        ''')

        # Queue table - stores evaluation queue
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS queue (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                evaluation_id INTEGER NOT NULL,
                priority INTEGER DEFAULT 0, -- Higher number = higher priority
                added_at TEXT DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (evaluation_id) REFERENCES evaluations (id)
            )
        ''')

        conn.commit()

    # User management methods
    def add_user(self, username, hf_user_id, is_admin=False):
        """Insert a user and return its id.

        If the insert violates a uniqueness constraint, the id of the
        existing row with the same ``hf_user_id`` is returned instead, or
        ``None`` when no such row exists (e.g. only ``username`` clashed).
        """
        cursor = self.get_cursor()
        conn = self.get_conn()

        try:
            cursor.execute(
                "INSERT INTO users (username, hf_user_id, is_admin) VALUES (?, ?, ?)",
                (username, hf_user_id, is_admin)
            )
            conn.commit()
            return cursor.lastrowid
        except sqlite3.IntegrityError:
            # Do not leave the implicitly opened transaction hanging.
            conn.rollback()
            cursor.execute(
                "SELECT id FROM users WHERE hf_user_id = ?",
                (hf_user_id,)
            )
            row = cursor.fetchone()
            return row[0] if row else None

    def get_user(self, hf_user_id):
        """Return the user row for ``hf_user_id`` as a dict, or ``None``."""
        cursor = self.get_cursor()
        cursor.execute(
            "SELECT * FROM users WHERE hf_user_id = ?",
            (hf_user_id,)
        )
        row = cursor.fetchone()
        return dict(row) if row else None

    def get_user_by_username(self, username):
        """Return the user row for ``username`` as a dict, or ``None``."""
        cursor = self.get_cursor()
        cursor.execute(
            "SELECT * FROM users WHERE username = ?",
            (username,)
        )
        row = cursor.fetchone()
        return dict(row) if row else None

    def can_submit_today(self, user_id):
        """Return whether the user may submit an evaluation today.

        Unknown users may not submit. Admins always may. Other users may
        submit when they have no recorded submission or their last one is
        older than local midnight of the current day.
        """
        cursor = self.get_cursor()
        cursor.execute(
            "SELECT is_admin, last_submission_date FROM users WHERE id = ?",
            (user_id,)
        )
        row = cursor.fetchone()
        if not row:
            return False

        if row['is_admin']:
            return True

        last_submission = row['last_submission_date']
        if not last_submission:
            return True

        midnight = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        return datetime.fromisoformat(last_submission) < midnight

    def update_submission_date(self, user_id):
        """Record the current local time as the user's last submission date."""
        cursor = self.get_cursor()
        conn = self.get_conn()

        cursor.execute(
            "UPDATE users SET last_submission_date = ? WHERE id = ?",
            (datetime.now().isoformat(), user_id)
        )
        conn.commit()

    # Benchmark management methods
    def add_benchmark(self, name, dataset_id, description="", metrics=None):
        """Insert a benchmark and return its id.

        When a benchmark with the same ``dataset_id`` already exists, its id
        is returned and nothing is inserted. ``metrics`` is stored as JSON
        (an empty object when omitted).
        """
        cursor = self.get_cursor()
        conn = self.get_conn()

        # The schema has no UNIQUE constraint on dataset_id, so an insert
        # never fails on a duplicate; look the benchmark up explicitly.
        cursor.execute(
            "SELECT id FROM benchmarks WHERE dataset_id = ? ORDER BY id LIMIT 1",
            (dataset_id,)
        )
        existing = cursor.fetchone()
        if existing:
            return existing[0]

        metrics_json = json.dumps({} if metrics is None else metrics)

        try:
            cursor.execute(
                "INSERT INTO benchmarks (name, dataset_id, description, metrics) VALUES (?, ?, ?, ?)",
                (name, dataset_id, description, metrics_json)
            )
            conn.commit()
            return cursor.lastrowid
        except sqlite3.IntegrityError:
            # Kept for databases whose table does carry extra constraints.
            conn.rollback()
            cursor.execute(
                "SELECT id FROM benchmarks WHERE dataset_id = ?",
                (dataset_id,)
            )
            row = cursor.fetchone()
            return row[0] if row else None

    def get_benchmarks(self):
        """Return all benchmarks as dicts with ``metrics`` decoded from JSON."""
        cursor = self.get_cursor()
        cursor.execute("SELECT * FROM benchmarks")
        benchmarks = [dict(row) for row in cursor.fetchall()]

        for benchmark in benchmarks:
            raw_metrics = benchmark['metrics']
            benchmark['metrics'] = json.loads(raw_metrics) if raw_metrics else {}

        return benchmarks

    def get_benchmark(self, benchmark_id):
        """Return one benchmark as a dict with decoded ``metrics``, or ``None``."""
        cursor = self.get_cursor()
        cursor.execute(
            "SELECT * FROM benchmarks WHERE id = ?",
            (benchmark_id,)
        )
        row = cursor.fetchone()
        if not row:
            return None

        benchmark = dict(row)
        raw_metrics = benchmark['metrics']
        # Same convention as get_benchmarks(): missing metrics become {}.
        benchmark['metrics'] = json.loads(raw_metrics) if raw_metrics else {}
        return benchmark

    # Model management methods
    def add_model(self, name, hf_model_id, user_id, tag, parameters=None, description=""):
        """Insert a model and return its id.

        If the same user already registered ``hf_model_id``, the id of that
        existing row is returned instead.
        """
        cursor = self.get_cursor()
        conn = self.get_conn()

        try:
            cursor.execute(
                "INSERT INTO models (name, hf_model_id, user_id, tag, parameters, description) VALUES (?, ?, ?, ?, ?, ?)",
                (name, hf_model_id, user_id, tag, parameters, description)
            )
            conn.commit()
            return cursor.lastrowid
        except sqlite3.IntegrityError:
            conn.rollback()
            cursor.execute(
                "SELECT id FROM models WHERE hf_model_id = ? AND user_id = ?",
                (hf_model_id, user_id)
            )
            row = cursor.fetchone()
            return row[0] if row else None

    def get_models(self, tag=None):
        """Return all models as dicts; a tag other than "all" filters by tag."""
        cursor = self.get_cursor()

        if tag and tag.lower() != "all":
            cursor.execute(
                "SELECT * FROM models WHERE tag = ?",
                (tag,)
            )
        else:
            cursor.execute("SELECT * FROM models")

        return [dict(row) for row in cursor.fetchall()]

    def get_model(self, model_id):
        """Return the model row for ``model_id`` as a dict, or ``None``."""
        cursor = self.get_cursor()
        cursor.execute(
            "SELECT * FROM models WHERE id = ?",
            (model_id,)
        )
        row = cursor.fetchone()
        return dict(row) if row else None

    # Evaluation management methods
    def add_evaluation(self, model_id, benchmark_id, priority=0):
        """Create a pending evaluation, enqueue it, and return its id.

        Both inserts run in one transaction: the connection context manager
        commits on success and rolls back if either statement raises, so an
        evaluation is never left without its queue entry.
        """
        cursor = self.get_cursor()
        conn = self.get_conn()

        with conn:
            cursor.execute(
                "INSERT INTO evaluations (model_id, benchmark_id, status) VALUES (?, ?, 'pending')",
                (model_id, benchmark_id)
            )
            evaluation_id = cursor.lastrowid

            cursor.execute(
                "INSERT INTO queue (evaluation_id, priority) VALUES (?, ?)",
                (evaluation_id, priority)
            )

        return evaluation_id

    def update_evaluation_status(self, evaluation_id, status, results=None, score=None):
        """Set an evaluation's status and, optionally, its results and score.

        ``results`` is stored as JSON. Status 'running' stamps
        ``started_at``; 'completed' and 'failed' stamp ``completed_at``
        (both via SQLite's ``datetime('now')``).
        """
        cursor = self.get_cursor()
        conn = self.get_conn()

        # Build the SET clause and its parameters side by side so the
        # placeholder order can never drift from the value order.
        assignments = ["status = ?"]
        params = [status]

        if results is not None:
            assignments.append("results = ?")
            params.append(json.dumps(results))

        if score is not None:
            assignments.append("score = ?")
            params.append(score)

        if status in ('completed', 'failed'):
            assignments.append("completed_at = datetime('now')")
        elif status == 'running':
            assignments.append("started_at = datetime('now')")

        params.append(evaluation_id)
        sql = "UPDATE evaluations SET " + ", ".join(assignments) + " WHERE id = ?"

        cursor.execute(sql, params)
        conn.commit()

    def get_next_in_queue(self):
        """Return the highest-priority, oldest pending queue entry, or ``None``."""
        cursor = self.get_cursor()

        cursor.execute("""
            SELECT q.*, e.id as evaluation_id, e.model_id, e.benchmark_id, e.status
            FROM queue q
            JOIN evaluations e ON q.evaluation_id = e.id
            WHERE e.status = 'pending'
            ORDER BY q.priority DESC, q.added_at ASC
            LIMIT 1
        """)

        row = cursor.fetchone()
        return dict(row) if row else None

    def get_evaluation_results(self, model_id=None, benchmark_id=None, tag=None, status=None, limit=None):
        """Return evaluations (newest first) joined with model and benchmark names.

        Every filter is optional. ``benchmark_id`` may be an integer id, a
        string id, or "all" (any case) to disable the filter; ``tag``
        likewise accepts "all". The ``results`` column is decoded from JSON,
        falling back to ``{}`` when the stored text is not valid JSON.
        """
        cursor = self.get_cursor()

        sql = """
            SELECT e.id, e.model_id, e.benchmark_id, e.status, e.results, e.score,
                   e.submitted_at, e.started_at, e.completed_at, m.name as model_name, m.tag,
                   b.name as benchmark_name
            FROM evaluations e
            JOIN models m ON e.model_id = m.id
            JOIN benchmarks b ON e.benchmark_id = b.id
            WHERE 1=1
        """
        params = []

        if status:
            sql += " AND e.status = ?"
            params.append(status)

        if model_id:
            sql += " AND e.model_id = ?"
            params.append(model_id)

        # str() keeps integer ids working; calling .lower() on an int raised
        # AttributeError.
        if benchmark_id and str(benchmark_id).lower() != "all":
            sql += " AND e.benchmark_id = ?"
            params.append(benchmark_id)

        if tag and tag.lower() != "all":
            sql += " AND m.tag = ?"
            params.append(tag)

        sql += " ORDER BY e.submitted_at DESC"

        if limit:
            sql += " LIMIT ?"
            params.append(limit)

        cursor.execute(sql, params)
        results = [dict(row) for row in cursor.fetchall()]

        for result in results:
            if result['results']:
                try:
                    result['results'] = json.loads(result['results'])
                except (ValueError, TypeError):
                    # Corrupt or non-text payload: expose an empty mapping
                    # rather than failing the whole listing.
                    result['results'] = {}

        return results

    def get_leaderboard_df(self, tag=None, benchmark_id=None):
        """Return completed evaluations as a DataFrame sorted by score (descending).

        Besides the fixed columns, every numeric entry of an evaluation's
        ``results`` dict becomes an extra column unless its key collides
        with a fixed column. An empty DataFrame is returned when nothing
        matches.
        """
        results = self.get_evaluation_results(tag=tag, benchmark_id=benchmark_id, status="completed")
        if not results:
            return pd.DataFrame()

        leaderboard_data = []
        for result in results:
            entry = {
                'model_name': result['model_name'],
                'tag': result['tag'],
                'benchmark_name': result['benchmark_name'],
                'score': result['score'],
                'completed_at': result['completed_at'],
            }

            extra_metrics = result['results']
            if extra_metrics and isinstance(extra_metrics, dict):
                for key, value in extra_metrics.items():
                    if isinstance(value, (int, float)) and key not in entry:
                        entry[key] = value

            leaderboard_data.append(entry)

        df = pd.DataFrame(leaderboard_data)

        if not df.empty and 'score' in df.columns:
            df = df.sort_values('score', ascending=False)

        return df