taesiri committed on
Commit
ba04608
·
1 Parent(s): 05b8870
Files changed (1) hide show
  1. app.py +58 -21
app.py CHANGED
@@ -13,6 +13,7 @@ import git
13
  from pathlib import Path
14
  from io import BytesIO
15
  import PIL
 
16
 
17
  api = HfApi(token=os.environ["HF_TOKEN"])
18
 
@@ -122,9 +123,10 @@ scheduler = CommitScheduler(
122
  )
123
 
124
 
125
- def save_evaluation(post_id, model_a, model_b, verdict):
126
- """Save evaluation results to CSV. Multiple evaluations per image/model are allowed."""
127
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
 
128
 
129
  # Create data directory if it doesn't exist
130
  os.makedirs("data", exist_ok=True)
@@ -134,15 +136,39 @@ def save_evaluation(post_id, model_a, model_b, verdict):
134
  if not os.path.exists(filename):
135
  with open(filename, "w", newline="") as f:
136
  writer = csv.writer(f)
137
- writer.writerow(["timestamp", "post_id", "model_a", "model_b", "verdict"])
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
- # Append the new evaluation - multiple entries per image/model are allowed
140
  with open(filename, "a", newline="") as f:
141
  writer = csv.writer(f)
142
- writer.writerow([timestamp, post_id, model_a, model_b, verdict])
 
 
 
 
 
 
 
 
 
 
 
 
143
 
144
  print(
145
- f"Saved evaluation: {post_id} - Model A: {model_a} - Model B: {model_b} - Verdict: {verdict}"
146
  )
147
 
148
 
@@ -206,7 +232,7 @@ def get_random_sample():
206
 
207
 
208
  def evaluate(verdict, state):
209
- """Handle evaluation button clicks"""
210
  if state is None:
211
  return (
212
  None,
@@ -221,23 +247,33 @@ def evaluate(verdict, state):
221
  False,
222
  False,
223
  None,
224
- gr.update(variant="secondary"), # a_better_btn
225
- gr.update(variant="secondary"), # b_better_btn
226
- gr.update(variant="secondary"), # neither_btn
227
- gr.update(variant="secondary"), # tie_btn
228
- None, # post_id
229
- None, # simplified_instruction
230
- "", # username_debug
231
  )
232
 
233
- # Save the evaluation
234
- save_evaluation(state["post_id"], state["model_a"], state["model_b"], verdict)
 
 
 
 
 
 
 
 
 
235
 
236
  # Get next sample
237
  next_sample = get_random_sample()
238
- # Preserve username in state
239
  next_state = next_sample.copy()
240
  next_state["username"] = state["username"]
 
241
 
242
  # Reset button styles
243
  a_better_reset = gr.update(variant="secondary")
@@ -252,7 +288,7 @@ def evaluate(verdict, state):
252
  next_sample["instruction"],
253
  next_sample["simplified_instruction"],
254
  f"Model A: {next_sample['model_a']} | Model B: {next_sample['model_b']}",
255
- next_state, # Now includes username
256
  None, # selected_verdict
257
  False, # a_better_selected
258
  False, # b_better_selected
@@ -297,9 +333,10 @@ def handle_username_submit(username, current_page):
297
  def initialize(username):
298
  """Initialize the interface with first sample"""
299
  sample = get_random_sample()
300
- # Create state with username included
301
  state = sample.copy()
302
  state["username"] = username
 
303
 
304
  return (
305
  sample["source_image"],
@@ -308,7 +345,7 @@ def initialize(username):
308
  sample["instruction"],
309
  sample["simplified_instruction"],
310
  f"Model A: {sample['model_a']} | Model B: {sample['model_b']}",
311
- state, # Now includes username
312
  None, # selected_verdict
313
  False, # a_better_selected
314
  False, # b_better_selected
@@ -316,7 +353,7 @@ def initialize(username):
316
  False, # tie_selected
317
  sample["post_id"],
318
  sample["simplified_instruction"],
319
- username or "", # Use stored username or empty string
320
  )
321
 
322
 
 
13
  from pathlib import Path
14
  from io import BytesIO
15
  import PIL
16
+ import time # Add this import at the top
17
 
18
  api = HfApi(token=os.environ["HF_TOKEN"])
19
 
 
123
  )
124
 
125
 
126
+ def save_evaluation(post_id, model_a, model_b, verdict, username, start_time, end_time):
127
+ """Save evaluation results to CSV including timing and username information."""
128
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
129
+ duration = end_time - start_time # Calculate duration in seconds
130
 
131
  # Create data directory if it doesn't exist
132
  os.makedirs("data", exist_ok=True)
 
136
  if not os.path.exists(filename):
137
  with open(filename, "w", newline="") as f:
138
  writer = csv.writer(f)
139
+ writer.writerow(
140
+ [
141
+ "timestamp",
142
+ "post_id",
143
+ "model_a",
144
+ "model_b",
145
+ "verdict",
146
+ "username",
147
+ "start_time",
148
+ "end_time",
149
+ "duration_seconds",
150
+ ]
151
+ )
152
 
153
+ # Append the new evaluation
154
  with open(filename, "a", newline="") as f:
155
  writer = csv.writer(f)
156
+ writer.writerow(
157
+ [
158
+ timestamp,
159
+ post_id,
160
+ model_a,
161
+ model_b,
162
+ verdict,
163
+ username,
164
+ start_time,
165
+ end_time,
166
+ duration,
167
+ ]
168
+ )
169
 
170
  print(
171
+ f"Saved evaluation: {post_id} - Model A: {model_a} - Model B: {model_b} - Verdict: {verdict} - Duration: {duration:.2f}s"
172
  )
173
 
174
 
 
232
 
233
 
234
  def evaluate(verdict, state):
235
+ """Handle evaluation button clicks with timing"""
236
  if state is None:
237
  return (
238
  None,
 
247
  False,
248
  False,
249
  None,
250
+ gr.update(variant="secondary"),
251
+ gr.update(variant="secondary"),
252
+ gr.update(variant="secondary"),
253
+ gr.update(variant="secondary"),
254
+ None,
255
+ None,
256
+ "",
257
  )
258
 
259
+ # Record end time and save the evaluation
260
+ end_time = time.time()
261
+ save_evaluation(
262
+ state["post_id"],
263
+ state["model_a"],
264
+ state["model_b"],
265
+ verdict,
266
+ state["username"],
267
+ state["start_time"],
268
+ end_time,
269
+ )
270
 
271
  # Get next sample
272
  next_sample = get_random_sample()
273
+ # Preserve username in state and set new start time
274
  next_state = next_sample.copy()
275
  next_state["username"] = state["username"]
276
+ next_state["start_time"] = time.time() # Set start time for next evaluation
277
 
278
  # Reset button styles
279
  a_better_reset = gr.update(variant="secondary")
 
288
  next_sample["instruction"],
289
  next_sample["simplified_instruction"],
290
  f"Model A: {next_sample['model_a']} | Model B: {next_sample['model_b']}",
291
+ next_state, # Now includes username and start_time
292
  None, # selected_verdict
293
  False, # a_better_selected
294
  False, # b_better_selected
 
333
  def initialize(username):
334
  """Initialize the interface with first sample"""
335
  sample = get_random_sample()
336
+ # Create state with username and start time included
337
  state = sample.copy()
338
  state["username"] = username
339
+ state["start_time"] = time.time() # Record start time
340
 
341
  return (
342
  sample["source_image"],
 
345
  sample["instruction"],
346
  sample["simplified_instruction"],
347
  f"Model A: {sample['model_a']} | Model B: {sample['model_b']}",
348
+ state, # Now includes username and start_time
349
  None, # selected_verdict
350
  False, # a_better_selected
351
  False, # b_better_selected
 
353
  False, # tie_selected
354
  sample["post_id"],
355
  sample["simplified_instruction"],
356
+ username or "",
357
  )
358
 
359