taesiri committed on
Commit
c628077
·
1 Parent(s): aa72fc4
Files changed (2) hide show
  1. app.py +472 -0
  2. photoexp_filtered.csv +0 -0
app.py ADDED
@@ -0,0 +1,472 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import random
3
+ from datasets import load_dataset
4
+ import csv
5
+ from datetime import datetime
6
+ import os
7
+ import pandas as pd
8
+ import json
9
+ from huggingface_hub import CommitScheduler, HfApi, snapshot_download
10
+ import shutil
11
+ import uuid
12
+ import git
13
+ from pathlib import Path
14
+
15
# Authenticated Hub client. Indexing os.environ raises KeyError at import
# time when HF_TOKEN is not set.
api = HfApi(token=os.environ["HF_TOKEN"])

# Dataset repo that receives the evaluation results.
RESULTS_BACKUP_REPO = "taesiri/PhotoEditBattleResults"

# Load the experimental dataset
dataset = load_dataset("taesiri/IERv2-BattleResults_exp", split="train")

# Distinct post ids, taken from the dataset that is already loaded instead of
# loading the same repo a second time just to read one column.
dataset_post_ids = dataset.unique("post_id")


# Rows are matched to dataset samples on `post_id` in get_random_sample().
photoexp = pd.read_csv("./photoexp_filtered.csv")
33
+
34
+
35
+ # Download existing data from hub
36
def sync_with_hub():
    """Synchronize local data with the hub by cloning the dataset repo.

    Steps:
      1. Copy an existing ``./data`` directory to ``./data_backup``
         (replacing any previous backup).
      2. Clone ``RESULTS_BACKUP_REPO`` into ``hub_data`` (or pull, if a
         checkout is already present).
      3. Copy every sub-directory of ``hub_data/data`` that does not yet
         exist under ``./data``. Existing local directories and loose files
         are left untouched.
      4. Delete the ``hub_data`` checkout.

    Raises:
        KeyError: if the ``HF_TOKEN`` environment variable is not set.
        Errors raised by GitPython or ``shutil`` are propagated unchanged.
    """
    print("Starting sync with hub...")
    data_dir = Path("./data")
    if data_dir.exists():
        # Backup existing data
        backup_dir = Path("./data_backup")
        if backup_dir.exists():
            shutil.rmtree(backup_dir)
        shutil.copytree(data_dir, backup_dir)

    # Clone/pull latest data from hub.
    # The token is embedded in the remote URL for authentication.
    token = os.environ["HF_TOKEN"]
    username = RESULTS_BACKUP_REPO.split("/", 1)[0]
    repo_url = (
        f"https://{username}:{token}@huggingface.co/datasets/{RESULTS_BACKUP_REPO}"
    )
    hub_data_dir = Path("hub_data")

    try:
        if hub_data_dir.exists():
            # If repo exists, do a git pull
            print("Pulling latest changes...")
            repo = git.Repo(hub_data_dir)
            origin = repo.remotes.origin
            # Refresh the remote URL so it carries the current token
            if "https://" in origin.url:
                origin.set_url(repo_url)
            origin.pull()
        else:
            # Clone the repo with token
            print("Cloning repository...")
            git.Repo.clone_from(repo_url, hub_data_dir)

        # Merge hub data with local data
        hub_data_source = hub_data_dir / "data"
        if hub_data_source.exists():
            # Create data dir if it doesn't exist
            data_dir.mkdir(exist_ok=True)

            # Copy directories from hub that are missing locally
            for item in hub_data_source.glob("*"):
                if not item.is_dir():
                    continue
                dest = data_dir / item.name
                if not dest.exists():  # Only copy if doesn't exist locally
                    shutil.copytree(item, dest)
    finally:
        # Always remove the checkout, even when clone/pull/copy failed: its
        # remote URL contains the access token and must not linger on disk.
        if hub_data_dir.exists():
            shutil.rmtree(hub_data_dir)

    print("Finished syncing with hub!")
89
+
90
+
91
# Background uploader: pushes the contents of ./data to the `data/` folder of
# the results dataset repo. `every` is the commit interval (in minutes,
# according to the huggingface_hub CommitScheduler documentation).
scheduler = CommitScheduler(
    repo_id=RESULTS_BACKUP_REPO,
    repo_type="dataset",
    folder_path="./data",
    path_in_repo="data",
    every=1,
)
98
+
99
+
100
def save_evaluation(post_id, model_a, model_b, verdict):
    """Save evaluation results to CSV. Multiple evaluations per image/model are allowed.

    Appends one row ``timestamp, post_id, model_a, model_b, verdict`` to
    ``data/evaluation_results_exp.csv``. The directory is created when missing,
    and the header row is written when the file is missing or empty.

    Args:
        post_id: Identifier of the evaluated post.
        model_a: Name of the model shown as image A.
        model_b: Name of the model shown as image B.
        verdict: Verdict string chosen by the evaluator.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Create data directory if it doesn't exist
    os.makedirs("data", exist_ok=True)
    filename = "data/evaluation_results_exp.csv"

    # Hold the scheduler's lock while writing so a background commit never
    # uploads a half-written file, and so concurrent requests cannot both
    # decide to write the header.
    with scheduler.lock:
        needs_header = not os.path.exists(filename) or os.path.getsize(filename) == 0
        with open(filename, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            if needs_header:
                writer.writerow(
                    ["timestamp", "post_id", "model_a", "model_b", "verdict"]
                )
            # Append the new evaluation - multiple entries per image/model are allowed
            writer.writerow([timestamp, post_id, model_a, model_b, verdict])

    print(
        f"Saved evaluation: {post_id} - Model A: {model_a} - Model B: {model_b} - Verdict: {verdict}"
    )
122
+
123
+
124
def get_random_sample():
    """Get a random sample from the dataset. Sampling with replacement is allowed.

    One row of ``dataset`` is drawn uniformly at random and paired with one
    randomly chosen ``photoexp`` row that has the same ``post_id``. The two
    edited images are then randomly assigned to slots A and B.

    Returns:
        dict with keys ``post_id``, ``instruction``, ``simplified_instruction``
        (both formatted as a markdown "User Request" heading),
        ``source_image``, ``image_a``, ``image_b``, ``model_a`` and
        ``model_b``. When no ``photoexp`` row matches the post, the second
        image and its model name are ``None``.
    """
    idx = random.randrange(len(dataset))
    sample = dataset[idx]

    # Filter photoexp to get entries with the same post_id
    matching_photoexp_entries = photoexp[photoexp.post_id == sample["post_id"]]

    # Randomly select one entry from the matching entries
    if not matching_photoexp_entries.empty:
        random_photoexp_entry = matching_photoexp_entries.sample(n=1).iloc[0]
        additional_edited_image = random_photoexp_entry["edited_image"]
        # Add REDDIT_ prefix when using comment_id instead of model.
        # A CSV cell that is empty is read by pandas as NaN, not None, so the
        # check must cover both "column missing" and "value missing".
        model_b = random_photoexp_entry.get("model")
        if pd.isna(model_b):
            model_b = f"REDDIT_{random_photoexp_entry['comment_id']}"
    else:
        additional_edited_image = None
        model_b = None

    # Randomly assign images to A and B
    if random.choice([True, False]):
        image_a = sample["edited_image"]
        model_a = sample["model"]
        image_b = additional_edited_image
    else:
        image_a = additional_edited_image
        model_a = model_b
        image_b = sample["edited_image"]
        model_b = sample["model"]

    print(f"Selected sample {idx}: {sample['post_id']} - {sample['model']}")

    return {
        "post_id": sample["post_id"],
        "instruction": '## User Request: "' + sample["instruction"] + '"',
        "simplified_instruction": '## User Request: "'
        + sample["simplified_instruction"]
        + '"',
        "source_image": sample["source_image"],
        "image_a": image_a,
        "image_b": image_b,
        "model_a": model_a,
        "model_b": model_b,
    }
169
+
170
+
171
def evaluate(verdict, state):
    """Handle the confirm button: store the verdict and show the next sample.

    Args:
        verdict: Verdict string selected in the first step.
        state: Sample dict currently on screen (as returned by
            ``get_random_sample``), or ``None`` when nothing is loaded.

    Returns:
        A 17-tuple in the order of the confirm handler's outputs:
        source image, image A, image B, instruction, simplified instruction,
        model info text, new state, selected verdict, the four selection
        checkboxes, the four verdict buttons, and the post id.
    """
    if state is None:
        # Nothing to evaluate. Keep every position aligned with its output
        # component: data outputs cleared, checkboxes unchecked, buttons
        # left untouched.
        return (
            None,  # source_image
            None,  # image_a
            None,  # image_b
            None,  # instruction
            None,  # simplified_instruction
            None,  # model_info
            None,  # state
            None,  # selected_verdict
            False,  # a_better_selected
            False,  # b_better_selected
            False,  # neither_selected
            False,  # tie_selected
            gr.update(),  # a_better_btn (no change)
            gr.update(),  # b_better_btn (no change)
            gr.update(),  # neither_btn (no change)
            gr.update(),  # tie_btn (no change)
            None,  # post_id_display
        )

    # Save the evaluation
    save_evaluation(state["post_id"], state["model_a"], state["model_b"], verdict)

    # Get next sample
    next_sample = get_random_sample()

    # Reset button styles: one independent update per verdict button
    # (A is better, B is better, neither, tie).
    button_resets = tuple(gr.update(variant="secondary") for _ in range(4))

    return (
        next_sample["source_image"],
        next_sample["image_a"],
        next_sample["image_b"],
        next_sample["instruction"],
        next_sample["simplified_instruction"],
        f"Model A: {next_sample['model_a']} | Model B: {next_sample['model_b']}",
        next_sample,
        None,  # selected_verdict
        False,
        False,
        False,
        False,  # reset all button states
        *button_resets,
        next_sample["post_id"],
    )
225
+
226
+
227
def select_verdict(verdict, state):
    """Handle first step selection.

    Returns a 5-tuple: the chosen verdict followed by one boolean per option
    ("A is better", "B is better", "Neither is good", "Tie") telling whether
    that option is the one selected. Without a loaded sample (``state`` is
    ``None``) nothing is selected.
    """
    if state is None:
        return None, False, False, False, False  # Ensure it returns 5 values

    options = ("A is better", "B is better", "Neither is good", "Tie")
    flags = tuple(verdict == option for option in options)
    return (verdict, *flags)
238
+
239
+
240
def initialize():
    """Populate the interface with a first randomly drawn sample.

    Returns a 13-tuple in the order expected by the page-load handler:
    the three images, the two instruction texts, the model info line, the
    sample itself (kept as state), an empty verdict, four unchecked selection
    flags and the post id.
    """
    first = get_random_sample()

    displayed_keys = (
        "source_image",
        "image_a",
        "image_b",
        "instruction",
        "simplified_instruction",
    )
    displayed_values = tuple(first[key] for key in displayed_keys)
    model_line = f"Model A: {first['model_a']} | Model B: {first['model_b']}"

    return (
        *displayed_values,
        model_line,
        first,  # state
        None,  # selected_verdict
        False,  # a_better_selected
        False,  # b_better_selected
        False,  # neither_selected
        False,  # tie_selected
        first["post_id"],
    )
258
+
259
+
260
def update_button_styles(verdict):
    """Update button styles based on selection.

    The button matching ``verdict`` is switched to the "primary" variant and
    the other three to "secondary", so the evaluator can see the pending
    choice before confirming it. Labels are not touched; they are fixed when
    the buttons are created.

    Args:
        verdict: Currently selected verdict string, or ``None``.

    Returns:
        Four updates, in order: A is better, B is better, neither, tie.
    """
    options = ("A is better", "B is better", "Neither is good", "Tie")
    return tuple(
        gr.update(variant="primary" if verdict == option else "secondary")
        for option in options
    )


# Create Gradio interface
# NOTE(review): the nesting of the layout below was reconstructed from a
# flattened diff view - confirm against the original file.
with gr.Blocks() as demo:

    # Add instruction panel at the top
    gr.HTML(
        """
        <div style="padding: 0.8rem; margin-bottom: 0.8rem; background-color: #2c3e50; border-radius: 0.5rem; color: white; text-align: center;">
        <div style="font-size: 1.1rem; margin-bottom: 0.5rem;">Read the user instruction, look at the source image, then evaluate which edit (A or B) best satisfies the request better.</div>
        <div style="font-size: 1rem;">
        <strong>🤝 Tie</strong> &nbsp;&nbsp;|&nbsp;&nbsp;
        <strong>👈 A is better</strong> &nbsp;&nbsp;|&nbsp;&nbsp;
        <strong>👉 B is better</strong>
        </div>
        </div>
        """
    )

    with gr.Row():
        simplified_instruction = gr.Textbox(
            label="Simplified Instruction", show_label=True, visible=False
        )
        instruction = gr.Markdown(label="Original Instruction", show_label=True)

    with gr.Row():
        with gr.Column():
            source_image = gr.Image(label="Source Image", show_label=True, height=500)
            tie_btn = gr.Button("🤝 Tie", variant="secondary")
        with gr.Column():
            image_a = gr.Image(label="Image A", show_label=True, height=500)
            a_better_btn = gr.Button("👈 A is better", variant="secondary")
        with gr.Column():
            image_b = gr.Image(label="Image B", show_label=True, height=500)
            b_better_btn = gr.Button("👉 B is better", variant="secondary")

    # Add confirmation button in new row
    with gr.Row():
        confirm_btn = gr.Button("Confirm Selection", variant="primary", visible=False)
    with gr.Row():
        neither_btn = gr.Button("👎 Both are bad", variant="secondary", visible=False)

    with gr.Accordion("DEBUG", open=False):
        with gr.Row():
            post_id_display = gr.Textbox(
                label="Post ID", show_label=True, interactive=False
            )
            model_info = gr.Textbox(label="Model Information", show_label=True)

    state = gr.State()
    selected_verdict = gr.State()

    # Hidden checkboxes mirror which verdict is currently selected; their
    # change events drive the visibility of the confirm button.
    a_better_selected = gr.Checkbox(visible=False)
    b_better_selected = gr.Checkbox(visible=False)
    neither_selected = gr.Checkbox(visible=False)
    tie_selected = gr.Checkbox(visible=False)

    selection_flags = [
        a_better_selected,
        b_better_selected,
        neither_selected,
        tie_selected,
    ]
    verdict_buttons = [a_better_btn, b_better_btn, neither_btn, tie_btn]

    # Outputs refreshed whenever a new sample is shown (page load and confirm).
    sample_outputs = [
        source_image,
        image_a,
        image_b,
        instruction,
        simplified_instruction,
        model_info,
        state,
        selected_verdict,
        *selection_flags,
    ]

    def update_confirm_visibility(a_better, b_better, neither, tie):
        """Show the confirm button, labelled with the pending verdict."""
        # Update button text based on selection
        if a_better:
            return gr.update(visible=True, value="Confirm A is better")
        elif b_better:
            return gr.update(visible=True, value="Confirm B is better")
        elif neither:
            return gr.update(visible=True, value="Confirm Neither is good")
        elif tie:
            return gr.update(visible=True, value="Confirm Tie")
        return gr.update(visible=False)

    def _make_selector(verdict):
        """Build a click handler bound to one verdict string."""

        # A factory (not a lambda in the loop) so each handler keeps its own
        # verdict instead of the loop variable's final value.
        def _select(current_state):
            return select_verdict(verdict, current_state)

        return _select

    # Initialize the interface
    demo.load(
        initialize,
        outputs=[*sample_outputs, post_id_display],
    )

    # Handle first step button clicks: record the selection, then restyle
    # the buttons to highlight it.
    for button, verdict_label in (
        (a_better_btn, "A is better"),
        (b_better_btn, "B is better"),
        (neither_btn, "Neither is good"),
        (tie_btn, "Tie"),
    ):
        button.click(
            _make_selector(verdict_label),
            inputs=[state],
            outputs=[selected_verdict, *selection_flags],
        ).then(
            update_button_styles,
            inputs=[selected_verdict],
            outputs=verdict_buttons,
        )

    # Update confirm button visibility when selection changes
    for checkbox in selection_flags:
        checkbox.change(
            update_confirm_visibility,
            inputs=selection_flags,
            outputs=[confirm_btn],
        )

    # Handle confirmation button click
    confirm_btn.click(
        evaluate,
        inputs=[selected_verdict, state],
        outputs=[*sample_outputs, *verdict_buttons, post_id_display],
    )
470
+
471
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
photoexp_filtered.csv ADDED
The diff for this file is too large to render. See raw diff