taesiri committed on
Commit
047c657
·
1 Parent(s): ba04608
Files changed (1) hide show
  1. app.py +21 -57
app.py CHANGED
@@ -18,32 +18,18 @@ import time # Add this import at the top
18
  api = HfApi(token=os.environ["HF_TOKEN"])
19
 
20
  RESULTS_BACKUP_REPO = "taesiri/PhotoEditBattleResults"
21
-
22
  # Load the experimental dataset
23
- dataset = load_dataset("taesiri/IERv2-BattleResults_exp", split="train")
24
  dataset_post_ids = list(
25
  set(
26
- load_dataset(
27
- "taesiri/IERv2-BattleResults_exp", columns=["post_id"], split="train"
28
- )
29
  .to_pandas()
30
  .post_id.tolist()
31
  )
32
  )
33
 
34
 
35
- photoexp = pd.read_csv("./photoexp_filtered.csv")
36
- valid_post_ids = set(photoexp.post_id.tolist())
37
-
38
- dataset = dataset.filter(
39
- lambda xs: [x in valid_post_ids for x in xs["post_id"]],
40
- batched=True,
41
- batch_size=256,
42
- )
43
-
44
- print(f"Dataset size after filtering: {len(dataset)}")
45
-
46
-
47
  # Download existing data from hub
48
  def sync_with_hub():
49
  """
@@ -173,49 +159,27 @@ def save_evaluation(post_id, model_a, model_b, verdict, username, start_time, en
173
 
174
 
175
  def get_random_sample():
176
- """Get a random sample by first selecting a post_id then picking random edits for that post."""
177
- # First randomly select a post_id from valid posts
178
- random_post_id = random.choice(list(valid_post_ids))
179
-
180
- # Filter dataset for this post_id using batched processing
181
- post_edits = dataset.filter(
182
- lambda xs: [x == random_post_id for x in xs["post_id"]],
183
- batched=True,
184
- batch_size=256,
185
- )
186
 
187
- # Get matching photoexp entries for this post_id
188
- matching_photoexp_entries = photoexp[photoexp.post_id == random_post_id]
189
-
190
- # Randomly select one edit from the dataset
191
- idx = random.randint(0, len(post_edits) - 1)
192
- sample = post_edits[idx]
193
-
194
- # Randomly select one entry from the matching photoexp entries
195
- if not matching_photoexp_entries.empty:
196
- random_photoexp_entry = matching_photoexp_entries.sample(n=1).iloc[0]
197
- additional_edited_image = random_photoexp_entry["edited_image"]
198
- # Add REDDIT_ prefix when using comment_id instead of model
199
- model_b = random_photoexp_entry.get("model")
200
- if model_b is None:
201
- model_b = f"REDDIT_{random_photoexp_entry['comment_id']}"
202
- else:
203
- additional_edited_image = None
204
- model_b = None
205
-
206
- # Randomly assign images to A and B
207
  if random.choice([True, False]):
208
- image_a = sample["edited_image"]
209
- model_a = sample["model"]
210
- image_b = additional_edited_image
 
 
211
  else:
212
- image_a = additional_edited_image
213
- model_a = model_b
214
- image_b = sample["edited_image"]
215
- model_b = sample["model"]
216
-
217
- print(f"Selected post_id: {random_post_id}")
218
- print(f"Selected edit from model: {sample['model']}")
 
219
 
220
  return {
221
  "post_id": sample["post_id"],
 
18
  api = HfApi(token=os.environ["HF_TOKEN"])
19
 
20
  RESULTS_BACKUP_REPO = "taesiri/PhotoEditBattleResults"
21
+ MAIN_DATASET_REPO = "taesiri/IERv2-BattlePairs"
22
  # Load the experimental dataset
23
+ dataset = load_dataset(MAIN_DATASET_REPO, split="train")
24
  dataset_post_ids = list(
25
  set(
26
+ load_dataset(MAIN_DATASET_REPO, columns=["post_id"], split="train")
 
 
27
  .to_pandas()
28
  .post_id.tolist()
29
  )
30
  )
31
 
32
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  # Download existing data from hub
34
  def sync_with_hub():
35
  """
 
159
 
160
 
161
  def get_random_sample():
162
+ """Get a random sample from the dataset and randomly assign AI/human edits to A/B positions."""
163
+ # Randomly select an index from the dataset
164
+ idx = random.randint(0, len(dataset) - 1)
165
+ sample = dataset[idx]
 
 
 
 
 
 
166
 
167
+ # Randomly decide which image goes to position A and B
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  if random.choice([True, False]):
169
+ # AI edit is A, human edit is B
170
+ image_a = sample["ai_edited_image"]
171
+ image_b = sample["human_edited_image"]
172
+ model_a = sample["model"] # AI model name
173
+ model_b = "HUMAN" # Human edit identifier
174
  else:
175
+ # Human edit is A, AI edit is B
176
+ image_a = sample["human_edited_image"]
177
+ image_b = sample["ai_edited_image"]
178
+ model_a = "HUMAN" # Human edit identifier
179
+ model_b = sample["model"] # AI model name
180
+
181
+ print(f"Selected post_id: {sample['post_id']}")
182
+ print(f"AI model: {sample['model']}")
183
 
184
  return {
185
  "post_id": sample["post_id"],