PhotoshopRequest-Arena

Sleeping

App Files Community

taesiri committed on Feb 23

Commit

047c657

1 Parent(s): ba04608

backup

Browse files

Files changed (1) hide show

app.py +21 -57

app.py CHANGED Viewed

@@ -18,32 +18,18 @@ import time  # Add this import at the top
 api = HfApi(token=os.environ["HF_TOKEN"])
 RESULTS_BACKUP_REPO = "taesiri/PhotoEditBattleResults"
 # Load the experimental dataset
-dataset = load_dataset("taesiri/IERv2-BattleResults_exp", split="train")
 dataset_post_ids = list(
     set(
-        load_dataset(
-            "taesiri/IERv2-BattleResults_exp", columns=["post_id"], split="train"
-        )
         .to_pandas()
         .post_id.tolist()
     )
 )
-photoexp = pd.read_csv("./photoexp_filtered.csv")
-valid_post_ids = set(photoexp.post_id.tolist())
-dataset = dataset.filter(
-    lambda xs: [x in valid_post_ids for x in xs["post_id"]],
-    batched=True,
-    batch_size=256,
-)
-print(f"Dataset size after filtering: {len(dataset)}")
 # Download existing data from hub
 def sync_with_hub():
     """
@@ -173,49 +159,27 @@ def save_evaluation(post_id, model_a, model_b, verdict, username, start_time, en
 def get_random_sample():
-    """Get a random sample by first selecting a post_id then picking random edits for that post."""
-    # First randomly select a post_id from valid posts
-    random_post_id = random.choice(list(valid_post_ids))
-    # Filter dataset for this post_id using batched processing
-    post_edits = dataset.filter(
-        lambda xs: [x == random_post_id for x in xs["post_id"]],
-        batched=True,
-        batch_size=256,
-    )
-    # Get matching photoexp entries for this post_id
-    matching_photoexp_entries = photoexp[photoexp.post_id == random_post_id]
-    # Randomly select one edit from the dataset
-    idx = random.randint(0, len(post_edits) - 1)
-    sample = post_edits[idx]
-    # Randomly select one entry from the matching photoexp entries
-    if not matching_photoexp_entries.empty:
-        random_photoexp_entry = matching_photoexp_entries.sample(n=1).iloc[0]
-        additional_edited_image = random_photoexp_entry["edited_image"]
-        # Add REDDIT_ prefix when using comment_id instead of model
-        model_b = random_photoexp_entry.get("model")
-        if model_b is None:
-            model_b = f"REDDIT_{random_photoexp_entry['comment_id']}"
-    else:
-        additional_edited_image = None
-        model_b = None
-    # Randomly assign images to A and B
     if random.choice([True, False]):
-        image_a = sample["edited_image"]
-        model_a = sample["model"]
-        image_b = additional_edited_image
     else:
-        image_a = additional_edited_image
-        model_a = model_b
-        image_b = sample["edited_image"]
-        model_b = sample["model"]
-    print(f"Selected post_id: {random_post_id}")
-    print(f"Selected edit from model: {sample['model']}")
     return {
         "post_id": sample["post_id"],

 api = HfApi(token=os.environ["HF_TOKEN"])
 RESULTS_BACKUP_REPO = "taesiri/PhotoEditBattleResults"
+MAIN_DATASET_REPO = "taesiri/IERv2-BattlePairs"
 # Load the experimental dataset
+dataset = load_dataset(MAIN_DATASET_REPO, split="train")
 dataset_post_ids = list(
     set(
+        load_dataset(MAIN_DATASET_REPO, columns=["post_id"], split="train")
         .to_pandas()
         .post_id.tolist()
     )
 )
 # Download existing data from hub
 def sync_with_hub():
     """
 def get_random_sample():
+    """Get a random sample from the dataset and randomly assign AI/human edits to A/B positions."""
+    # Randomly select an index from the dataset
+    idx = random.randint(0, len(dataset) - 1)
+    sample = dataset[idx]
+    # Randomly decide which image goes to position A and B
     if random.choice([True, False]):
+        # AI edit is A, human edit is B
+        image_a = sample["ai_edited_image"]
+        image_b = sample["human_edited_image"]
+        model_a = sample["model"]  # AI model name
+        model_b = "HUMAN"  # Human edit identifier
     else:
+        # Human edit is A, AI edit is B
+        image_a = sample["human_edited_image"]
+        image_b = sample["ai_edited_image"]
+        model_a = "HUMAN"  # Human edit identifier
+        model_b = sample["model"]  # AI model name
+    print(f"Selected post_id: {sample['post_id']}")
+    print(f"AI model: {sample['model']}")
     return {
         "post_id": sample["post_id"],