Spaces:
Sleeping
Sleeping
backup
Browse files
app.py
CHANGED
@@ -18,32 +18,18 @@ import time # Add this import at the top
|
|
18 |
api = HfApi(token=os.environ["HF_TOKEN"])
|
19 |
|
20 |
RESULTS_BACKUP_REPO = "taesiri/PhotoEditBattleResults"
|
21 |
-
|
22 |
# Load the experimental dataset
|
23 |
-
dataset = load_dataset(
|
24 |
dataset_post_ids = list(
|
25 |
set(
|
26 |
-
load_dataset(
|
27 |
-
"taesiri/IERv2-BattleResults_exp", columns=["post_id"], split="train"
|
28 |
-
)
|
29 |
.to_pandas()
|
30 |
.post_id.tolist()
|
31 |
)
|
32 |
)
|
33 |
|
34 |
|
35 |
-
photoexp = pd.read_csv("./photoexp_filtered.csv")
|
36 |
-
valid_post_ids = set(photoexp.post_id.tolist())
|
37 |
-
|
38 |
-
dataset = dataset.filter(
|
39 |
-
lambda xs: [x in valid_post_ids for x in xs["post_id"]],
|
40 |
-
batched=True,
|
41 |
-
batch_size=256,
|
42 |
-
)
|
43 |
-
|
44 |
-
print(f"Dataset size after filtering: {len(dataset)}")
|
45 |
-
|
46 |
-
|
47 |
# Download existing data from hub
|
48 |
def sync_with_hub():
|
49 |
"""
|
@@ -173,49 +159,27 @@ def save_evaluation(post_id, model_a, model_b, verdict, username, start_time, en
|
|
173 |
|
174 |
|
175 |
def get_random_sample():
|
176 |
-
"""Get a random sample
|
177 |
-
#
|
178 |
-
|
179 |
-
|
180 |
-
# Filter dataset for this post_id using batched processing
|
181 |
-
post_edits = dataset.filter(
|
182 |
-
lambda xs: [x == random_post_id for x in xs["post_id"]],
|
183 |
-
batched=True,
|
184 |
-
batch_size=256,
|
185 |
-
)
|
186 |
|
187 |
-
#
|
188 |
-
matching_photoexp_entries = photoexp[photoexp.post_id == random_post_id]
|
189 |
-
|
190 |
-
# Randomly select one edit from the dataset
|
191 |
-
idx = random.randint(0, len(post_edits) - 1)
|
192 |
-
sample = post_edits[idx]
|
193 |
-
|
194 |
-
# Randomly select one entry from the matching photoexp entries
|
195 |
-
if not matching_photoexp_entries.empty:
|
196 |
-
random_photoexp_entry = matching_photoexp_entries.sample(n=1).iloc[0]
|
197 |
-
additional_edited_image = random_photoexp_entry["edited_image"]
|
198 |
-
# Add REDDIT_ prefix when using comment_id instead of model
|
199 |
-
model_b = random_photoexp_entry.get("model")
|
200 |
-
if model_b is None:
|
201 |
-
model_b = f"REDDIT_{random_photoexp_entry['comment_id']}"
|
202 |
-
else:
|
203 |
-
additional_edited_image = None
|
204 |
-
model_b = None
|
205 |
-
|
206 |
-
# Randomly assign images to A and B
|
207 |
if random.choice([True, False]):
|
208 |
-
|
209 |
-
|
210 |
-
image_b =
|
|
|
|
|
211 |
else:
|
212 |
-
|
213 |
-
|
214 |
-
image_b = sample["
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
print(f"Selected
|
|
|
219 |
|
220 |
return {
|
221 |
"post_id": sample["post_id"],
|
|
|
18 |
api = HfApi(token=os.environ["HF_TOKEN"])
|
19 |
|
20 |
RESULTS_BACKUP_REPO = "taesiri/PhotoEditBattleResults"
|
21 |
+
MAIN_DATASET_REPO = "taesiri/IERv2-BattlePairs"
|
22 |
# Load the experimental dataset
|
23 |
+
dataset = load_dataset(MAIN_DATASET_REPO, split="train")
|
24 |
dataset_post_ids = list(
|
25 |
set(
|
26 |
+
load_dataset(MAIN_DATASET_REPO, columns=["post_id"], split="train")
|
|
|
|
|
27 |
.to_pandas()
|
28 |
.post_id.tolist()
|
29 |
)
|
30 |
)
|
31 |
|
32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
# Download existing data from hub
|
34 |
def sync_with_hub():
|
35 |
"""
|
|
|
159 |
|
160 |
|
161 |
def get_random_sample():
|
162 |
+
"""Get a random sample from the dataset and randomly assign AI/human edits to A/B positions."""
|
163 |
+
# Randomly select an index from the dataset
|
164 |
+
idx = random.randint(0, len(dataset) - 1)
|
165 |
+
sample = dataset[idx]
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
|
167 |
+
# Randomly decide which image goes to position A and B
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
if random.choice([True, False]):
|
169 |
+
# AI edit is A, human edit is B
|
170 |
+
image_a = sample["ai_edited_image"]
|
171 |
+
image_b = sample["human_edited_image"]
|
172 |
+
model_a = sample["model"] # AI model name
|
173 |
+
model_b = "HUMAN" # Human edit identifier
|
174 |
else:
|
175 |
+
# Human edit is A, AI edit is B
|
176 |
+
image_a = sample["human_edited_image"]
|
177 |
+
image_b = sample["ai_edited_image"]
|
178 |
+
model_a = "HUMAN" # Human edit identifier
|
179 |
+
model_b = sample["model"] # AI model name
|
180 |
+
|
181 |
+
print(f"Selected post_id: {sample['post_id']}")
|
182 |
+
print(f"AI model: {sample['model']}")
|
183 |
|
184 |
return {
|
185 |
"post_id": sample["post_id"],
|