taesiri committed on
Commit
8cc37b4
·
1 Parent(s): 62f463b
Files changed (1) hide show
  1. app.py +40 -41
app.py CHANGED
@@ -110,12 +110,13 @@ scheduler = CommitScheduler(
110
  )
111
 
112
 
113
- def save_evaluation(post_id, model_a, model_b, verdict, username, start_time, end_time):
114
- """Save evaluation results to CSV including timing and username information."""
 
 
115
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
116
- duration = end_time - start_time # Calculate duration in seconds
117
 
118
- # Create data directory if it doesn't exist
119
  os.makedirs("data", exist_ok=True)
120
  filename = "data/evaluation_results_exp.csv"
121
 
@@ -134,6 +135,7 @@ def save_evaluation(post_id, model_a, model_b, verdict, username, start_time, en
134
  "start_time",
135
  "end_time",
136
  "duration_seconds",
 
137
  ]
138
  )
139
 
@@ -151,6 +153,7 @@ def save_evaluation(post_id, model_a, model_b, verdict, username, start_time, en
151
  start_time,
152
  end_time,
153
  duration,
 
154
  ]
155
  )
156
 
@@ -159,57 +162,52 @@ def save_evaluation(post_id, model_a, model_b, verdict, username, start_time, en
159
  )
160
 
161
 
162
- def get_previously_annotated_posts(username):
163
- """Get list of post_ids already annotated by this username."""
164
  filename = "data/evaluation_results_exp.csv"
165
  if not os.path.exists(filename):
166
  return set()
167
 
168
- df = pd.read_csv(filename)
169
- user_annotations = df[df["username"] == username]["post_id"].unique()
170
- return set(user_annotations)
 
 
 
 
 
171
 
172
 
173
  def get_random_sample(username):
174
- """Get a random sample from the dataset, avoiding previously annotated items."""
175
- # Get previously annotated posts for this user
176
- annotated_posts = get_previously_annotated_posts(username)
177
-
178
- # Get available post_ids (not yet annotated by this user)
179
- available_posts = set(dataset_post_ids) - annotated_posts
180
-
181
- if not available_posts:
182
- # If user has annotated all posts, allow repeats
183
- available_posts = set(dataset_post_ids)
184
- print(f"User {username} has annotated all posts, allowing repeats")
185
-
186
- # Select random post_id from available ones
187
- selected_post_id = random.choice(list(available_posts))
188
-
189
- # Find the corresponding dataset index
190
- for idx, item in enumerate(dataset):
191
- if item["post_id"] == selected_post_id:
192
- sample = item
193
- break
194
- else:
195
- raise ValueError(f"Could not find post_id {selected_post_id} in dataset")
196
 
197
  # Randomly decide which image goes to position A and B
198
  if random.choice([True, False]):
199
  # AI edit is A, human edit is B
200
  image_a = sample["ai_edited_image"]
201
  image_b = sample["human_edited_image"]
202
- model_a = sample["model"] # AI model name
203
- model_b = "HUMAN" # Human edit identifier
204
  else:
205
  # Human edit is A, AI edit is B
206
  image_a = sample["human_edited_image"]
207
  image_b = sample["ai_edited_image"]
208
- model_a = "HUMAN" # Human edit identifier
209
- model_b = sample["model"] # AI model name
210
-
211
- print(f"Selected post_id: {sample['post_id']}")
212
- print(f"AI model: {sample['model']}")
213
 
214
  return {
215
  "post_id": sample["post_id"],
@@ -222,6 +220,7 @@ def get_random_sample(username):
222
  "image_b": image_b,
223
  "model_a": model_a,
224
  "model_b": model_b,
 
225
  }
226
 
227
 
@@ -260,11 +259,11 @@ def evaluate(verdict, state):
260
  state["username"],
261
  state["start_time"],
262
  end_time,
 
263
  )
264
 
265
- # Get next sample, passing username to avoid repeats
266
  next_sample = get_random_sample(state["username"])
267
-
268
  # Preserve username in state and set new start time
269
  next_state = next_sample.copy()
270
  next_state["username"] = state["username"]
@@ -339,7 +338,7 @@ def handle_username_submit(email, current_page):
339
 
340
  def initialize(username):
341
  """Initialize the interface with first sample"""
342
- sample = get_random_sample(username) # Pass username to avoid repeats
343
  # Create state with username and start time included
344
  state = sample.copy()
345
  state["username"] = username
 
110
  )
111
 
112
 
113
+ def save_evaluation(
114
+ post_id, model_a, model_b, verdict, username, start_time, end_time, dataset_idx
115
+ ):
116
+ """Save evaluation results to CSV including timing, username and dataset index information."""
117
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
118
+ duration = end_time - start_time
119
 
 
120
  os.makedirs("data", exist_ok=True)
121
  filename = "data/evaluation_results_exp.csv"
122
 
 
135
  "start_time",
136
  "end_time",
137
  "duration_seconds",
138
+ "dataset_idx",
139
  ]
140
  )
141
 
 
153
  start_time,
154
  end_time,
155
  duration,
156
+ dataset_idx,
157
  ]
158
  )
159
 
 
162
  )
163
 
164
 
165
def get_annotated_indices(username):
    """Return the set of dataset indices already annotated by ``username``.

    Reads ``data/evaluation_results_exp.csv`` and collects the
    ``dataset_idx`` values of every row whose ``username`` column matches.

    Args:
        username: Value compared against the CSV's ``username`` column.

    Returns:
        A set of ``int`` indices. The set is empty when the results file is
        missing, unreadable, empty or malformed, or when it lacks the
        ``username`` / ``dataset_idx`` columns.
    """
    filename = "data/evaluation_results_exp.csv"
    if not os.path.exists(filename):
        return set()

    try:
        df = pd.read_csv(filename)
    except (
        OSError,
        UnicodeDecodeError,
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
    ):
        # Best effort: an unreadable or corrupt results file is treated as
        # "nothing annotated yet". Only read/parse failures are caught here,
        # so programming errors and interpreter signals still propagate.
        return set()

    if "dataset_idx" not in df.columns or "username" not in df.columns:
        return set()

    user_rows = df.loc[df["username"] == username, "dataset_idx"]
    # Blank or non-numeric cells become NaN and are dropped, so the result
    # holds plain ints that compare cleanly against range() indices.
    indices = pd.to_numeric(user_rows, errors="coerce").dropna().astype(int)
    return set(indices.tolist())
179
 
180
 
181
  def get_random_sample(username):
182
+ """Get a random sample excluding previously annotated items"""
183
+ # Get indices already annotated by this user
184
+ annotated_indices = get_annotated_indices(username)
185
+
186
+ # Get all valid indices that haven't been annotated
187
+ all_indices = set(range(len(dataset)))
188
+ available_indices = list(all_indices - annotated_indices)
189
+
190
+ if not available_indices:
191
+ # If user has annotated all items, allow repeats
192
+ available_indices = list(all_indices)
193
+
194
+ # Randomly select from available indices
195
+ idx = random.choice(available_indices)
196
+ sample = dataset[idx]
 
 
 
 
 
 
 
197
 
198
  # Randomly decide which image goes to position A and B
199
  if random.choice([True, False]):
200
  # AI edit is A, human edit is B
201
  image_a = sample["ai_edited_image"]
202
  image_b = sample["human_edited_image"]
203
+ model_a = sample["model"]
204
+ model_b = "HUMAN"
205
  else:
206
  # Human edit is A, AI edit is B
207
  image_a = sample["human_edited_image"]
208
  image_b = sample["ai_edited_image"]
209
+ model_a = "HUMAN"
210
+ model_b = sample["model"]
 
 
 
211
 
212
  return {
213
  "post_id": sample["post_id"],
 
220
  "image_b": image_b,
221
  "model_a": model_a,
222
  "model_b": model_b,
223
+ "dataset_idx": idx, # Include the dataset index in the return
224
  }
225
 
226
 
 
259
  state["username"],
260
  state["start_time"],
261
  end_time,
262
+ state["dataset_idx"],
263
  )
264
 
265
+ # Get next sample using username to avoid repeats
266
  next_sample = get_random_sample(state["username"])
 
267
  # Preserve username in state and set new start time
268
  next_state = next_sample.copy()
269
  next_state["username"] = state["username"]
 
338
 
339
  def initialize(username):
340
  """Initialize the interface with first sample"""
341
+ sample = get_random_sample(username)
342
  # Create state with username and start time included
343
  state = sample.copy()
344
  state["username"] = username