taesiri committed on
Commit
bcec278
·
1 Parent(s): 8cc37b4
Files changed (3) hide show
  1. app.py +71 -16
  2. instructions/home.jpg +0 -0
  3. instructions/page2.jpg +0 -0
app.py CHANGED
@@ -166,15 +166,19 @@ def get_annotated_indices(username):
166
  """Get list of dataset indices already annotated by this user"""
167
  filename = "data/evaluation_results_exp.csv"
168
  if not os.path.exists(filename):
 
169
  return set()
170
 
171
  try:
172
  df = pd.read_csv(filename)
173
  if "dataset_idx" not in df.columns or "username" not in df.columns:
 
174
  return set()
175
  user_annotations = df[df["username"] == username]["dataset_idx"].tolist()
 
176
  return set(user_annotations)
177
  except:
 
178
  return set()
179
 
180
 
@@ -312,28 +316,79 @@ def select_verdict(verdict, state):
312
 
313
 
314
  def is_valid_email(email):
315
- """Validate email format using regex pattern"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
316
  pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
317
- return bool(re.match(pattern, email))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
 
319
 
320
  def handle_username_submit(email, current_page):
321
- """Handle email submission and advance to next page"""
322
- if not email or not is_valid_email(email.strip()):
323
- # Return current page and show error message
324
- gr.Warning("Please enter a valid email address (e.g., [email protected])")
 
 
 
 
 
 
 
 
 
 
 
 
325
  return (
326
- current_page, # stay on current page
327
- gr.update(value=email), # keep current email value
328
- gr.update(value=""), # clear debug
329
- None, # no username state
330
  )
331
- return (
332
- 2, # next page
333
- gr.update(value=""), # clear input
334
- gr.update(value=email), # update debug
335
- email, # update state
336
- )
337
 
338
 
339
  def initialize(username):
 
166
  """Get list of dataset indices already annotated by this user"""
167
  filename = "data/evaluation_results_exp.csv"
168
  if not os.path.exists(filename):
169
+ print(f"No annotations found for user {username} (file doesn't exist)")
170
  return set()
171
 
172
  try:
173
  df = pd.read_csv(filename)
174
  if "dataset_idx" not in df.columns or "username" not in df.columns:
175
+ print(f"No annotations found for user {username} (missing columns)")
176
  return set()
177
  user_annotations = df[df["username"] == username]["dataset_idx"].tolist()
178
+ print(f"User {username} has already processed {len(user_annotations)} posts")
179
  return set(user_annotations)
180
  except:
181
+ print(f"Error reading annotations for user {username}")
182
  return set()
183
 
184
 
 
316
 
317
 
318
  def is_valid_email(email):
319
+ """
320
+ Validate email format and content more strictly:
321
+ - Check basic email format
322
+ - Prevent common injection attempts
323
+ - Limit length
324
+ - Restrict to printable ASCII characters
325
+ """
326
+ if not email or not isinstance(email, str):
327
+ return False
328
+
329
+ # Check length limits
330
+ if len(email) > 254: # Maximum length per RFC 5321
331
+ return False
332
+
333
+ # Remove any whitespace
334
+ email = email.strip()
335
+
336
+ # Check for common injection characters
337
+ dangerous_chars = [";", '"', "'", ",", "\\", "\n", "\r", "\t"]
338
+ if any(char in email for char in dangerous_chars):
339
+ return False
340
+
341
+ # Ensure all characters are printable ASCII
342
+ if not all(32 <= ord(char) <= 126 for char in email):
343
+ return False
344
+
345
+ # Validate email format using comprehensive regex
346
  pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
347
+ if not re.match(pattern, email):
348
+ return False
349
+
350
+ # Additional checks for common patterns
351
+ if ".." in email: # No consecutive dots
352
+ return False
353
+ if email.count("@") != 1: # Exactly one @ symbol
354
+ return False
355
+
356
+ # Validate lengths of local and domain parts
357
+ local, domain = email.split("@")
358
+ if len(local) > 64 or len(domain) > 255: # RFC 5321 limits
359
+ return False
360
+
361
+ return True
362
 
363
 
364
  def handle_username_submit(email, current_page):
365
+ """Handle email submission with enhanced validation"""
366
+ try:
367
+ if not email:
368
+ gr.Warning("Please enter an email address")
369
+ return current_page, gr.update(value=email), gr.update(value=""), None
370
+
371
+ # Clean the input
372
+ email = str(email).strip()
373
+
374
+ if not is_valid_email(email):
375
+ gr.Warning("Please enter a valid email address (e.g., [email protected])")
376
+ return current_page, gr.update(value=email), gr.update(value=""), None
377
+
378
+ # Sanitize email for CSV storage
379
+ safe_email = email.replace('"', "").replace("'", "")
380
+
381
  return (
382
+ 2, # next page
383
+ gr.update(value=""), # clear input
384
+ gr.update(value=safe_email), # update debug
385
+ safe_email, # update state
386
  )
387
+
388
+ except Exception as e:
389
+ print(f"Error in handle_username_submit: {str(e)}")
390
+ gr.Warning("An error occurred. Please try again.")
391
+ return current_page, gr.update(value=""), gr.update(value=""), None
 
392
 
393
 
394
  def initialize(username):
instructions/home.jpg CHANGED
instructions/page2.jpg CHANGED