Spaces:
Sleeping
Sleeping
backup
Browse files- app.py +71 -16
- instructions/home.jpg +0 -0
- instructions/page2.jpg +0 -0
app.py
CHANGED
@@ -166,15 +166,19 @@ def get_annotated_indices(username):
|
|
166 |
"""Get list of dataset indices already annotated by this user"""
|
167 |
filename = "data/evaluation_results_exp.csv"
|
168 |
if not os.path.exists(filename):
|
|
|
169 |
return set()
|
170 |
|
171 |
try:
|
172 |
df = pd.read_csv(filename)
|
173 |
if "dataset_idx" not in df.columns or "username" not in df.columns:
|
|
|
174 |
return set()
|
175 |
user_annotations = df[df["username"] == username]["dataset_idx"].tolist()
|
|
|
176 |
return set(user_annotations)
|
177 |
except:
|
|
|
178 |
return set()
|
179 |
|
180 |
|
@@ -312,28 +316,79 @@ def select_verdict(verdict, state):
|
|
312 |
|
313 |
|
314 |
def is_valid_email(email):
|
315 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
316 |
pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
|
317 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
318 |
|
319 |
|
320 |
def handle_username_submit(email, current_page):
|
321 |
-
"""Handle email submission
|
322 |
-
|
323 |
-
|
324 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
325 |
return (
|
326 |
-
|
327 |
-
gr.update(value=
|
328 |
-
gr.update(value=
|
329 |
-
|
330 |
)
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
gr.
|
335 |
-
|
336 |
-
)
|
337 |
|
338 |
|
339 |
def initialize(username):
|
|
|
166 |
"""Get list of dataset indices already annotated by this user"""
|
167 |
filename = "data/evaluation_results_exp.csv"
|
168 |
if not os.path.exists(filename):
|
169 |
+
print(f"No annotations found for user {username} (file doesn't exist)")
|
170 |
return set()
|
171 |
|
172 |
try:
|
173 |
df = pd.read_csv(filename)
|
174 |
if "dataset_idx" not in df.columns or "username" not in df.columns:
|
175 |
+
print(f"No annotations found for user {username} (missing columns)")
|
176 |
return set()
|
177 |
user_annotations = df[df["username"] == username]["dataset_idx"].tolist()
|
178 |
+
print(f"User {username} has already processed {len(user_annotations)} posts")
|
179 |
return set(user_annotations)
|
180 |
except:
|
181 |
+
print(f"Error reading annotations for user {username}")
|
182 |
return set()
|
183 |
|
184 |
|
|
|
316 |
|
317 |
|
318 |
def is_valid_email(email):
    """Return True when *email* is a plausible, safely storable address.

    The value is stripped of surrounding whitespace and then must:

    - be a non-empty ``str`` of at most 254 characters,
    - contain no quoting/separator/control characters that could corrupt
      a CSV row (``; " ' , \\`` and newline, carriage return, tab),
    - consist solely of printable ASCII,
    - match ``local@domain.tld`` with a TLD of two or more letters,
    - contain no consecutive dots,
    - have a local part of at most 64 characters that neither starts nor
      ends with a dot,
    - have domain labels that are non-empty and do not start or end with
      a hyphen.

    Args:
        email: The candidate address; any type is accepted.

    Returns:
        bool: True if every check passes, otherwise False.
    """
    if not email or not isinstance(email, str):
        return False

    # Strip before measuring so padding whitespace cannot push an
    # otherwise valid address over the length limit.
    email = email.strip()
    if not email or len(email) > 254:  # overall address length limit
        return False

    # Characters that could break out of a CSV field or a log line.
    dangerous_chars = (";", '"', "'", ",", "\\", "\n", "\r", "\t")
    if any(char in email for char in dangerous_chars):
        return False

    # Printable ASCII only (space through tilde).
    if not all(32 <= ord(char) <= 126 for char in email):
        return False

    # fullmatch anchors both ends; the character classes exclude "@", so a
    # successful match also guarantees exactly one "@" in the address.
    pattern = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    if not re.fullmatch(pattern, email):
        return False

    if ".." in email:  # no consecutive dots anywhere
        return False

    local, _, domain = email.partition("@")
    if len(local) > 64 or len(domain) > 255:
        return False

    # The regex alone would accept ".user@..." and "user.@...".
    if local.startswith(".") or local.endswith("."):
        return False

    # Reject empty labels (a leading dot in the domain) and labels that
    # begin or end with a hyphen, e.g. "user@-example.com".
    for label in domain.split("."):
        if not label or label.startswith("-") or label.endswith("-"):
            return False

    return True
|
362 |
|
363 |
|
364 |
def handle_username_submit(email, current_page):
    """Handle submission of the email form.

    Args:
        email: Raw value from the email input component.
        current_page: Page value to keep when the submission is rejected.

    Returns:
        A 4-tuple ``(page, input_update, debug_update, state)``. On success
        it is ``(2, <cleared input>, <email shown in debug field>, email)``.
        On any failure it is ``(current_page, <input update>,
        <cleared debug field>, None)`` and a ``gr.Warning`` is raised for
        the user.
    """
    try:
        # Normalize first so whitespace-only input is treated as empty
        # rather than reported as a malformed address.
        email = str(email).strip() if email else ""

        if not email:
            gr.Warning("Please enter an email address")
            return current_page, gr.update(value=email), gr.update(value=""), None

        if not is_valid_email(email):
            gr.Warning("Please enter a valid email address (e.g., name@example.com)")
            return current_page, gr.update(value=email), gr.update(value=""), None

        # Sanitize email for CSV storage. is_valid_email already rejects
        # quotes; this stays as a second line of defense.
        safe_email = email.replace('"', "").replace("'", "")

        return (
            2,  # next page
            gr.update(value=""),  # clear input
            gr.update(value=safe_email),  # update debug
            safe_email,  # update state
        )

    except Exception as e:
        # UI boundary: report the problem instead of crashing the handler.
        print(f"Error in handle_username_submit: {str(e)}")
        gr.Warning("An error occurred. Please try again.")
        return current_page, gr.update(value=""), gr.update(value=""), None
|
|
|
392 |
|
393 |
|
394 |
def initialize(username):
|
instructions/home.jpg
CHANGED
![]() |
![]() |
instructions/page2.jpg
CHANGED
![]() |
![]() |