ApsidalSolid4 committed on
Commit
41365d5
·
verified ·
1 Parent(s): 6fc3054

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -208
app.py CHANGED
@@ -12,8 +12,8 @@ from concurrent.futures import ThreadPoolExecutor
12
  from functools import partial
13
  import time
14
  import csv
15
- import os
16
  from datetime import datetime
 
17
  # Configure logging
18
  logging.basicConfig(level=logging.INFO)
19
  logger = logging.getLogger(__name__)
@@ -27,37 +27,6 @@ CONFIDENCE_THRESHOLD = 0.65
27
  BATCH_SIZE = 8 # Reduced batch size for CPU
28
  MAX_WORKERS = 4 # Number of worker threads for processing
29
 
30
-
31
- def log_prediction_data(input_text, word_count, prediction, confidence, execution_time, mode):
32
- """Log prediction data to a CSV file in the /tmp directory."""
33
- # Define the CSV file path
34
- csv_path = "/tmp/prediction_logs.csv"
35
-
36
- # Check if file exists to determine if we need to write headers
37
- file_exists = os.path.isfile(csv_path)
38
-
39
- try:
40
- with open(csv_path, 'a', newline='', encoding='utf-8') as f:
41
- writer = csv.writer(f)
42
-
43
- # Write headers if the file is newly created
44
- if not file_exists:
45
- writer.writerow(["timestamp", "word_count", "prediction", "confidence", "execution_time_ms", "analysis_mode", "full_text"])
46
-
47
- # Clean up the input text for CSV storage (replace newlines with spaces)
48
- cleaned_text = input_text.replace("\n", " ")
49
-
50
- # Write the data row with the full text
51
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
52
- writer.writerow([timestamp, word_count, prediction, f"{confidence:.2f}", f"{execution_time:.2f}", mode, cleaned_text])
53
-
54
- logger.info(f"Successfully logged prediction data to {csv_path}")
55
- return True
56
- except Exception as e:
57
- logger.error(f"Error logging prediction data: {str(e)}")
58
- return False
59
-
60
-
61
  class TextWindowProcessor:
62
  def __init__(self):
63
  try:
@@ -210,100 +179,6 @@ class TextClassifier:
210
  'num_windows': len(predictions)
211
  }
212
 
213
- # def detailed_scan(self, text: str) -> Dict:
214
- # """Original prediction method with modified window handling"""
215
- # if self.model is None or self.tokenizer is None:
216
- # self.load_model()
217
-
218
- # self.model.eval()
219
- # sentences = self.processor.split_into_sentences(text)
220
- # if not sentences:
221
- # return {}
222
-
223
- # # Create centered windows for each sentence
224
- # windows, window_sentence_indices = self.processor.create_centered_windows(sentences, WINDOW_SIZE)
225
-
226
- # # Track scores for each sentence
227
- # sentence_appearances = {i: 0 for i in range(len(sentences))}
228
- # sentence_scores = {i: {'human_prob': 0.0, 'ai_prob': 0.0} for i in range(len(sentences))}
229
-
230
- # # Process windows in batches
231
- # batch_size = 16
232
- # for i in range(0, len(windows), batch_size):
233
- # batch_windows = windows[i:i + batch_size]
234
- # batch_indices = window_sentence_indices[i:i + batch_size]
235
-
236
- # inputs = self.tokenizer(
237
- # batch_windows,
238
- # truncation=True,
239
- # padding=True,
240
- # max_length=MAX_LENGTH,
241
- # return_tensors="pt"
242
- # ).to(self.device)
243
-
244
- # with torch.no_grad():
245
- # outputs = self.model(**inputs)
246
- # probs = F.softmax(outputs.logits, dim=-1)
247
-
248
- # # Attribute predictions more carefully
249
- # for window_idx, indices in enumerate(batch_indices):
250
- # center_idx = len(indices) // 2
251
- # center_weight = 0.7 # Higher weight for center sentence
252
- # edge_weight = 0.3 / (len(indices) - 1) # Distribute remaining weight
253
-
254
- # for pos, sent_idx in enumerate(indices):
255
- # # Apply higher weight to center sentence
256
- # weight = center_weight if pos == center_idx else edge_weight
257
- # sentence_appearances[sent_idx] += weight
258
- # sentence_scores[sent_idx]['human_prob'] += weight * probs[window_idx][1].item()
259
- # sentence_scores[sent_idx]['ai_prob'] += weight * probs[window_idx][0].item()
260
-
261
- # del inputs, outputs, probs
262
- # if torch.cuda.is_available():
263
- # torch.cuda.empty_cache()
264
-
265
- # # Calculate final predictions
266
- # sentence_predictions = []
267
- # for i in range(len(sentences)):
268
- # if sentence_appearances[i] > 0:
269
- # human_prob = sentence_scores[i]['human_prob'] / sentence_appearances[i]
270
- # ai_prob = sentence_scores[i]['ai_prob'] / sentence_appearances[i]
271
-
272
- # # Only apply minimal smoothing at prediction boundaries
273
- # if i > 0 and i < len(sentences) - 1:
274
- # prev_human = sentence_scores[i-1]['human_prob'] / sentence_appearances[i-1]
275
- # prev_ai = sentence_scores[i-1]['ai_prob'] / sentence_appearances[i-1]
276
- # next_human = sentence_scores[i+1]['human_prob'] / sentence_appearances[i+1]
277
- # next_ai = sentence_scores[i+1]['ai_prob'] / sentence_appearances[i+1]
278
-
279
- # # Check if we're at a prediction boundary
280
- # current_pred = 'human' if human_prob > ai_prob else 'ai'
281
- # prev_pred = 'human' if prev_human > prev_ai else 'ai'
282
- # next_pred = 'human' if next_human > next_ai else 'ai'
283
-
284
- # if current_pred != prev_pred or current_pred != next_pred:
285
- # # Small adjustment at boundaries
286
- # smooth_factor = 0.1
287
- # human_prob = (human_prob * (1 - smooth_factor) +
288
- # (prev_human + next_human) * smooth_factor / 2)
289
- # ai_prob = (ai_prob * (1 - smooth_factor) +
290
- # (prev_ai + next_ai) * smooth_factor / 2)
291
-
292
- # sentence_predictions.append({
293
- # 'sentence': sentences[i],
294
- # 'human_prob': human_prob,
295
- # 'ai_prob': ai_prob,
296
- # 'prediction': 'human' if human_prob > ai_prob else 'ai',
297
- # 'confidence': max(human_prob, ai_prob)
298
- # })
299
-
300
- # return {
301
- # 'sentence_predictions': sentence_predictions,
302
- # 'highlighted_text': self.format_predictions_html(sentence_predictions),
303
- # 'full_text': text,
304
- # 'overall_prediction': self.aggregate_predictions(sentence_predictions)
305
- # }
306
-
307
  def detailed_scan(self, text: str) -> Dict:
308
  """Perform a detailed scan with improved sentence-level analysis."""
309
  # Clean up trailing whitespace
@@ -454,88 +329,124 @@ class TextClassifier:
454
  'num_sentences': num_sentences
455
  }
456
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457
 
458
- def analyze_text(text: str, mode: str, classifier: TextClassifier) -> tuple:
459
- """Analyze text using specified mode and return formatted results."""
460
- # Start timing
461
- start_time = time.time()
462
-
463
- # Count words in the text
464
- word_count = len(text.split())
465
-
466
- # If text is less than 200 words and detailed mode is selected, switch to quick mode
467
- original_mode = mode
468
- if word_count < 200 and mode == "detailed":
469
- mode = "quick"
470
-
471
- if mode == "quick":
472
- result = classifier.quick_scan(text)
473
-
474
- quick_analysis = f"""
475
- PREDICTION: {result['prediction'].upper()}
476
- Confidence: {result['confidence']*100:.1f}%
477
- Windows analyzed: {result['num_windows']}
478
- """
479
-
480
- # Add note if mode was switched
481
- if original_mode == "detailed":
482
- quick_analysis += f"\n\nNote: Switched to quick mode because text contains only {word_count} words. Minimum 200 words required for detailed analysis."
483
-
484
- # Calculate execution time in milliseconds
485
- execution_time = (time.time() - start_time) * 1000
486
-
487
- # Log the prediction data
488
- log_prediction_data(
489
- input_text=text,
490
- word_count=word_count,
491
- prediction=result['prediction'],
492
- confidence=result['confidence'],
493
- execution_time=execution_time,
494
- mode=original_mode
495
- )
496
-
497
- return (
498
- text, # No highlighting in quick mode
499
- "Quick scan mode - no sentence-level analysis available",
500
- quick_analysis
501
- )
502
- else:
503
- analysis = classifier.detailed_scan(text)
504
-
505
- detailed_analysis = []
506
- for pred in analysis['sentence_predictions']:
507
- confidence = pred['confidence'] * 100
508
- detailed_analysis.append(f"Sentence: {pred['sentence']}")
509
- detailed_analysis.append(f"Prediction: {pred['prediction'].upper()}")
510
- detailed_analysis.append(f"Confidence: {confidence:.1f}%")
511
- detailed_analysis.append("-" * 50)
512
-
513
- final_pred = analysis['overall_prediction']
514
- overall_result = f"""
515
- FINAL PREDICTION: {final_pred['prediction'].upper()}
516
- Overall confidence: {final_pred['confidence']*100:.1f}%
517
- Number of sentences analyzed: {final_pred['num_sentences']}
518
- """
519
-
520
- # Calculate execution time in milliseconds
521
- execution_time = (time.time() - start_time) * 1000
522
-
523
- # Log the prediction data
524
- log_prediction_data(
525
- input_text=text,
526
- word_count=word_count,
527
- prediction=final_pred['prediction'],
528
- confidence=final_pred['confidence'],
529
- execution_time=execution_time,
530
- mode=original_mode
531
- )
532
-
533
- return (
534
- analysis['highlighted_text'],
535
- "\n".join(detailed_analysis),
536
- overall_result
537
- )
538
 
 
 
 
 
 
 
 
 
539
 
540
  # Initialize the classifier globally
541
  classifier = TextClassifier()
@@ -567,8 +478,17 @@ demo = gr.Interface(
567
  flagging_mode="never"
568
  )
569
 
570
- app = demo.app
571
- app.add_middleware(
 
 
 
 
 
 
 
 
 
572
  CORSMiddleware,
573
  allow_origins=["*"], # For development
574
  allow_credentials=True,
 
12
  from functools import partial
13
  import time
14
  import csv
 
15
  from datetime import datetime
16
+
17
  # Configure logging
18
  logging.basicConfig(level=logging.INFO)
19
  logger = logging.getLogger(__name__)
 
27
  BATCH_SIZE = 8 # Reduced batch size for CPU
28
  MAX_WORKERS = 4 # Number of worker threads for processing
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  class TextWindowProcessor:
31
  def __init__(self):
32
  try:
 
179
  'num_windows': len(predictions)
180
  }
181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  def detailed_scan(self, text: str) -> Dict:
183
  """Perform a detailed scan with improved sentence-level analysis."""
184
  # Clean up trailing whitespace
 
329
  'num_sentences': num_sentences
330
  }
331
 
332
def log_prediction_data(input_text, word_count, prediction, confidence, execution_time, mode):
    """Append one prediction record to a CSV log under /tmp.

    Writes a row of (timestamp, word_count, prediction, confidence,
    execution_time_ms, analysis_mode, full_text); a header row is written
    the first time the file is created.

    Args:
        input_text: The full text that was analyzed.
        word_count: Number of whitespace-separated words in the input.
        prediction: Predicted label (e.g. "human" or "ai").
        confidence: Prediction confidence in [0, 1]; stored with 2 decimals.
        execution_time: Wall-clock analysis time in milliseconds.
        mode: The analysis mode the user requested ("quick"/"detailed").

    Returns:
        True on success, False on failure. Errors are logged, never raised,
        so best-effort logging cannot break the caller's request handling.
    """
    # BUG FIX: this commit removed the module-level `import os`, which made
    # `os.path.isfile` below raise NameError. Import locally instead.
    import os

    csv_path = "/tmp/prediction_logs.csv"

    # Header row is only needed when the file does not exist yet.
    file_exists = os.path.isfile(csv_path)

    try:
        # newline='' is required so csv.writer controls line endings itself.
        with open(csv_path, 'a', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)

            if not file_exists:
                writer.writerow(["timestamp", "word_count", "prediction", "confidence", "execution_time_ms", "analysis_mode", "full_text"])

            # Newlines inside the text would break the one-row-per-record layout.
            cleaned_text = input_text.replace("\n", " ")

            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            writer.writerow([timestamp, word_count, prediction, f"{confidence:.2f}", f"{execution_time:.2f}", mode, cleaned_text])

        logger.info(f"Successfully logged prediction data to {csv_path}")
        return True
    except Exception as e:
        # Deliberate broad catch: logging must never break a prediction request.
        logger.error(f"Error logging prediction data: {str(e)}")
        return False
360
 
361
def analyze_text(text: str, mode: str, classifier: TextClassifier) -> tuple:
    """Analyze text using the specified mode and return formatted results.

    Args:
        text: The input text to classify.
        mode: "quick" or "detailed". Detailed mode silently falls back to
            quick mode for inputs shorter than 200 words.
        classifier: The TextClassifier instance used to run the scan.

    Returns:
        A 3-tuple of (highlighted_text, sentence_analysis, overall_summary)
        matching the three Gradio output components.
    """
    start_time = time.time()
    word_count = len(text.split())

    # Detailed mode needs enough text to be meaningful; fall back to quick
    # mode for short inputs, but remember the requested mode so the fallback
    # can be explained to the user and logged under what they asked for.
    original_mode = mode
    if word_count < 200 and mode == "detailed":
        mode = "quick"

    if mode == "quick":
        result = classifier.quick_scan(text)
        prediction = result['prediction']
        confidence = result['confidence']

        quick_analysis = f"""
PREDICTION: {prediction.upper()}
Confidence: {confidence*100:.1f}%
Windows analyzed: {result['num_windows']}
"""

        # Tell the user why their detailed request was downgraded.
        if original_mode == "detailed":
            quick_analysis += f"\n\nNote: Switched to quick mode because text contains only {word_count} words. Minimum 200 words required for detailed analysis."

        outputs = (
            text,  # No highlighting in quick mode
            "Quick scan mode - no sentence-level analysis available",
            quick_analysis
        )
    else:
        analysis = classifier.detailed_scan(text)

        # One formatted entry per sentence, separated by horizontal rules.
        detailed_analysis = []
        for pred in analysis['sentence_predictions']:
            detailed_analysis.append(f"Sentence: {pred['sentence']}")
            detailed_analysis.append(f"Prediction: {pred['prediction'].upper()}")
            detailed_analysis.append(f"Confidence: {pred['confidence'] * 100:.1f}%")
            detailed_analysis.append("-" * 50)

        final_pred = analysis['overall_prediction']
        prediction = final_pred['prediction']
        confidence = final_pred['confidence']

        overall_result = f"""
FINAL PREDICTION: {prediction.upper()}
Overall confidence: {confidence*100:.1f}%
Number of sentences analyzed: {final_pred['num_sentences']}
"""

        outputs = (
            analysis['highlighted_text'],
            "\n".join(detailed_analysis),
            overall_result
        )

    # Timing and logging were previously duplicated verbatim in both
    # branches; do them once here. Execution time is in milliseconds.
    execution_time = (time.time() - start_time) * 1000
    log_prediction_data(
        input_text=text,
        word_count=word_count,
        prediction=prediction,
        confidence=confidence,
        execution_time=execution_time,
        mode=original_mode
    )

    return outputs
441
 
442
# Add a function to download the logs
def download_logs():
    """Return the prediction-log CSV path for the admin download button.

    The return value feeds a `gr.File` output component, which expects a
    filesystem path (or None for "no file"), NOT the file's contents —
    the previous version returned the raw CSV text, which gr.File would
    have tried to treat as a path.
    """
    # Local import: this commit removed the module-level `import os`.
    import os

    log_path = "/tmp/prediction_logs.csv"
    if os.path.exists(log_path):
        return log_path
    # gr.File renders None as "no file available".
    return None
450
 
451
  # Initialize the classifier globally
452
  classifier = TextClassifier()
 
478
  flagging_mode="never"
479
  )
480
 
481
# Admin tab: lets space owners fetch the prediction-log CSV from /tmp.
# Built as a small Blocks layout so it can sit alongside the main demo.
with gr.Blocks() as admin_interface:
    gr.Markdown("## Admin Panel - Data Logs")
    fetch_logs_btn = gr.Button("Download Logs")
    logs_file = gr.File(label="Prediction Logs")
    # Clicking the button runs download_logs and feeds its result to the
    # file component for download.
    fetch_logs_btn.click(fn=download_logs, outputs=logs_file)

# Expose both UIs as tabs of a single top-level app.
app = gr.TabbedInterface([demo, admin_interface], ["AI Text Detector", "Admin"])
490
+
491
+ app.app.add_middleware(
492
  CORSMiddleware,
493
  allow_origins=["*"], # For development
494
  allow_credentials=True,