ApsidalSolid4 committed
Commit 6fc3054 · verified · 1 Parent(s): 721ce5e

Update app.py

Files changed (1)
  1. app.py +117 -52
app.py CHANGED
@@ -10,7 +10,10 @@ import gradio as gr
 from fastapi.middleware.cors import CORSMiddleware
 from concurrent.futures import ThreadPoolExecutor
 from functools import partial
-
+import time
+import csv
+import os
+from datetime import datetime
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -24,6 +27,37 @@ CONFIDENCE_THRESHOLD = 0.65
 BATCH_SIZE = 8 # Reduced batch size for CPU
 MAX_WORKERS = 4 # Number of worker threads for processing
 
+
+def log_prediction_data(input_text, word_count, prediction, confidence, execution_time, mode):
+    """Log prediction data to a CSV file in the /tmp directory."""
+    # Define the CSV file path
+    csv_path = "/tmp/prediction_logs.csv"
+
+    # Check if file exists to determine if we need to write headers
+    file_exists = os.path.isfile(csv_path)
+
+    try:
+        with open(csv_path, 'a', newline='', encoding='utf-8') as f:
+            writer = csv.writer(f)
+
+            # Write headers if the file is newly created
+            if not file_exists:
+                writer.writerow(["timestamp", "word_count", "prediction", "confidence", "execution_time_ms", "analysis_mode", "full_text"])
+
+            # Clean up the input text for CSV storage (replace newlines with spaces)
+            cleaned_text = input_text.replace("\n", " ")
+
+            # Write the data row with the full text
+            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+            writer.writerow([timestamp, word_count, prediction, f"{confidence:.2f}", f"{execution_time:.2f}", mode, cleaned_text])
+
+        logger.info(f"Successfully logged prediction data to {csv_path}")
+        return True
+    except Exception as e:
+        logger.error(f"Error logging prediction data: {str(e)}")
+        return False
+
+
 class TextWindowProcessor:
     def __init__(self):
         try:
@@ -420,57 +454,88 @@ class TextClassifier:
             'num_sentences': num_sentences
         }
 
-def analyze_text(text: str, mode: str, classifier: TextClassifier) -> tuple:
-    """Analyze text using specified mode and return formatted results."""
-    # Count words in the text
-    word_count = len(text.split())
-
-    # If text is less than 200 words and detailed mode is selected, switch to quick mode
-    original_mode = mode
-    if word_count < 200 and mode == "detailed":
-        mode = "quick"
-
-    if mode == "quick":
-        result = classifier.quick_scan(text)
-
-        quick_analysis = f"""
-        PREDICTION: {result['prediction'].upper()}
-        Confidence: {result['confidence']*100:.1f}%
-        Windows analyzed: {result['num_windows']}
-        """
-
-        # Add note if mode was switched
-        if original_mode == "detailed":
-            quick_analysis += f"\n\nNote: Switched to quick mode because text contains only {word_count} words. Minimum 200 words required for detailed analysis."
-
-        return (
-            text, # No highlighting in quick mode
-            "Quick scan mode - no sentence-level analysis available",
-            quick_analysis
-        )
-    else:
-        analysis = classifier.detailed_scan(text)
-
-        detailed_analysis = []
-        for pred in analysis['sentence_predictions']:
-            confidence = pred['confidence'] * 100
-            detailed_analysis.append(f"Sentence: {pred['sentence']}")
-            detailed_analysis.append(f"Prediction: {pred['prediction'].upper()}")
-            detailed_analysis.append(f"Confidence: {confidence:.1f}%")
-            detailed_analysis.append("-" * 50)
-
-        final_pred = analysis['overall_prediction']
-        overall_result = f"""
-        FINAL PREDICTION: {final_pred['prediction'].upper()}
-        Overall confidence: {final_pred['confidence']*100:.1f}%
-        Number of sentences analyzed: {final_pred['num_sentences']}
-        """
-
-        return (
-            analysis['highlighted_text'],
-            "\n".join(detailed_analysis),
-            overall_result
-        )
+
+def analyze_text(text: str, mode: str, classifier: TextClassifier) -> tuple:
+    """Analyze text using specified mode and return formatted results."""
+    # Start timing
+    start_time = time.time()
+
+    # Count words in the text
+    word_count = len(text.split())
+
+    # If text is less than 200 words and detailed mode is selected, switch to quick mode
+    original_mode = mode
+    if word_count < 200 and mode == "detailed":
+        mode = "quick"
+
+    if mode == "quick":
+        result = classifier.quick_scan(text)
+
+        quick_analysis = f"""
+        PREDICTION: {result['prediction'].upper()}
+        Confidence: {result['confidence']*100:.1f}%
+        Windows analyzed: {result['num_windows']}
+        """
+
+        # Add note if mode was switched
+        if original_mode == "detailed":
+            quick_analysis += f"\n\nNote: Switched to quick mode because text contains only {word_count} words. Minimum 200 words required for detailed analysis."
+
+        # Calculate execution time in milliseconds
+        execution_time = (time.time() - start_time) * 1000
+
+        # Log the prediction data
+        log_prediction_data(
+            input_text=text,
+            word_count=word_count,
+            prediction=result['prediction'],
+            confidence=result['confidence'],
+            execution_time=execution_time,
+            mode=original_mode
+        )
+
+        return (
+            text, # No highlighting in quick mode
+            "Quick scan mode - no sentence-level analysis available",
+            quick_analysis
+        )
+    else:
+        analysis = classifier.detailed_scan(text)
+
+        detailed_analysis = []
+        for pred in analysis['sentence_predictions']:
+            confidence = pred['confidence'] * 100
+            detailed_analysis.append(f"Sentence: {pred['sentence']}")
+            detailed_analysis.append(f"Prediction: {pred['prediction'].upper()}")
+            detailed_analysis.append(f"Confidence: {confidence:.1f}%")
+            detailed_analysis.append("-" * 50)
+
+        final_pred = analysis['overall_prediction']
+        overall_result = f"""
+        FINAL PREDICTION: {final_pred['prediction'].upper()}
+        Overall confidence: {final_pred['confidence']*100:.1f}%
+        Number of sentences analyzed: {final_pred['num_sentences']}
+        """
+
+        # Calculate execution time in milliseconds
+        execution_time = (time.time() - start_time) * 1000
+
+        # Log the prediction data
+        log_prediction_data(
+            input_text=text,
+            word_count=word_count,
+            prediction=final_pred['prediction'],
+            confidence=final_pred['confidence'],
+            execution_time=execution_time,
+            mode=original_mode
+        )
+
+        return (
+            analysis['highlighted_text'],
+            "\n".join(detailed_analysis),
+            overall_result
+        )
+
 
 # Initialize the classifier globally
 classifier = TextClassifier()
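
The new log_prediction_data helper appends one row per request to /tmp/prediction_logs.csv. As a minimal sketch that is not part of this commit, the log could be read back for inspection with the standard csv module, assuming the path and column names written by the function above:

import csv

# Illustrative only: inspect the prediction log produced by log_prediction_data.
# The file path and column names are taken from the diff above.
with open("/tmp/prediction_logs.csv", newline="", encoding="utf-8") as f:
    for row in csv.DictReader(f):
        print(row["timestamp"], row["analysis_mode"], row["prediction"],
              row["confidence"], row["execution_time_ms"])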