Update app.py
Browse files
app.py
CHANGED
@@ -10,7 +10,10 @@ import gradio as gr
|
|
10 |
from fastapi.middleware.cors import CORSMiddleware
|
11 |
from concurrent.futures import ThreadPoolExecutor
|
12 |
from functools import partial
|
13 |
-
|
|
|
|
|
|
|
14 |
# Configure logging
|
15 |
logging.basicConfig(level=logging.INFO)
|
16 |
logger = logging.getLogger(__name__)
|
@@ -24,6 +27,37 @@ CONFIDENCE_THRESHOLD = 0.65
|
|
24 |
BATCH_SIZE = 8 # Reduced batch size for CPU
|
25 |
MAX_WORKERS = 4 # Number of worker threads for processing
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
class TextWindowProcessor:
|
28 |
def __init__(self):
|
29 |
try:
|
@@ -420,57 +454,88 @@ class TextClassifier:
|
|
420 |
'num_sentences': num_sentences
|
421 |
}
|
422 |
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
|
440 |
-
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
|
456 |
-
|
457 |
-
|
458 |
-
|
459 |
-
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
474 |
|
475 |
# Initialize the classifier globally
|
476 |
classifier = TextClassifier()
|
|
|
10 |
from fastapi.middleware.cors import CORSMiddleware
|
11 |
from concurrent.futures import ThreadPoolExecutor
|
12 |
from functools import partial
|
13 |
+
import time
|
14 |
+
import csv
|
15 |
+
import os
|
16 |
+
from datetime import datetime
|
17 |
# Configure logging
|
18 |
logging.basicConfig(level=logging.INFO)
|
19 |
logger = logging.getLogger(__name__)
|
|
|
27 |
BATCH_SIZE = 8 # Reduced batch size for CPU
|
28 |
MAX_WORKERS = 4 # Number of worker threads for processing
|
29 |
|
30 |
+
|
31 |
+
def log_prediction_data(input_text, word_count, prediction, confidence, execution_time, mode,
                        csv_path="/tmp/prediction_logs.csv"):
    """Append one prediction record to a CSV log file.

    Args:
        input_text: Full text that was classified; newlines are flattened
            so the record stays on a single CSV row.
        word_count: Number of whitespace-separated words in input_text.
        prediction: Label produced by the classifier.
        confidence: Confidence score in [0, 1]; stored with 2 decimals.
        execution_time: Elapsed time in milliseconds; stored with 2 decimals.
        mode: Analysis mode the caller originally requested.
        csv_path: Destination CSV file (defaults to the original /tmp path,
            so existing callers are unaffected).

    Returns:
        True on success, False if any I/O error occurred (errors are
        logged, never raised, so logging can never break a prediction).
    """
    logger = logging.getLogger(__name__)

    # Write the header when the file is missing OR empty: checking only
    # os.path.isfile would skip the header for a previously created empty
    # file, producing headerless data rows.
    needs_header = not (os.path.isfile(csv_path) and os.path.getsize(csv_path) > 0)

    try:
        with open(csv_path, 'a', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)

            if needs_header:
                writer.writerow(["timestamp", "word_count", "prediction", "confidence", "execution_time_ms", "analysis_mode", "full_text"])

            # Flatten both \r and \n so the full text occupies one visual
            # line in the CSV (csv would quote newlines, but flattened text
            # is easier to grep and matches the original intent).
            cleaned_text = input_text.replace("\r", " ").replace("\n", " ")

            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            writer.writerow([timestamp, word_count, prediction, f"{confidence:.2f}", f"{execution_time:.2f}", mode, cleaned_text])

        # Lazy %-style args: formatting is skipped when the level is disabled.
        logger.info("Successfully logged prediction data to %s", csv_path)
        return True
    except Exception as e:
        logger.error("Error logging prediction data: %s", e)
        return False
|
59 |
+
|
60 |
+
|
61 |
class TextWindowProcessor:
|
62 |
def __init__(self):
|
63 |
try:
|
|
|
454 |
'num_sentences': num_sentences
|
455 |
}
|
456 |
|
457 |
+
|
458 |
+
def analyze_text(text: str, mode: str, classifier: "TextClassifier") -> tuple:
    """Analyze text using the specified mode and return formatted results.

    Args:
        text: Input text to classify.
        mode: "quick" or "detailed". Detailed mode falls back to quick
            mode when the text has fewer than 200 words (a note is added
            to the output explaining the switch).
        classifier: Object providing quick_scan(text) and detailed_scan(text).

    Returns:
        A 3-tuple of (highlighted or original text, per-sentence analysis
        string, overall summary string) for the UI outputs. As a side
        effect, one prediction record is logged via log_prediction_data.
    """
    start_time = time.time()
    word_count = len(text.split())

    # Detailed analysis needs a minimum amount of text; fall back to quick
    # mode for short inputs but remember what the caller asked for, both
    # for the user-facing note and for logging.
    original_mode = mode
    if word_count < 200 and mode == "detailed":
        mode = "quick"

    if mode == "quick":
        result = classifier.quick_scan(text)

        quick_analysis = f"""
PREDICTION: {result['prediction'].upper()}
Confidence: {result['confidence']*100:.1f}%
Windows analyzed: {result['num_windows']}
"""
        if original_mode == "detailed":
            quick_analysis += f"\n\nNote: Switched to quick mode because text contains only {word_count} words. Minimum 200 words required for detailed analysis."

        prediction = result['prediction']
        confidence = result['confidence']
        outputs = (
            text,  # No highlighting in quick mode
            "Quick scan mode - no sentence-level analysis available",
            quick_analysis,
        )
    else:
        analysis = classifier.detailed_scan(text)

        detailed_lines = []
        for pred in analysis['sentence_predictions']:
            # 'pct' (not 'confidence') so the loop does not shadow the
            # overall confidence logged below.
            pct = pred['confidence'] * 100
            detailed_lines.append(f"Sentence: {pred['sentence']}")
            detailed_lines.append(f"Prediction: {pred['prediction'].upper()}")
            detailed_lines.append(f"Confidence: {pct:.1f}%")
            detailed_lines.append("-" * 50)

        final_pred = analysis['overall_prediction']
        overall_result = f"""
FINAL PREDICTION: {final_pred['prediction'].upper()}
Overall confidence: {final_pred['confidence']*100:.1f}%
Number of sentences analyzed: {final_pred['num_sentences']}
"""
        prediction = final_pred['prediction']
        confidence = final_pred['confidence']
        outputs = (
            analysis['highlighted_text'],
            "\n".join(detailed_lines),
            overall_result,
        )

    # Shared epilogue: the original duplicated this timing/logging code in
    # both branches, and both measured after formatting, so hoisting it is
    # behavior-identical.
    execution_time = (time.time() - start_time) * 1000  # milliseconds
    log_prediction_data(
        input_text=text,
        word_count=word_count,
        prediction=prediction,
        confidence=confidence,
        execution_time=execution_time,
        mode=original_mode,
    )

    return outputs
|
538 |
+
|
539 |
|
540 |
# Initialize the classifier globally
|
541 |
classifier = TextClassifier()
|