Update app.py
Browse files
app.py
CHANGED
@@ -11,8 +11,11 @@ from fastapi.middleware.cors import CORSMiddleware
|
|
11 |
from concurrent.futures import ThreadPoolExecutor
|
12 |
from functools import partial
|
13 |
import time
|
14 |
-
import csv
|
15 |
from datetime import datetime
|
|
|
|
|
|
|
|
|
16 |
import base64
|
17 |
|
18 |
# Configure logging
|
@@ -28,8 +31,11 @@ CONFIDENCE_THRESHOLD = 0.65
|
|
28 |
BATCH_SIZE = 8 # Reduced batch size for CPU
|
29 |
MAX_WORKERS = 4 # Number of worker threads for processing
|
30 |
|
|
|
|
|
31 |
|
32 |
-
|
|
|
33 |
|
34 |
class TextWindowProcessor:
|
35 |
def __init__(self):
|
@@ -333,77 +339,131 @@ class TextClassifier:
|
|
333 |
'num_sentences': num_sentences
|
334 |
}
|
335 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
336 |
def log_prediction_data(input_text, word_count, prediction, confidence, execution_time, mode):
|
337 |
-
"""Log prediction data to
|
338 |
-
#
|
339 |
-
|
340 |
-
|
341 |
-
# Check if file exists to determine if we need to write headers
|
342 |
-
file_exists = os.path.isfile(csv_path)
|
343 |
|
344 |
try:
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
360 |
return True
|
|
|
361 |
except Exception as e:
|
362 |
-
logger.error(f"Error logging prediction data: {str(e)}")
|
363 |
return False
|
364 |
|
365 |
-
def
|
366 |
-
"""Read the
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
376 |
|
377 |
def analyze_text(text: str, mode: str, classifier: TextClassifier) -> tuple:
|
378 |
"""Analyze text using specified mode and return formatted results."""
|
379 |
# Check if the input text matches the secret password for log access
|
380 |
if text.strip() == SECRET_PASSWORD:
|
381 |
# Return logs instead of analysis
|
382 |
-
|
383 |
logs_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
384 |
-
logs_filename = f"prediction_logs_{logs_timestamp}.
|
385 |
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
<
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
401 |
|
402 |
# Return special admin output instead of normal analysis
|
403 |
return (
|
404 |
html_content,
|
405 |
f"Admin access granted. Logs retrieved at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
|
406 |
-
f"ADMIN MODE\nLogs available for download\
|
407 |
)
|
408 |
|
409 |
# Start timing for normal analysis
|
|
|
11 |
from concurrent.futures import ThreadPoolExecutor
|
12 |
from functools import partial
|
13 |
import time
|
|
|
14 |
from datetime import datetime
|
15 |
+
import openpyxl
|
16 |
+
from openpyxl import Workbook
|
17 |
+
from openpyxl.utils import get_column_letter
|
18 |
+
from io import BytesIO
|
19 |
import base64
|
20 |
|
21 |
# Configure logging
|
|
|
31 |
BATCH_SIZE = 8 # Reduced batch size for CPU
|
32 |
MAX_WORKERS = 4 # Number of worker threads for processing
|
33 |
|
34 |
+
# Secret password for logs access - CHANGE THIS! Make it complex and memorable only to you
|
35 |
+
SECRET_PASSWORD = "YourSuperSecretPassword256BitLongOnlyYouShouldKnowThis!$123"
|
36 |
|
37 |
+
# Excel file path for logs
|
38 |
+
EXCEL_LOG_PATH = "/tmp/prediction_logs.xlsx"
|
39 |
|
40 |
class TextWindowProcessor:
|
41 |
def __init__(self):
|
|
|
339 |
'num_sentences': num_sentences
|
340 |
}
|
341 |
|
342 |
+
def initialize_excel_log():
    """Create the Excel log workbook with its header row, unless it already exists.

    The workbook gets a single sheet titled "Prediction Logs" whose first row
    holds the column headers; column widths are set for readability. Nothing
    is done when a file is already present at EXCEL_LOG_PATH.
    """
    if os.path.exists(EXCEL_LOG_PATH):
        return

    # (header, column width) pairs in left-to-right column order.
    columns = (
        ("timestamp", 20),
        ("word_count", 10),
        ("prediction", 10),
        ("confidence", 10),
        ("execution_time_ms", 15),
        ("analysis_mode", 15),
        ("full_text", 100),
    )

    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "Prediction Logs"

    for index, (header, width) in enumerate(columns, start=1):
        sheet.cell(row=1, column=index, value=header)
        sheet.column_dimensions[get_column_letter(index)].width = width

    workbook.save(EXCEL_LOG_PATH)
    logger.info(f"Initialized Excel log file at {EXCEL_LOG_PATH}")
|
368 |
+
|
369 |
def log_prediction_data(input_text, word_count, prediction, confidence, execution_time, mode):
    """Append one prediction record to the Excel log at EXCEL_LOG_PATH.

    The workbook is created through initialize_excel_log() when it does not
    exist yet; it is then loaded, extended by one row and saved again.

    Args:
        input_text: The submitted text; written to the "full_text" column
            with newlines replaced by spaces and control characters removed.
        word_count: Value written to the "word_count" column.
        prediction: Value written to the "prediction" column.
        confidence: Number; stored as a string formatted with two decimals.
        execution_time: Number; stored as a string formatted with two decimals.
        mode: Value written to the "analysis_mode" column.

    Returns:
        True when the row was saved, False when anything failed. Errors are
        logged and never raised, so a logging problem cannot break the caller.
    """
    try:
        # Imported here so the block does not rely on a file-level import.
        from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE

        # Kept inside the try: a failure to create the file must be reported
        # as False like any other logging error instead of propagating.
        if not os.path.exists(EXCEL_LOG_PATH):
            initialize_excel_log()

        wb = openpyxl.load_workbook(EXCEL_LOG_PATH)
        ws = wb.active
        next_row = ws.max_row + 1

        # Flatten newlines, then drop the control characters openpyxl refuses
        # to write; without this one stray byte in user text loses the row.
        cleaned_text = ILLEGAL_CHARACTERS_RE.sub("", input_text.replace("\n", " "))

        row_data = [
            datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            word_count,
            prediction,
            f"{confidence:.2f}",
            f"{execution_time:.2f}",
            mode,
            cleaned_text,
        ]

        for col_num, value in enumerate(row_data, 1):
            cell = ws.cell(row=next_row, column=col_num, value=value)
            if isinstance(value, str):
                # openpyxl stores a string starting with "=" as a formula.
                # Force the plain-string type so submitted text can never
                # become a live formula in the downloaded workbook.
                cell.data_type = "s"

        wb.save(EXCEL_LOG_PATH)
        logger.info(f"Successfully logged prediction data to {EXCEL_LOG_PATH}")
        return True

    except Exception as e:
        logger.error(f"Error logging prediction data to Excel: {str(e)}")
        return False
|
410 |
|
411 |
+
def get_logs_as_base64():
    """Return the Excel log file's contents as a base64 string, or None.

    None is returned when no file exists at EXCEL_LOG_PATH, or when reading
    or encoding the file fails (the failure is logged).
    """
    if os.path.exists(EXCEL_LOG_PATH):
        try:
            with open(EXCEL_LOG_PATH, "rb") as log_file:
                raw_bytes = log_file.read()
            # Decode to text so the result can be embedded in a data: URL.
            return base64.b64encode(raw_bytes).decode('utf-8')
        except Exception as e:
            logger.error(f"Error reading Excel logs: {str(e)}")
    return None
|
428 |
|
429 |
def analyze_text(text: str, mode: str, classifier: TextClassifier) -> tuple:
|
430 |
"""Analyze text using specified mode and return formatted results."""
|
431 |
# Check if the input text matches the secret password for log access
|
432 |
if text.strip() == SECRET_PASSWORD:
|
433 |
# Return logs instead of analysis
|
434 |
+
base64_data = get_logs_as_base64()
|
435 |
logs_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
436 |
+
logs_filename = f"prediction_logs_{logs_timestamp}.xlsx"
|
437 |
|
438 |
+
if base64_data:
|
439 |
+
# Create downloadable HTML with the logs
|
440 |
+
html_content = f"""
|
441 |
+
<div style="background-color: #e6f7ff; padding: 15px; border-radius: 5px;">
|
442 |
+
<h3>Admin Access Granted - Prediction Logs</h3>
|
443 |
+
<p>Logs retrieved at: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}</p>
|
444 |
+
<p>Excel file contains all prediction data with full text of all submissions.</p>
|
445 |
+
<a href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{base64_data}"
|
446 |
+
download="{logs_filename}"
|
447 |
+
style="display: inline-block; margin-top: 10px; padding: 10px 15px;
|
448 |
+
background-color: #4CAF50; color: white; text-decoration: none;
|
449 |
+
border-radius: 4px;">
|
450 |
+
Download Excel Logs
|
451 |
+
</a>
|
452 |
+
</div>
|
453 |
+
"""
|
454 |
+
else:
|
455 |
+
html_content = """
|
456 |
+
<div style="background-color: #ffe6e6; padding: 15px; border-radius: 5px;">
|
457 |
+
<h3>Admin Access Granted - No Logs Found</h3>
|
458 |
+
<p>No prediction logs were found or there was an error reading the logs file.</p>
|
459 |
+
</div>
|
460 |
+
"""
|
461 |
|
462 |
# Return special admin output instead of normal analysis
|
463 |
return (
|
464 |
html_content,
|
465 |
f"Admin access granted. Logs retrieved at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
|
466 |
+
f"ADMIN MODE\nLogs available for download\nFile: {EXCEL_LOG_PATH}"
|
467 |
)
|
468 |
|
469 |
# Start timing for normal analysis
|