Spaces:

ApsidalSolid4
/

CITProjectAIDetector

Running

App Files Community

ApsidalSolid4 committed on Mar 13

Commit

41365d5

verified ·

1 Parent(s): 6fc3054

Update app.py

Browse files

Files changed (1) hide show

app.py +128 -208

app.py CHANGED Viewed

@@ -12,8 +12,8 @@ from concurrent.futures import ThreadPoolExecutor
 from functools import partial
 import time
 import csv
-import os
 from datetime import datetime
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -27,37 +27,6 @@ CONFIDENCE_THRESHOLD = 0.65
 BATCH_SIZE = 8  # Reduced batch size for CPU
 MAX_WORKERS = 4  # Number of worker threads for processing
-def log_prediction_data(input_text, word_count, prediction, confidence, execution_time, mode):
-    """Log prediction data to a CSV file in the /tmp directory."""
-    # Define the CSV file path
-    csv_path = "/tmp/prediction_logs.csv"
-    # Check if file exists to determine if we need to write headers
-    file_exists = os.path.isfile(csv_path)
-    try:
-        with open(csv_path, 'a', newline='', encoding='utf-8') as f:
-            writer = csv.writer(f)
-            # Write headers if the file is newly created
-            if not file_exists:
-                writer.writerow(["timestamp", "word_count", "prediction", "confidence", "execution_time_ms", "analysis_mode", "full_text"])
-            # Clean up the input text for CSV storage (replace newlines with spaces)
-            cleaned_text = input_text.replace("\n", " ")
-            # Write the data row with the full text
-            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-            writer.writerow([timestamp, word_count, prediction, f"{confidence:.2f}", f"{execution_time:.2f}", mode, cleaned_text])
-        logger.info(f"Successfully logged prediction data to {csv_path}")
-        return True
-    except Exception as e:
-        logger.error(f"Error logging prediction data: {str(e)}")
-        return False
 class TextWindowProcessor:
     def __init__(self):
         try:
@@ -210,100 +179,6 @@ class TextClassifier:
             'num_windows': len(predictions)
         }
-    # def detailed_scan(self, text: str) -> Dict:
-    #     """Original prediction method with modified window handling"""
-    #     if self.model is None or self.tokenizer is None:
-    #         self.load_model()
-    #     self.model.eval()
-    #     sentences = self.processor.split_into_sentences(text)
-    #     if not sentences:
-    #         return {}
-    #     # Create centered windows for each sentence
-    #     windows, window_sentence_indices = self.processor.create_centered_windows(sentences, WINDOW_SIZE)
-    #     # Track scores for each sentence
-    #     sentence_appearances = {i: 0 for i in range(len(sentences))}
-    #     sentence_scores = {i: {'human_prob': 0.0, 'ai_prob': 0.0} for i in range(len(sentences))}
-    #     # Process windows in batches
-    #     batch_size = 16
-    #     for i in range(0, len(windows), batch_size):
-    #         batch_windows = windows[i:i + batch_size]
-    #         batch_indices = window_sentence_indices[i:i + batch_size]
-    #         inputs = self.tokenizer(
-    #             batch_windows,
-    #             truncation=True,
-    #             padding=True,
-    #             max_length=MAX_LENGTH,
-    #             return_tensors="pt"
-    #         ).to(self.device)
-    #         with torch.no_grad():
-    #             outputs = self.model(**inputs)
-    #             probs = F.softmax(outputs.logits, dim=-1)
-    #             # Attribute predictions more carefully
-    #             for window_idx, indices in enumerate(batch_indices):
-    #                 center_idx = len(indices) // 2
-    #                 center_weight = 0.7  # Higher weight for center sentence
-    #                 edge_weight = 0.3 / (len(indices) - 1)  # Distribute remaining weight
-    #                 for pos, sent_idx in enumerate(indices):
-    #                     # Apply higher weight to center sentence
-    #                     weight = center_weight if pos == center_idx else edge_weight
-    #                     sentence_appearances[sent_idx] += weight
-    #                     sentence_scores[sent_idx]['human_prob'] += weight * probs[window_idx][1].item()
-    #                     sentence_scores[sent_idx]['ai_prob'] += weight * probs[window_idx][0].item()
-    #         del inputs, outputs, probs
-    #         if torch.cuda.is_available():
-    #             torch.cuda.empty_cache()
-    #     # Calculate final predictions
-    #     sentence_predictions = []
-    #     for i in range(len(sentences)):
-    #         if sentence_appearances[i] > 0:
-    #             human_prob = sentence_scores[i]['human_prob'] / sentence_appearances[i]
-    #             ai_prob = sentence_scores[i]['ai_prob'] / sentence_appearances[i]
-    #             # Only apply minimal smoothing at prediction boundaries
-    #             if i > 0 and i < len(sentences) - 1:
-    #                 prev_human = sentence_scores[i-1]['human_prob'] / sentence_appearances[i-1]
-    #                 prev_ai = sentence_scores[i-1]['ai_prob'] / sentence_appearances[i-1]
-    #                 next_human = sentence_scores[i+1]['human_prob'] / sentence_appearances[i+1]
-    #                 next_ai = sentence_scores[i+1]['ai_prob'] / sentence_appearances[i+1]
-    #                 # Check if we're at a prediction boundary
-    #                 current_pred = 'human' if human_prob > ai_prob else 'ai'
-    #                 prev_pred = 'human' if prev_human > prev_ai else 'ai'
-    #                 next_pred = 'human' if next_human > next_ai else 'ai'
-    #                 if current_pred != prev_pred or current_pred != next_pred:
-    #                     # Small adjustment at boundaries
-    #                     smooth_factor = 0.1
-    #                     human_prob = (human_prob * (1 - smooth_factor) +
-    #                                 (prev_human + next_human) * smooth_factor / 2)
-    #                     ai_prob = (ai_prob * (1 - smooth_factor) +
-    #                             (prev_ai + next_ai) * smooth_factor / 2)
-    #             sentence_predictions.append({
-    #                 'sentence': sentences[i],
-    #                 'human_prob': human_prob,
-    #                 'ai_prob': ai_prob,
-    #                 'prediction': 'human' if human_prob > ai_prob else 'ai',
-    #                 'confidence': max(human_prob, ai_prob)
-    #             })
-    #     return {
-    #         'sentence_predictions': sentence_predictions,
-    #         'highlighted_text': self.format_predictions_html(sentence_predictions),
-    #         'full_text': text,
-    #         'overall_prediction': self.aggregate_predictions(sentence_predictions)
-    #     }
     def detailed_scan(self, text: str) -> Dict:
         """Perform a detailed scan with improved sentence-level analysis."""
         # Clean up trailing whitespace
@@ -454,88 +329,124 @@ class TextClassifier:
             'num_sentences': num_sentences
         }
-    def analyze_text(text: str, mode: str, classifier: TextClassifier) -> tuple:
-        """Analyze text using specified mode and return formatted results."""
-        # Start timing
-        start_time = time.time()
-        # Count words in the text
-        word_count = len(text.split())
-        # If text is less than 200 words and detailed mode is selected, switch to quick mode
-        original_mode = mode
-        if word_count < 200 and mode == "detailed":
-            mode = "quick"
-        if mode == "quick":
-            result = classifier.quick_scan(text)
-            quick_analysis = f"""
-            PREDICTION: {result['prediction'].upper()}
-            Confidence: {result['confidence']*100:.1f}%
-            Windows analyzed: {result['num_windows']}
-            """
-            # Add note if mode was switched
-            if original_mode == "detailed":
-                quick_analysis += f"\n\nNote: Switched to quick mode because text contains only {word_count} words. Minimum 200 words required for detailed analysis."
-            # Calculate execution time in milliseconds
-            execution_time = (time.time() - start_time) * 1000
-            # Log the prediction data
-            log_prediction_data(
-                input_text=text,
-                word_count=word_count,
-                prediction=result['prediction'],
-                confidence=result['confidence'],
-                execution_time=execution_time,
-                mode=original_mode
-            )
-            return (
-                text,  # No highlighting in quick mode
-                "Quick scan mode - no sentence-level analysis available",
-                quick_analysis
-            )
-        else:
-            analysis = classifier.detailed_scan(text)
-            detailed_analysis = []
-            for pred in analysis['sentence_predictions']:
-                confidence = pred['confidence'] * 100
-                detailed_analysis.append(f"Sentence: {pred['sentence']}")
-                detailed_analysis.append(f"Prediction: {pred['prediction'].upper()}")
-                detailed_analysis.append(f"Confidence: {confidence:.1f}%")
-                detailed_analysis.append("-" * 50)
-            final_pred = analysis['overall_prediction']
-            overall_result = f"""
-            FINAL PREDICTION: {final_pred['prediction'].upper()}
-            Overall confidence: {final_pred['confidence']*100:.1f}%
-            Number of sentences analyzed: {final_pred['num_sentences']}
-            """
-            # Calculate execution time in milliseconds
-            execution_time = (time.time() - start_time) * 1000
-            # Log the prediction data
-            log_prediction_data(
-                input_text=text,
-                word_count=word_count,
-                prediction=final_pred['prediction'],
-                confidence=final_pred['confidence'],
-                execution_time=execution_time,
-                mode=original_mode
-            )
-            return (
-                analysis['highlighted_text'],
-                "\n".join(detailed_analysis),
-                overall_result
-            )
 # Initialize the classifier globally
 classifier = TextClassifier()
@@ -567,8 +478,17 @@ demo = gr.Interface(
     flagging_mode="never"
 )
-app = demo.app
-app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],  # For development
     allow_credentials=True,

 from functools import partial
 import time
 import csv
 from datetime import datetime
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 BATCH_SIZE = 8  # Reduced batch size for CPU
 MAX_WORKERS = 4  # Number of worker threads for processing
 class TextWindowProcessor:
     def __init__(self):
         try:
             'num_windows': len(predictions)
         }
     def detailed_scan(self, text: str) -> Dict:
         """Perform a detailed scan with improved sentence-level analysis."""
         # Clean up trailing whitespace
             'num_sentences': num_sentences
         }
+def log_prediction_data(input_text, word_count, prediction, confidence, execution_time, mode):
+    """Log prediction data to a CSV file in the /tmp directory."""
+    # Define the CSV file path
+    csv_path = "/tmp/prediction_logs.csv"
+    # Check if file exists to determine if we need to write headers
+    file_exists = os.path.isfile(csv_path)
+    try:
+        with open(csv_path, 'a', newline='', encoding='utf-8') as f:
+            writer = csv.writer(f)
+            # Write headers if the file is newly created
+            if not file_exists:
+                writer.writerow(["timestamp", "word_count", "prediction", "confidence", "execution_time_ms", "analysis_mode", "full_text"])
+            # Clean up the input text for CSV storage (replace newlines with spaces)
+            cleaned_text = input_text.replace("\n", " ")
+            # Write the data row with the full text
+            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+            writer.writerow([timestamp, word_count, prediction, f"{confidence:.2f}", f"{execution_time:.2f}", mode, cleaned_text])
+        logger.info(f"Successfully logged prediction data to {csv_path}")
+        return True
+    except Exception as e:
+        logger.error(f"Error logging prediction data: {str(e)}")
+        return False
+def analyze_text(text: str, mode: str, classifier: TextClassifier) -> tuple:
+    """Analyze text using specified mode and return formatted results."""
+    # Start timing
+    start_time = time.time()
+    # Count words in the text
+    word_count = len(text.split())
+    # If text is less than 200 words and detailed mode is selected, switch to quick mode
+    original_mode = mode
+    if word_count < 200 and mode == "detailed":
+        mode = "quick"
+    if mode == "quick":
+        result = classifier.quick_scan(text)
+        quick_analysis = f"""
+        PREDICTION: {result['prediction'].upper()}
+        Confidence: {result['confidence']*100:.1f}%
+        Windows analyzed: {result['num_windows']}
+        """
+        # Add note if mode was switched
+        if original_mode == "detailed":
+            quick_analysis += f"\n\nNote: Switched to quick mode because text contains only {word_count} words. Minimum 200 words required for detailed analysis."
+        # Calculate execution time in milliseconds
+        execution_time = (time.time() - start_time) * 1000
+        # Log the prediction data
+        log_prediction_data(
+            input_text=text,
+            word_count=word_count,
+            prediction=result['prediction'],
+            confidence=result['confidence'],
+            execution_time=execution_time,
+            mode=original_mode
+        )
+        return (
+            text,  # No highlighting in quick mode
+            "Quick scan mode - no sentence-level analysis available",
+            quick_analysis
+        )
+    else:
+        analysis = classifier.detailed_scan(text)
+        detailed_analysis = []
+        for pred in analysis['sentence_predictions']:
+            confidence = pred['confidence'] * 100
+            detailed_analysis.append(f"Sentence: {pred['sentence']}")
+            detailed_analysis.append(f"Prediction: {pred['prediction'].upper()}")
+            detailed_analysis.append(f"Confidence: {confidence:.1f}%")
+            detailed_analysis.append("-" * 50)
+        final_pred = analysis['overall_prediction']
+        overall_result = f"""
+        FINAL PREDICTION: {final_pred['prediction'].upper()}
+        Overall confidence: {final_pred['confidence']*100:.1f}%
+        Number of sentences analyzed: {final_pred['num_sentences']}
+        """
+        # Calculate execution time in milliseconds
+        execution_time = (time.time() - start_time) * 1000
+        # Log the prediction data
+        log_prediction_data(
+            input_text=text,
+            word_count=word_count,
+            prediction=final_pred['prediction'],
+            confidence=final_pred['confidence'],
+            execution_time=execution_time,
+            mode=original_mode
+        )
+        return (
+            analysis['highlighted_text'],
+            "\n".join(detailed_analysis),
+            overall_result
+        )
+# Add a function to download the logs
+def download_logs():
+    log_path = "/tmp/prediction_logs.csv"
+    if os.path.exists(log_path):
+        with open(log_path, 'r', encoding='utf-8') as f:
+            content = f.read()
+        return content
+    return "No logs found."
 # Initialize the classifier globally
 classifier = TextClassifier()
     flagging_mode="never"
 )
+# Add admin panel for log access (NOTE: intended for space owners, but nothing in this change restricts who can open the tab - verify access control)
+with gr.Blocks() as admin_interface:
+    gr.Markdown("## Admin Panel - Data Logs")
+    download_button = gr.Button("Download Logs")
+    log_output = gr.File(label="Prediction Logs")
+    download_button.click(fn=download_logs, outputs=log_output)
+# Combine interfaces
+app = gr.TabbedInterface([demo, admin_interface], ["AI Text Detector", "Admin"])
+app.app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],  # For development
     allow_credentials=True,