Spaces:

ApsidalSolid4
/

CITProjectAIDetector

Running

App Files Community

ApsidalSolid4 committed on Feb 20

Commit

33fd63d

verified ·

1 Parent(s): 9a1a827

Update app.py

Browse files

Files changed (1) hide show

app.py +103 -103

app.py CHANGED Viewed

@@ -176,112 +176,112 @@ class TextClassifier:
             'num_windows': len(predictions)
         }
-        def detailed_scan(self, text: str) -> Dict:
-            """Original prediction method with modified window handling"""
-            # Clean up trailing whitespace
-            text = text.rstrip()
-            if not text.strip():
-                return {
-                    'sentence_predictions': [],
-                    'highlighted_text': '',
-                    'full_text': '',
-                    'overall_prediction': {
-                        'prediction': 'unknown',
-                        'confidence': 0.0,
-                        'num_sentences': 0
-                    }
-                }
-            self.model.eval()
-            sentences = self.processor.split_into_sentences(text)
-            if not sentences:
-                return {}
-            # Create centered windows for each sentence
-            windows, window_sentence_indices = self.processor.create_centered_windows(sentences, WINDOW_SIZE)
-            # Track scores for each sentence
-            sentence_appearances = {i: 0 for i in range(len(sentences))}
-            sentence_scores = {i: {'human_prob': 0.0, 'ai_prob': 0.0} for i in range(len(sentences))}
-            # Process windows in batches
-            batch_size = 16
-            for i in range(0, len(windows), batch_size):
-                batch_windows = windows[i:i + batch_size]
-                batch_indices = window_sentence_indices[i:i + batch_size]
-                inputs = self.tokenizer(
-                    batch_windows,
-                    truncation=True,
-                    padding=True,
-                    max_length=MAX_LENGTH,
-                    return_tensors="pt"
-                ).to(self.device)
-                with torch.no_grad():
-                    outputs = self.model(**inputs)
-                    probs = F.softmax(outputs.logits, dim=-1)
-                    # Attribute predictions with weighted scoring
-                    for window_idx, indices in enumerate(batch_indices):
-                        center_idx = len(indices) // 2
-                        center_weight = 0.7  # Higher weight for center sentence
-                        edge_weight = 0.3 / (len(indices) - 1)  # Distribute remaining weight
-                        for pos, sent_idx in enumerate(indices):
-                            # Apply higher weight to center sentence
-                            weight = center_weight if pos == center_idx else edge_weight
-                            sentence_appearances[sent_idx] += weight
-                            sentence_scores[sent_idx]['human_prob'] += weight * probs[window_idx][1].item()
-                            sentence_scores[sent_idx]['ai_prob'] += weight * probs[window_idx][0].item()
-                # Clean up memory
-                del inputs, outputs, probs
-                if torch.cuda.is_available():
-                    torch.cuda.empty_cache()
-            # Calculate final predictions with boundary smoothing
-            sentence_predictions = []
-            for i in range(len(sentences)):
-                if sentence_appearances[i] > 0:
-                    human_prob = sentence_scores[i]['human_prob'] / sentence_appearances[i]
-                    ai_prob = sentence_scores[i]['ai_prob'] / sentence_appearances[i]
-                    # Only apply minimal smoothing at prediction boundaries
-                    if i > 0 and i < len(sentences) - 1:
-                        prev_human = sentence_scores[i-1]['human_prob'] / sentence_appearances[i-1]
-                        prev_ai = sentence_scores[i-1]['ai_prob'] / sentence_appearances[i-1]
-                        next_human = sentence_scores[i+1]['human_prob'] / sentence_appearances[i+1]
-                        next_ai = sentence_scores[i+1]['ai_prob'] / sentence_appearances[i+1]
-                        # Check if we're at a prediction boundary
-                        current_pred = 'human' if human_prob > ai_prob else 'ai'
-                        prev_pred = 'human' if prev_human > prev_ai else 'ai'
-                        next_pred = 'human' if next_human > next_ai else 'ai'
-                        if current_pred != prev_pred or current_pred != next_pred:
-                            # Small adjustment at boundaries
-                            smooth_factor = 0.1
-                            human_prob = (human_prob * (1 - smooth_factor) +
-                                        (prev_human + next_human) * smooth_factor / 2)
-                            ai_prob = (ai_prob * (1 - smooth_factor) +
-                                    (prev_ai + next_ai) * smooth_factor / 2)
-                    sentence_predictions.append({
-                        'sentence': sentences[i],
-                        'human_prob': human_prob,
-                        'ai_prob': ai_prob,
-                        'prediction': 'human' if human_prob > ai_prob else 'ai',
-                        'confidence': max(human_prob, ai_prob)
-                    })
             return {
-                'sentence_predictions': sentence_predictions,
-                'highlighted_text': self.format_predictions_html(sentence_predictions),
-                'full_text': text,
-                'overall_prediction': self.aggregate_predictions(sentence_predictions)
             }
     def format_predictions_html(self, sentence_predictions: List[Dict]) -> str:

             'num_windows': len(predictions)
         }
+    def detailed_scan(self, text: str) -> Dict:
+        """Original prediction method with modified window handling"""
+        # Clean up trailing whitespace
+        text = text.rstrip()
+        if not text.strip():
             return {
+                'sentence_predictions': [],
+                'highlighted_text': '',
+                'full_text': '',
+                'overall_prediction': {
+                    'prediction': 'unknown',
+                    'confidence': 0.0,
+                    'num_sentences': 0
+                }
             }
+        self.model.eval()
+        sentences = self.processor.split_into_sentences(text)
+        if not sentences:
+            return {}
+        # Create centered windows for each sentence
+        windows, window_sentence_indices = self.processor.create_centered_windows(sentences, WINDOW_SIZE)
+        # Track scores for each sentence
+        sentence_appearances = {i: 0 for i in range(len(sentences))}
+        sentence_scores = {i: {'human_prob': 0.0, 'ai_prob': 0.0} for i in range(len(sentences))}
+        # Process windows in batches
+        batch_size = 16
+        for i in range(0, len(windows), batch_size):
+            batch_windows = windows[i:i + batch_size]
+            batch_indices = window_sentence_indices[i:i + batch_size]
+            inputs = self.tokenizer(
+                batch_windows,
+                truncation=True,
+                padding=True,
+                max_length=MAX_LENGTH,
+                return_tensors="pt"
+            ).to(self.device)
+            with torch.no_grad():
+                outputs = self.model(**inputs)
+                probs = F.softmax(outputs.logits, dim=-1)
+                # Attribute predictions with weighted scoring
+                for window_idx, indices in enumerate(batch_indices):
+                    center_idx = len(indices) // 2
+                    center_weight = 0.7  # Higher weight for center sentence
+                    edge_weight = 0.3 / (len(indices) - 1)  # Distribute remaining weight
+                    for pos, sent_idx in enumerate(indices):
+                        # Apply higher weight to center sentence
+                        weight = center_weight if pos == center_idx else edge_weight
+                        sentence_appearances[sent_idx] += weight
+                        sentence_scores[sent_idx]['human_prob'] += weight * probs[window_idx][1].item()
+                        sentence_scores[sent_idx]['ai_prob'] += weight * probs[window_idx][0].item()
+            # Clean up memory
+            del inputs, outputs, probs
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+        # Calculate final predictions with boundary smoothing
+        sentence_predictions = []
+        for i in range(len(sentences)):
+            if sentence_appearances[i] > 0:
+                human_prob = sentence_scores[i]['human_prob'] / sentence_appearances[i]
+                ai_prob = sentence_scores[i]['ai_prob'] / sentence_appearances[i]
+                # Only apply minimal smoothing at prediction boundaries
+                if i > 0 and i < len(sentences) - 1:
+                    prev_human = sentence_scores[i-1]['human_prob'] / sentence_appearances[i-1]
+                    prev_ai = sentence_scores[i-1]['ai_prob'] / sentence_appearances[i-1]
+                    next_human = sentence_scores[i+1]['human_prob'] / sentence_appearances[i+1]
+                    next_ai = sentence_scores[i+1]['ai_prob'] / sentence_appearances[i+1]
+                    # Check if we're at a prediction boundary
+                    current_pred = 'human' if human_prob > ai_prob else 'ai'
+                    prev_pred = 'human' if prev_human > prev_ai else 'ai'
+                    next_pred = 'human' if next_human > next_ai else 'ai'
+                    if current_pred != prev_pred or current_pred != next_pred:
+                        # Small adjustment at boundaries
+                        smooth_factor = 0.1
+                        human_prob = (human_prob * (1 - smooth_factor) +
+                                    (prev_human + next_human) * smooth_factor / 2)
+                        ai_prob = (ai_prob * (1 - smooth_factor) +
+                                (prev_ai + next_ai) * smooth_factor / 2)
+                sentence_predictions.append({
+                    'sentence': sentences[i],
+                    'human_prob': human_prob,
+                    'ai_prob': ai_prob,
+                    'prediction': 'human' if human_prob > ai_prob else 'ai',
+                    'confidence': max(human_prob, ai_prob)
+                })
+        return {
+            'sentence_predictions': sentence_predictions,
+            'highlighted_text': self.format_predictions_html(sentence_predictions),
+            'full_text': text,
+            'overall_prediction': self.aggregate_predictions(sentence_predictions)
+        }
     def format_predictions_html(self, sentence_predictions: List[Dict]) -> str: