ApsidalSolid4 committed (verified)
Commit 8373deb · 1 Parent(s): 79ae2f7

Update app.py

Files changed (1): app.py (+45 −39)
app.py CHANGED

@@ -163,7 +163,7 @@ class TextClassifier:
         }
 
     def detailed_scan(self, text: str) -> Dict:
-        """Perform a detailed scan with sentence-level analysis and improved boundary handling."""
+        """Optimized detailed scan with sentence-level analysis."""
         if not text.strip():
             return {
                 'sentence_predictions': [],
@@ -180,18 +180,22 @@ class TextClassifier:
         if not sentences:
             return {}
 
-        # Create centered windows for each sentence
+        # Pre-calculate window information
         windows, window_sentence_indices = self.processor.create_centered_windows(sentences, WINDOW_SIZE)
+        sentence_scores = {i: {'human_prob': 0.0, 'ai_prob': 0.0, 'appearances': 0} for i in range(len(sentences))}
 
-        # Track scores for each sentence
-        sentence_appearances = {i: 0 for i in range(len(sentences))}
-        sentence_scores = {i: {'human_prob': 0.0, 'ai_prob': 0.0} for i in range(len(sentences))}
+        # Calculate weights once
+        center_weight = 0.7
+        edge_weight = 0.3 / (WINDOW_SIZE - 1) if WINDOW_SIZE > 1 else 0.3
 
-        # Process windows in batches
-        for i in range(0, len(windows), BATCH_SIZE):
-            batch_windows = windows[i:i + BATCH_SIZE]
-            batch_indices = window_sentence_indices[i:i + batch_size]
+        # Process all windows in larger batches
+        batch_size = min(32, len(windows))  # Increased batch size
+        for i in range(0, len(windows), batch_size):
+            batch_end = min(i + batch_size, len(windows))
+            batch_windows = windows[i:batch_end]
+            batch_indices = window_sentence_indices[i:batch_end]
 
+            # Process batch
             inputs = self.tokenizer(
                 batch_windows,
                 truncation=True,
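This hunk also removes a latent bug: the old loop stepped by BATCH_SIZE but sliced batch_indices with lowercase batch_size, a mismatch (or outright NameError) that the rewrite avoids by defining batch_size once and using it for both slices. Both versions rely on self.processor.create_centered_windows, which this commit does not touch. For context, here is a minimal sketch of what such a helper plausibly does, assuming one window per sentence, clipped at the text boundaries; the body below is a guess for illustration, not the repository's implementation:

from typing import List, Tuple

def create_centered_windows(sentences: List[str], window_size: int) -> Tuple[List[str], List[List[int]]]:
    """Build one window per sentence, centered on it and clipped at the edges."""
    windows: List[str] = []
    window_sentence_indices: List[List[int]] = []
    half = window_size // 2
    for i in range(len(sentences)):
        start = max(0, i - half)
        end = min(len(sentences), i + half + 1)
        indices = list(range(start, end))
        windows.append(" ".join(sentences[start:end]))
        window_sentence_indices.append(indices)
    return windows, window_sentence_indices

Note that under this clipping scheme, len(indices) // 2 is not the target sentence for windows near the edges of the text, so the real helper may pad windows instead; the diff's center_idx arithmetic assumes full-size windows.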
@@ -204,45 +208,46 @@ class TextClassifier:
             outputs = self.model(**inputs)
             probs = F.softmax(outputs.logits, dim=-1)
 
-            # Attribute predictions with center-weighted approach
+            # Process each window in the batch
             for window_idx, indices in enumerate(batch_indices):
                 center_idx = len(indices) // 2
-                center_weight = 0.7  # Higher weight for center sentence
-                edge_weight = 0.3 / (len(indices) - 1)  # Distribute remaining weight
+                window_human_prob = probs[window_idx][1].item()
+                window_ai_prob = probs[window_idx][0].item()
 
+                # Update scores for all sentences in this window
                 for pos, sent_idx in enumerate(indices):
-                    # Apply higher weight to center sentence
                     weight = center_weight if pos == center_idx else edge_weight
-                    sentence_appearances[sent_idx] += weight
-                    sentence_scores[sent_idx]['human_prob'] += weight * probs[window_idx][1].item()
-                    sentence_scores[sent_idx]['ai_prob'] += weight * probs[window_idx][0].item()
+                    sentence_scores[sent_idx]['human_prob'] += weight * window_human_prob
+                    sentence_scores[sent_idx]['ai_prob'] += weight * window_ai_prob
+                    sentence_scores[sent_idx]['appearances'] += weight
 
-        # Calculate final predictions with boundary smoothing
+            del inputs, outputs, probs
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+
+        # Calculate final predictions
         sentence_predictions = []
+        prev_pred = None
         for i in range(len(sentences)):
-            if sentence_appearances[i] > 0:
-                human_prob = sentence_scores[i]['human_prob'] / sentence_appearances[i]
-                ai_prob = sentence_scores[i]['ai_prob'] / sentence_appearances[i]
-
-                # Apply minimal smoothing at prediction boundaries
-                if i > 0 and i < len(sentences) - 1:
-                    prev_human = sentence_scores[i-1]['human_prob'] / sentence_appearances[i-1]
-                    prev_ai = sentence_scores[i-1]['ai_prob'] / sentence_appearances[i-1]
-                    next_human = sentence_scores[i+1]['human_prob'] / sentence_appearances[i+1]
-                    next_ai = sentence_scores[i+1]['ai_prob'] / sentence_appearances[i+1]
-
-                    # Check if we're at a prediction boundary
-                    current_pred = 'human' if human_prob > ai_prob else 'ai'
-                    prev_pred = 'human' if prev_human > prev_ai else 'ai'
-                    next_pred = 'human' if next_human > next_ai else 'ai'
+            scores = sentence_scores[i]
+            if scores['appearances'] > 0:
+                # Calculate base probabilities
+                human_prob = scores['human_prob'] / scores['appearances']
+                ai_prob = scores['ai_prob'] / scores['appearances']
+                current_pred = 'human' if human_prob > ai_prob else 'ai'
 
-                    if current_pred != prev_pred or current_pred != next_pred:
-                        # Small adjustment at boundaries
-                        smooth_factor = 0.1
-                        human_prob = (human_prob * (1 - smooth_factor) +
-                                      (prev_human + next_human) * smooth_factor / 2)
-                        ai_prob = (ai_prob * (1 - smooth_factor) +
-                                      (prev_ai + next_ai) * smooth_factor / 2)
+                # Only apply smoothing at actual prediction boundaries
+                if i > 0 and prev_pred and current_pred != prev_pred:
+                    # Simple smoothing only at boundaries
+                    smooth_factor = 0.1
+                    if i < len(sentences) - 1:
+                        next_scores = sentence_scores[i + 1]
+                        next_human = next_scores['human_prob'] / next_scores['appearances']
+                        next_ai = next_scores['ai_prob'] / next_scores['appearances']
+
+                        # Apply minimal smoothing
+                        human_prob = human_prob * (1 - smooth_factor) + next_human * smooth_factor
+                        ai_prob = ai_prob * (1 - smooth_factor) + next_ai * smooth_factor
 
                 sentence_predictions.append({
                     'sentence': sentences[i],
  'sentence': sentences[i],
@@ -251,6 +256,7 @@ class TextClassifier:
251
  'prediction': 'human' if human_prob > ai_prob else 'ai',
252
  'confidence': max(human_prob, ai_prob)
253
  })
 
254
 
255
  return {
256
  'sentence_predictions': sentence_predictions,
 
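The single added line carries the current prediction into the next iteration; it is what makes the new boundary test (i > 0 and prev_pred and current_pred != prev_pred) meaningful, whereas the old code recomputed both neighbors' scores for every sentence. The smoothing itself is a fixed-weight blend toward the next sentence; with invented values:

# Fixed-weight blend used at prediction boundaries; the probabilities are invented.
smooth_factor = 0.1
human_prob, next_human = 0.40, 0.80
human_prob = human_prob * (1 - smooth_factor) + next_human * smooth_factor
print(round(human_prob, 2))  # 0.44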
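Taken together, the commit batches windows more aggressively (up to 32 at a time) and releases per-batch tensors as soon as their probabilities have been read out. Below is a self-contained sketch of that cleanup pattern on a generic Hugging Face classifier; the model name is a placeholder and the torch.no_grad() context is an assumption, since the surrounding lines of app.py are not shown in this diff:

import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer

MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"  # placeholder model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
model.eval()

def batch_probs(texts, batch_size=32):
    """Classify texts in batches, freeing GPU memory after each batch."""
    all_probs = []
    for i in range(0, len(texts), batch_size):
        inputs = tokenizer(texts[i:i + batch_size], truncation=True,
                           padding=True, return_tensors="pt")
        with torch.no_grad():
            outputs = model(**inputs)
        probs = F.softmax(outputs.logits, dim=-1)
        all_probs.extend(probs.tolist())
        # Drop references, then release PyTorch's cached GPU blocks back to the
        # device, as the commit does after each batch
        del inputs, outputs, probs
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    return all_probs

Calling torch.cuda.empty_cache() every batch adds overhead, so this trades some throughput for a bounded memory footprint, a reasonable choice on shared Spaces hardware where several requests may contend for the same GPU.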