ariansyahdedy committed on
Commit
2ee07d8
·
1 Parent(s): a240cf0

fix gitignore

Browse files
Files changed (2) hide show
  1. .gitignore +2 -1
  2. app/main.py +1114 -0
.gitignore CHANGED
@@ -8,6 +8,7 @@ secret.json
8
  test*.py
9
  client_secret.json
10
  app/main_backup.py
11
- app/main*.py
 
12
  test.json
13
  draft.py
 
8
  test*.py
9
  client_secret.json
10
  app/main_backup.py
11
+ app/main_*.py
12
+ app/test.py
13
  test.json
14
  draft.py
app/main.py ADDED
@@ -0,0 +1,1114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #main.py
2
+ import re
3
+ import time
4
+ import os
5
+ import logging
6
+ from typing import List, Dict, Optional, Set, Tuple
7
+
8
+ import google_auth_oauthlib.flow
9
+ import googleapiclient.discovery
10
+ import googleapiclient.errors
11
+ from google_auth_oauthlib.flow import Flow
12
+ from google.oauth2.credentials import Credentials
13
+ from googleapiclient.discovery import build
14
+ from fastapi import FastAPI, Request, Form, File, UploadFile, HTTPException, Depends
15
+ from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
16
+ from fastapi.templating import Jinja2Templates
17
+ from fastapi.staticfiles import StaticFiles
18
+ from fastapi.security import OAuth2PasswordBearer
19
+ from google.oauth2.credentials import Credentials
20
+ from pydantic import BaseModel
21
+
22
+ import unicodedata
23
+ import unidecode
24
+
25
+ import io
26
+ import pandas as pd
27
+ import json
28
+ from dotenv import load_dotenv
29
+ # For monitoring with Prometheus
30
+ load_dotenv()
31
+
32
# Root logging is configured once, at import time, for the whole module.
logging.basicConfig(
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s - %(name)s - [%(levelname)s] %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Look-alike characters from other scripts, keyed to the Latin letter they
# are replaced with during normalization. The table is intentionally partial.
VISUAL_MAP = {
    # Cyrillic capitals
    'А': 'A',
    'В': 'B',
    'С': 'C',
    'Е': 'E',
    'Н': 'H',
    'К': 'K',
    'М': 'M',
    'О': 'O',
    'Р': 'P',
    'Т': 'T',
    'Х': 'X',
    # Cyrillic small letters
    'а': 'a',
    'в': 'b',
    'с': 'c',
    'е': 'e',
    'о': 'o',
    'р': 'p',
    'х': 'x',
    'у': 'y',
    # Mirrored "R"
    'Я': 'R',
    'я': 'r',
    # Greek
    'ρ': 'p',
    'Π': 'P',
}

# In-memory store of manual moderation decisions, keyed by
# (video_id, comment_id). It lives only as long as the process does; a real
# deployment would keep this in a database table.
manual_overrides = dict()
56
+
57
+
58
+ from google.oauth2 import service_account
59
+
60
def get_google_credentials():
    """
    Build Google credentials for the current runtime environment.

    When the ``HF_SPACE`` environment variable equals ``"true"`` the
    credentials are created from the service-account JSON stored in
    ``GOOGLE_SERVICE_ACCOUNT_JSON``. Otherwise an interactive installed-app
    OAuth flow is run against ``./app/client_secret.json``.

    :return: A credentials object usable with ``googleapiclient.discovery.build``
    :raises RuntimeError: If running on a Space and the service-account secret is missing
    """
    running_on_space = os.getenv("HF_SPACE") == "true"

    if not running_on_space:
        # Local development: use OAuth flow (opens a local callback server).
        youtube_scopes = [
            "https://www.googleapis.com/auth/youtube.readonly",
            "https://www.googleapis.com/auth/youtube.force-ssl"
        ]
        oauth_flow = google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file(
            "./app/client_secret.json",
            scopes=youtube_scopes,
            redirect_uri=os.getenv('YOUTUBE_REDIRECT_URI')
        )
        return oauth_flow.run_local_server(port=0)

    # Hugging Face Spaces: the service-account key is supplied as a secret.
    raw_secret = os.getenv("GOOGLE_SERVICE_ACCOUNT_JSON")
    if not raw_secret:
        raise RuntimeError("Missing GOOGLE_SERVICE_ACCOUNT_JSON in Hugging Face secret.")

    sa_info = json.loads(raw_secret)
    sa_credentials = service_account.Credentials.from_service_account_info(sa_info)
    # Keep the raw info on the object so callers can serialise it later.
    sa_credentials._sa_info = sa_info
    return sa_credentials
82
+
83
+
84
def keep_comment(comment_id: str, video_id: str):
    """
    Record that a comment was manually approved.

    Stores the value "safe" in the module-level ``manual_overrides`` mapping
    under the key ``(video_id, comment_id)``.

    :param comment_id: YouTube comment ID
    :param video_id: ID of the video the comment belongs to
    """
    override_key = (video_id, comment_id)
    manual_overrides[override_key] = "safe"
87
+
88
+ # --- GamblingFilter class (with rule updates) ---
89
class GamblingFilter:
    """
    A rule-based filter for detecting online gambling-related comments.

    Detection combines three signals, each computed on several normalized
    views of the comment text:

    * platform names (plain substrings and an obfuscation-tolerant regex),
    * gambling vocabulary,
    * contextual phrase patterns (regular expressions).

    A comment that contains any "safe indicator" phrase is never flagged.
    ``_ambiguous_terms`` is stored and editable through ``add_rule`` but is
    not consulted by the scoring in this class.
    """

    # Separator tolerated between the letters of an obfuscated platform name.
    _OBFUSCATION_GAP = '[^a-zA-Z0-9]{0,3}'

    def __init__(self):
        logger.info("Initializing GamblingFilter")
        self._platform_names: Set[str] = {
            'agustoto', 'aero', 'aero88', 'dora', 'dora77', 'dewadora', 'pulau777', 'pulau', '777',
            'jptogel', 'mandalika', 'cnd88', 'axl', 'berkah99', 'weton88', 'garuda', 'hoki'
        }

        self._gambling_terms: Set[str] = {
            'jackpot', 'jp', 'wd', 'depo', 'cuan', 'gacor', 'gacir', 'jekpot', 'sultan',
            'rezeki nomplok', 'rezeki', 'menang', 'nomplok', 'deposit', 'withdraw', 'maxwin',
            'auto sultan', 'jepe', 'jepee', 'bikin nagih', 'berkah'
        }

        self._ambiguous_terms: Set[str] = {
            'auto', 'main', 'bermain', 'hasil', 'dapat', 'dapet', 'berkat'
        }

        self._safe_indicators: Set[str] = {
            'tidak mengandung', 'bukan perjudian', 'tanpa perjudian',
            'dokumentasi', 'profesional', 'pembelajaran'
        }

        self._gambling_contexts: List[str] = [
            r'(main|bermain|coba).{1,30}(dapat|dapet|pro|jadi|langsung|menang|jp|cuan)',
            r'(modal|depo).{1,30}(jadi|langsung|wd|cuan)',
            r'(jp|jackpot|jekpot).{1,30}(gede|besar|pecah)',
            r'(berkat|dari).{1,30}(rezeki|menang|cuan|sultan)',
            r'(gacor|gacir).{1,30}(terus|parah|tiap|hari)',
            r'(rezeki|cuan).{1,30}(nomplok|datang|mengalir|lancar)',
            r'(hari ini).{1,30}(menang|cuan|rezeki|berkat)',
            r'(malah|eh).{1,30}(jadi|dapat|dapet|rezeki)',
            r'(auto).{1,30}(sultan|cuan|rezeki|kaya)',
            r'(0\d:[0-5]\d).{1,30}(menang|rezeki|cuan|gacor)',
            r'(iseng|coba).{1,30}(malah|jadi|eh|pro)',
            r'(deposit|depo|wd).{1,30}(jadi|langsung)',
            r'(langsung|auto).{1,30}(jp|cuan|sultan|rezeki)',
            r'bikin\s+nagih',
            r'gak\s+ada\s+duanya',
            r'berkah.{0,20}rezeki',
            r'puji\s+syukur'
        ]

        self._compiled_gambling_contexts = [
            re.compile(pattern, re.IGNORECASE | re.DOTALL)
            for pattern in self._gambling_contexts
        ]

        self._update_platform_pattern()

        self._number_pattern = re.compile(r'(88|777|77|99|7+)')

    def _update_platform_pattern(self):
        """
        Recompile the platform name regex based on current _platform_names.

        Each name becomes a sequence of case-insensitive character classes
        separated by up to three non-alphanumeric characters, so "p.u.l.a.u"
        still matches "pulau". Characters are escaped so names containing
        regex metacharacters cannot corrupt the pattern, and alternatives are
        ordered longest-first so "aero88" is preferred over its prefix "aero"
        regardless of set iteration order.
        """
        ordered_names = sorted(
            (name for name in self._platform_names if name),
            key=lambda name: (-len(name), name),
        )
        alternatives = []
        for name in ordered_names:
            letter_classes = [
                f'[{re.escape(ch.upper())}{re.escape(ch.lower())}]'
                for ch in name
            ]
            alternatives.append(self._OBFUSCATION_GAP.join(letter_classes))

        if alternatives:
            self._platform_pattern = re.compile('|'.join(alternatives), re.DOTALL)
        else:
            # No platforms configured: use a pattern that can never match.
            self._platform_pattern = re.compile(r'(?!)')

    def add_rule(self, rule_type: str, rule_value: str):
        """
        Add a new rule based on the rule type.
        Allowed types: 'platform', 'gambling_term', 'safe_indicator', 'gambling_context', 'ambiguous_term'

        Word rules are stored stripped and lowercased because they are matched
        against lowercased text. Context rules are regular expressions and are
        stored verbatim, but only after they compile successfully.

        :param rule_type: One of the allowed rule types (case-insensitive)
        :param rule_value: The word, phrase or regular expression to add
        :raises ValueError: If the rule type is unsupported or the value is empty
        :raises re.error: If a 'gambling_context' value is not a valid regex
        """
        rule_type = rule_type.strip().lower()
        word_sets = {
            'platform': self._platform_names,
            'gambling_term': self._gambling_terms,
            'safe_indicator': self._safe_indicators,
            'ambiguous_term': self._ambiguous_terms,
        }

        if rule_type == 'gambling_context':
            if not rule_value or not rule_value.strip():
                raise ValueError("Rule value must not be empty")
            # Compile first so an invalid regex leaves both lists untouched.
            compiled = re.compile(rule_value, re.IGNORECASE | re.DOTALL)
            self._gambling_contexts.append(rule_value)
            self._compiled_gambling_contexts.append(compiled)
            return

        if rule_type not in word_sets:
            raise ValueError("Unsupported rule type")

        cleaned = (rule_value or '').strip().lower()
        if not cleaned:
            # An empty string is a substring of every comment.
            raise ValueError("Rule value must not be empty")

        word_sets[rule_type].add(cleaned)
        if rule_type == 'platform':
            self._update_platform_pattern()

    def _strip_all_formatting(self, text: str) -> str:
        """Return *text* lowercased with everything but letters, digits and whitespace removed."""
        return ''.join(c.lower() for c in text if c.isalnum() or c.isspace())

    def _aggressive_normalize_text(self, text: str) -> str:
        """Return the NFKD form of *text* restricted to ASCII characters, lowercased."""
        normalized = unicodedata.normalize('NFKD', text)
        ascii_text = ''.join(c for c in normalized if ord(c) < 128)
        return ascii_text.lower()

    def _robust_normalize(self, text: str) -> str:
        """
        1) Replace visually-similar letters (Cyrillic/Greek) with Latin equivalents.
        2) Then apply NFKD decomposition and unidecode to transliterate to ASCII.
        3) Lowercase the result.
        """
        # Step 1: custom pass for visual lookalikes
        mapped_text = ''.join(VISUAL_MAP.get(ch, ch) for ch in text)

        # Step 2: Unicode compatibility decomposition, then transliteration
        decomposed = unicodedata.normalize('NFKD', mapped_text)
        ascii_equiv = unidecode.unidecode(decomposed)

        # Step 3: lowercase the result
        return ascii_equiv.lower()

    def _extract_platform_names(self, text: str) -> List[str]:
        """
        Collect platform-name evidence found in *text*.

        The result contains, in order: the distinct substrings matched by the
        obfuscation-tolerant regex, then every configured platform name found
        in the normalized or stripped text that is not already covered by an
        earlier entry, then the markers '88' and '777' when present and not
        already covered.

        :param text: Raw comment text
        :return: List of matched strings / platform names (may be empty)
        """
        matches: List[str] = []
        # Alphanumeric-only lowercase form of each entry, used for the
        # "already covered" test so "a.e.r.o" counts as covering "aero".
        covered: List[str] = []

        def remember(entry: str) -> None:
            matches.append(entry)
            covered.append(''.join(c for c in entry.lower() if c.isalnum()))

        # The pattern has no capture groups, so take the whole match.
        for found in self._platform_pattern.finditer(text):
            hit = found.group(0)
            if hit and hit not in matches:
                remember(hit)

        normalized = self._robust_normalize(text)
        stripped = self._strip_all_formatting(text)

        # Longest names first so a specific name suppresses its own prefix.
        for platform in sorted(self._platform_names, key=lambda name: (-len(name), name)):
            if not platform:
                continue
            if platform in normalized or platform in stripped:
                if not any(platform in seen for seen in covered):
                    remember(platform)

        for marker in ('88', '777'):
            if marker in text or marker in normalized:
                if not any(marker in seen for seen in covered):
                    remember(marker)
        return matches

    def normalize_text(self, text: str) -> str:
        """Return the NFKD form of *text* keeping only ASCII and whitespace characters, lowercased."""
        normalized = unicodedata.normalize('NFKD', text)
        normalized = ''.join(c for c in normalized if ord(c) < 128 or c.isspace())
        return normalized.lower()

    def is_gambling_comment(self, text: str, threshold: float = 0.55) -> Tuple[bool, Dict]:
        """
        Score a comment and decide whether it looks like gambling promotion.

        Scoring: platform evidence contributes up to 1.0, gambling terms and
        context patterns up to 0.4 each (0.2 per match), and a "lucky number"
        0.1. Signals are summed only when they corroborate each other;
        otherwise the strongest single signal is damped by 0.8. The final
        score is capped at 1.0.

        :param text: Raw comment text
        :param threshold: Minimum confidence score for a positive verdict
        :return: Tuple of (is_gambling, metrics dict describing the evidence)
        """
        started = time.perf_counter()
        logger.info("Analyzing comment for gambling content: %s...", text[:100])
        metrics = {
            'platform_matches': [],
            'gambling_term_matches': [],
            'context_matches': [],
            'safe_indicators': [],
            'has_numbers': False,
            'confidence_score': 0.0,
            'processing_time_ms': 0
        }

        # All three views are already lowercased by their helpers.
        normalized_text = self.normalize_text(text)
        stripped_text = self._strip_all_formatting(text)
        aggressive_text = self._robust_normalize(text)

        metrics['safe_indicators'] = [
            indicator for indicator in sorted(self._safe_indicators)
            if indicator in normalized_text
        ]

        # Any safe indicator short-circuits the analysis.
        if metrics['safe_indicators']:
            metrics['confidence_score'] = 0.0
            metrics['processing_time_ms'] = (time.perf_counter() - started) * 1000
            return False, metrics

        platform_matches = self._extract_platform_names(text)
        if platform_matches:
            metrics['platform_matches'] = platform_matches

        views = (normalized_text, stripped_text, aggressive_text)
        metrics['gambling_term_matches'] = [
            term for term in sorted(self._gambling_terms)
            if any(term in view for view in views)
        ]

        if self._number_pattern.search(normalized_text):
            metrics['has_numbers'] = True

        for pattern in self._compiled_gambling_contexts:
            match = pattern.search(normalized_text)
            if match:
                metrics['context_matches'].append(match.group(0))
            match = pattern.search(aggressive_text)
            if match and match.group(0) not in metrics['context_matches']:
                metrics['context_matches'].append(match.group(0))

        platform_score = min(len(metrics['platform_matches']) * 1.0, 1)
        term_score = min(len(metrics['gambling_term_matches']) * 0.2, 0.4)
        context_score = min(len(metrics['context_matches']) * 0.2, 0.4)
        number_score = 0.1 if metrics['has_numbers'] else 0

        if platform_score > 0 and (term_score > 0 or context_score > 0):
            total_score = platform_score + term_score + context_score + number_score
        elif context_score > 0.2 and term_score > 0:
            total_score = context_score + term_score + number_score
        else:
            total_score = max(platform_score, term_score, context_score) * 0.8

        metrics['confidence_score'] = min(total_score, 1.0)

        def mentions(word: str) -> bool:
            return word in normalized_text or word in aggressive_text

        has_platform = len(metrics['platform_matches']) > 0

        # Religious-sounding gratitude phrases next to a platform name get a
        # confidence floor of 0.7.
        if mentions("berkah") and mentions("rezeki") and has_platform:
            metrics['confidence_score'] = max(metrics['confidence_score'], 0.7)
            if "Special case: berkah+rezeki+platform" not in metrics['context_matches']:
                metrics['context_matches'].append("Special case: berkah+rezeki+platform")

        elif mentions("puji") and mentions("syukur") and has_platform:
            metrics['confidence_score'] = max(metrics['confidence_score'], 0.7)
            if "Special case: puji+syukur+platform" not in metrics['context_matches']:
                metrics['context_matches'].append("Special case: puji+syukur+platform")

        metrics['processing_time_ms'] = (time.perf_counter() - started) * 1000
        is_gambling = metrics['confidence_score'] >= threshold
        return is_gambling, metrics

    def filter_comments(self, comments: List[str], threshold: float = 0.55) -> Dict[str, List]:
        """
        Classify a batch of comments.

        :param comments: Comment texts to analyse
        :param threshold: Minimum confidence score for a positive verdict
        :return: Dict with 'gambling_comments', 'safe_comments' and per-comment 'metrics'
        """
        result = {
            'gambling_comments': [],
            'safe_comments': [],
            'metrics': []
        }

        for comment in comments:
            is_gambling, metrics = self.is_gambling_comment(comment, threshold)
            bucket = 'gambling_comments' if is_gambling else 'safe_comments'
            result[bucket].append(comment)
            metrics['original_text'] = comment
            result['metrics'].append(metrics)
        return result
348
+
349
+
350
class YouTubeCommentModerator:
    """
    Fetches the top-level comments of a YouTube video, classifies each one
    with a GamblingFilter, and can reject individual comments through the
    YouTube Data API.
    """

    def __init__(self,
                 client_secrets_path: str = "./app/client_secret.json",
                 gambling_filter: Optional["GamblingFilter"] = None):
        """
        Initialize the YouTube Comment Moderator with configurable settings.

        :param client_secrets_path: Path to OAuth 2.0 client secrets file
        :param gambling_filter: Optional pre-configured GamblingFilter instance
        """
        # Logging is configured once at module import; only fetch the logger here.
        self.logger = logging.getLogger(__name__)

        # OAuth configuration
        self.client_secrets_path = client_secrets_path
        self.scopes = [
            "https://www.googleapis.com/auth/youtube.readonly",
            "https://www.googleapis.com/auth/youtube.force-ssl"
        ]

        # Disable OAuthlib's HTTPS verification only when running locally;
        # a hosted deployment must keep the transport check enabled.
        if os.getenv("HF_SPACE") != "true":
            os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

        # YouTube service (populated by authenticate() or assigned by the caller)
        self.youtube_service = None

        # Gambling Filter
        self.gambling_filter = gambling_filter or GamblingFilter()

    def authenticate(self) -> bool:
        """
        Authenticate with YouTube Data API.

        :return: Boolean indicating successful authentication
        """
        try:
            credentials = get_google_credentials()

            self.youtube_service = googleapiclient.discovery.build(
                "youtube", "v3", credentials=credentials
            )
            self.logger.info("YouTube API authentication successful.")
            return True
        except Exception as e:
            self.logger.error(f"Authentication failed: {e}")
            return False

    def moderate_video_comments(self, video_id: str, threshold: float = 0.55) -> Dict:
        """
        Classify every top-level comment of a video.

        Comments with a manual "safe" override recorded in ``manual_overrides``
        bypass the filter and are reported as safe with a confidence of 0.0.

        :param video_id: ID of the video whose comment threads are fetched
        :param threshold: Gambling confidence threshold passed to the filter
        :return: Dict with 'total_comments', 'gambling_comments',
                 'safe_comments' and 'moderation_metrics', or {"error": ...}
        """
        if not self.youtube_service:
            self.logger.error("YouTube service not authenticated.")
            return {"error": "Not authenticated"}

        try:
            moderation_results = {
                "total_comments": 0,
                "gambling_comments": [],
                "safe_comments": [],
                "moderation_metrics": []
            }

            request = self.youtube_service.commentThreads().list(
                part="snippet",
                videoId=video_id,
                maxResults=100,
                textFormat="plainText"
            )

            # list_next() returns None once the last page has been consumed.
            while request is not None:
                response = request.execute()

                for item in response.get("items", []):
                    top_level = item["snippet"]["topLevelComment"]
                    comment_id = top_level["id"]
                    comment_snippet = top_level["snippet"]
                    comment_text = comment_snippet["textDisplay"]

                    # Check for manual override first
                    if manual_overrides.get((video_id, comment_id)) == "safe":
                        # The user previously pressed "Keep" - skip the gambling filter
                        is_gambling = False
                        metrics = {"confidence_score": 0.0}
                    else:
                        # Normal path - filter it
                        is_gambling, metrics = self.gambling_filter.is_gambling_comment(comment_text, threshold)

                    comment_info = {
                        "id": comment_id,
                        "text": comment_text,
                        "author": comment_snippet["authorDisplayName"],
                        "is_gambling": is_gambling,
                        "metrics": metrics
                    }

                    moderation_results["total_comments"] += 1

                    if is_gambling:
                        moderation_results["gambling_comments"].append(comment_info)
                    else:
                        moderation_results["safe_comments"].append(comment_info)

                    metrics["original_text"] = comment_text
                    moderation_results["moderation_metrics"].append(metrics)

                # Handle pagination if available
                request = self.youtube_service.commentThreads().list_next(request, response)

            return moderation_results

        except Exception as e:
            self.logger.error(f"Error moderating comments: {e}")
            return {"error": str(e)}

    def delete_comment(self, comment_id: str) -> bool:
        """
        Remove a comment from public view.

        The comment is not deleted through ``comments().delete``; its
        moderation status is set to "rejected" instead.

        :param comment_id: YouTube comment ID
        :return: Boolean indicating whether the API call succeeded
        """
        if not self.youtube_service:
            self.logger.error("YouTube service not authenticated.")
            return False

        try:
            self.youtube_service.comments().setModerationStatus(
                id=comment_id,
                moderationStatus="rejected"
            ).execute()
            self.logger.info(f"Comment {comment_id} deleted successfully.")
            return True
        except Exception as e:
            self.logger.error(f"Failed to delete comment {comment_id}: {e}")
            return False

    def get_channel_videos(self, max_results: int = 50) -> List[Dict]:
        """
        Retrieve videos from authenticated user's channel.

        Only the first page of search results is returned.

        :param max_results: Maximum number of videos to retrieve
        :return: List of video details (id, title, thumbnail URL)
        """
        if not self.youtube_service:
            self.logger.error("YouTube service not authenticated.")
            return []

        channel_id = self._get_channel_id()
        if not channel_id:
            # Without a channel filter the search would not be limited to
            # the user's own uploads, so refuse to run it.
            self.logger.error("Cannot list videos: channel ID is unavailable.")
            return []

        try:
            request = self.youtube_service.search().list(
                part="snippet",
                channelId=channel_id,
                maxResults=max_results,
                type="video"
            )
            response = request.execute()

            return [
                {
                    "id": item["id"]["videoId"],
                    "title": item["snippet"]["title"],
                    "thumbnail": item["snippet"]["thumbnails"]["default"]["url"]
                }
                for item in response.get("items", [])
            ]
        except Exception as e:
            self.logger.error(f"Error retrieving videos: {e}")
            return []

    def _get_channel_id(self) -> Optional[str]:
        """
        Retrieve the authenticated user's channel ID.

        :return: Channel ID or None
        """
        try:
            request = self.youtube_service.channels().list(part="id", mine=True)
            response = request.execute()
            return response["items"][0]["id"]
        except Exception as e:
            self.logger.error(f"Error retrieving channel ID: {e}")
            return None
540
+
541
class User(BaseModel):
    """A logged-in application user and the Google credentials stored for them."""

    # Key under which the user is stored; the login route passes the
    # YouTube channel title or a fallback name here.
    username: str
    # Optional contact address; not populated by the code visible in this file section.
    email: Optional[str] = None
    # Serialised credential data supplied to UserDatabase.create_user.
    youtube_credentials: Optional[Dict] = None
545
+
546
class UserDatabase:
    """
    Process-local user registry backed by a class-level dict.

    Everything is lost on restart; a production app would
    replace this with a proper database like SQLAlchemy.
    """
    users = {}

    @classmethod
    def create_user(cls, username: str, credentials: Dict):
        """Create (or overwrite) the user stored under *username* and return it."""
        cls.users[username] = User(username=username, youtube_credentials=credentials)
        return cls.users[username]

    @classmethod
    def get_user(cls, username: str):
        """Return the user stored under *username*, or None when absent."""
        try:
            return cls.users[username]
        except KeyError:
            return None
562
+
563
class YouTubeAuthenticator:
    """Thin adapter that turns credential failures into HTTP 400 errors."""

    @staticmethod
    def authenticate_with_client_secrets(client_secrets_file=None):
        """
        Obtain Google credentials via get_google_credentials().

        :param client_secrets_file: Accepted for compatibility; not used by this method
        :return: The credentials object returned by get_google_credentials()
        :raises HTTPException: With status 400 if obtaining credentials fails
        """
        try:
            return get_google_credentials()
        except Exception as e:
            failure_detail = f"Authentication failed: {str(e)}"
            raise HTTPException(status_code=400, detail=failure_detail)
571
+ # --- FastAPI application setup ---
572
+ app = FastAPI()
573
+ app.mount("/static", StaticFiles(directory="static"), name="static")
574
+ templates = Jinja2Templates(directory="templates")
575
+
576
+ # Create a single instance of the GamblingFilter
577
+ filter_instance = GamblingFilter()
578
+
579
+ # ----Google ----
580
class GoogleOAuthHandler:
    """Web-server style Google OAuth 2.0 helper (authorization URL + code exchange)."""

    def __init__(self):
        # Where the client secrets live and which permissions are requested.
        self.client_secrets_file = "./app/client_secret.json"
        # NOTE(review): these scopes differ from the ones used elsewhere in
        # this file (no youtube.force-ssl) - confirm moderation calls are not
        # expected to work with credentials obtained through this handler.
        self.scopes = [
            'https://www.googleapis.com/auth/youtube.readonly',
            'https://www.googleapis.com/auth/userinfo.profile'
        ]
        self.redirect_uri = os.getenv('YOUTUBE_REDIRECT_URI', 'http://localhost:8000/oauth/callback')

    def create_oauth_flow(self):
        """
        Build a fresh OAuth 2.0 Flow from the client secrets file,
        the configured scopes and the redirect URI.
        """
        return Flow.from_client_secrets_file(
            self.client_secrets_file,
            scopes=self.scopes,
            redirect_uri=self.redirect_uri
        )

    def initiate_oauth_flow(self):
        """
        Generate the Google authorization URL that starts the OAuth flow.

        :return: The URL the user should be redirected to
        """
        oauth_flow = self.create_oauth_flow()

        # access_type='offline' requests a refresh token; prompt='consent'
        # forces the consent screen to be shown.
        # NOTE(review): the returned state value is discarded here, so the
        # callback cannot verify it - confirm whether that is intended.
        url_and_state = oauth_flow.authorization_url(
            access_type='offline',
            prompt='consent',
            include_granted_scopes='true'
        )
        authorization_url, _state = url_and_state
        return authorization_url

    def handle_oauth_callback(self, authorization_code):
        """
        Exchange an authorization code for tokens and look up the user.

        :param authorization_code: The ``code`` value received on the redirect URI
        :return: Dict with 'username', 'credentials' (plain dict) and 'user_info'
        :raises HTTPException: With status 400 if any step fails
        """
        try:
            # Exchange the authorization code for access/refresh tokens.
            oauth_flow = self.create_oauth_flow()
            oauth_flow.fetch_token(code=authorization_code)
            credentials = oauth_flow.credentials

            # Basic profile of the Google account.
            user_info = build('oauth2', 'v2', credentials=credentials).userinfo().get().execute()

            # Channel details of the authenticated account.
            youtube_service = build('youtube', 'v3', credentials=credentials)
            channel_resp = youtube_service.channels().list(part="snippet", mine=True).execute()

            # Prefer the channel title; fall back to the e-mail address.
            has_channel = "items" in channel_resp and len(channel_resp["items"]) > 0
            if not has_channel:
                channel_username = user_info.get('email', 'unknown_user')
            else:
                channel_username = channel_resp['items'][0]['snippet']['title']

            # Plain-dict form of the credentials for storage.
            credential_fields = ('token', 'refresh_token', 'token_uri', 'client_id', 'client_secret', 'scopes')
            credentials_dict = {field: getattr(credentials, field) for field in credential_fields}

            return {
                'username': channel_username,
                'credentials': credentials_dict,
                'user_info': user_info
            }

        except Exception as e:
            raise HTTPException(status_code=400, detail=f"OAuth callback failed: {str(e)}")
666
+
667
+ moderator = YouTubeCommentModerator(gambling_filter=filter_instance)
668
@app.post("/moderate_video")
async def moderate_video(request: Request, video_id: str = Form(...), threshold: float = Form(0.55)):
    """
    Run moderation for one video with the shared moderator and render the
    result, together with the current filter rules, into index.html.

    :param request: Request object
    :param video_id: ID of the video to moderate (form field)
    :param threshold: Gambling confidence threshold (form field)
    :return: Rendered template response
    """
    if moderator.youtube_service:
        result = moderator.moderate_video_comments(video_id, threshold)
    else:
        result = {"error": "YouTube service not authenticated. Please authenticate first."}

    # Current rule sets, sorted for stable display.
    rule_sources = (
        ("platform", filter_instance._platform_names),
        ("gambling_term", filter_instance._gambling_terms),
        ("safe_indicator", filter_instance._safe_indicators),
        ("gambling_context", filter_instance._gambling_contexts),
        ("ambiguous_term", filter_instance._ambiguous_terms),
    )
    rules = {label: sorted(list(values)) for label, values in rule_sources}

    page_context = {
        "request": request,
        "result": result,
        "video_id": video_id,
        "rules": rules
    }
    return templates.TemplateResponse("index.html", page_context)
687
+
688
@app.delete("/api/comments/{comment_id}")
async def api_delete_comment(
    request: Request,
    comment_id: str,
    video_id: str
):
    """
    Reject a comment on YouTube using the current user's stored credentials.

    :param request: Request object (used to resolve the current user)
    :param comment_id: YouTube comment ID taken from the path
    :param video_id: Required query parameter; not used by this handler
    :return: {"success": bool}, plus an "error" message when the request
             could not be carried out
    """
    current_user = get_current_user_from_cookie(request)
    if not current_user:
        # No resolvable user: answer explicitly instead of failing on None.
        return JSONResponse(
            status_code=401,
            content={"success": False, "error": "Not authenticated"}
        )

    try:
        user_creds = Credentials.from_authorized_user_info(current_user.youtube_credentials)
        # Reuse the shared filter; deletion never consults it, so building a
        # fresh GamblingFilter per request would be wasted work.
        user_moderator = YouTubeCommentModerator(gambling_filter=filter_instance)
        user_moderator.youtube_service = googleapiclient.discovery.build(
            "youtube", "v3",
            credentials=user_creds
        )
    except Exception as e:
        # Stored credentials may be missing or not in authorized-user format.
        logger.error("Could not build YouTube client to delete comment %s: %s", comment_id, e)
        return {"success": False, "error": str(e)}

    success = user_moderator.delete_comment(comment_id)
    return {"success": success}
703
+
704
+
705
+ # OAuth2 Password Bearer for session management
706
+ oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
707
+
708
+ # Hardcoded client secrets path (you'll need to replace this with your actual path)
709
+ CLIENT_SECRETS_PATH = "./app/client_secret.json"
710
+
711
+ # 1) Root route => Decide if user is logged in; if not, go to /login
712
@app.get("/", response_class=HTMLResponse)
async def root_redirect(request: Request):
    """Send visitors with a "token" cookie to /videos and everyone else to /login."""
    destination = "/videos" if request.cookies.get("token") else "/login"
    return RedirectResponse(url=destination, status_code=303)
719
+
720
+
721
+ # 2) Show the login form (GET /login)
722
@app.get("/login", response_class=HTMLResponse)
async def login_form(request: Request):
    """Render the login page."""
    page_context = {"request": request}
    return templates.TemplateResponse("login.html", page_context)
725
+
726
+
727
+ # 3) Handle login submission (POST /login) => Google OAuth => /videos
728
@app.post("/login", response_class=HTMLResponse)
async def login(
    request: Request,
    username: str = Form(None)  # Optional form field; not used by this handler
):
    """
    Obtain Google credentials, register the user in the in-memory database
    and redirect to /videos with a session cookie.

    On any failure the login page is rendered again with an error message.

    :param request: Request object
    :param username: Optional username form field (ignored)
    :return: Redirect to /videos, or the login template with an error
    """
    running_on_space = os.getenv("HF_SPACE") == "true"

    try:
        # Get credentials (service account creds on HF, OAuth creds locally)
        credentials = get_google_credentials()
        youtube_service = googleapiclient.discovery.build(
            "youtube", "v3", credentials=credentials
        )

        if running_on_space:
            # In a Hugging Face Space a fixed username is used.
            channel_username = "hf_space_user"
        else:
            resp = youtube_service.channels().list(part="snippet", mine=True).execute()
            if "items" in resp and len(resp["items"]) > 0:
                channel_username = resp['items'][0]['snippet']['title']
            else:
                channel_username = "unknown_user"

        # Convert credentials to dict for storage
        if hasattr(credentials, "to_json"):
            credentials_dict = json.loads(credentials.to_json())
        elif hasattr(credentials, "_sa_info"):
            credentials_dict = credentials._sa_info
        else:
            credentials_dict = {}

        # Create or update user in our "database"
        UserDatabase.create_user(channel_username, credentials_dict)

        # Cookie settings depend on the environment.
        secure_cookie = running_on_space
        samesite_value = "none" if running_on_space else "lax"

        # NOTE(review): the cookie value is the plain channel name, which a
        # client could set by hand. Consider an unguessable session token;
        # that change must be made together with the cookie lookup helper.
        response = RedirectResponse(url="/videos", status_code=303)
        response.set_cookie(
            key="token",
            value=channel_username,
            max_age=1800,
            httponly=True,
            secure=secure_cookie,
            samesite=samesite_value
        )
        return response

    except Exception as e:
        return templates.TemplateResponse("login.html", {
            "request": request,
            "error": f"Authentication failed: {str(e)}"
        })
788
+
789
+
790
@app.post("/api/comments/keep/{comment_id}")
async def api_keep_comment(
    request: Request,
    comment_id: str,
    video_id: str
):
    """
    Approve a comment on YouTube and remember the decision locally.

    The comment's moderation status is set to "published" and a manual
    override is recorded so later moderation runs treat it as safe.

    :param request: Request object (used to resolve the current user)
    :param comment_id: YouTube comment ID taken from the path
    :param video_id: ID of the video the comment belongs to (query parameter)
    :return: {"success": True, "message": ...} or {"success": False, "error": ...}
    """
    try:
        logger.debug("Received keep request for comment_id: %s, video_id: %s", comment_id, video_id)

        # Get current user's credentials
        current_user = get_current_user_from_cookie(request)
        if not current_user:
            # Report the real cause instead of an AttributeError on None.
            return {"success": False, "error": "Not authenticated"}
        logger.debug("Current user: %s", current_user.username)
        user_creds = Credentials.from_authorized_user_info(current_user.youtube_credentials)

        # Only an API client is needed here; no moderator or filter instance.
        youtube_service = googleapiclient.discovery.build(
            "youtube", "v3",
            credentials=user_creds
        )

        logger.debug("Setting moderation status to 'published' on YouTube...")
        # Mark comment as approved on YouTube
        result = youtube_service.comments().setModerationStatus(
            id=comment_id,
            moderationStatus="published"
        ).execute()
        logger.debug("YouTube API response: %s", result)

        # Record the manual override so the comment is not reflagged
        keep_comment(comment_id, video_id)
        logger.debug("Manual override saved for comment.")

        return {"success": True, "message": "Comment kept successfully"}

    except Exception as e:
        logger.error("Error keeping comment: %s", e, exc_info=True)
        return {"success": False, "error": str(e)}
828
+
829
+
830
@app.get("/refresh_comments/{video_id}")
async def refresh_video_comments(
    request: Request,
    video_id: str,
    threshold: float = 0.55
):
    """
    Refresh comments for a specific video, reapplying moderation.

    :param request: Request object
    :param video_id: ID of the video to refresh comments for
    :param threshold: Gambling confidence threshold passed to the moderator
    :return: Rendered "video_comments.html" with updated comments,
        "error.html" if moderation or the title lookup fails, or a redirect
        to /login when no user is resolved
    """
    # Get current user's credentials (raises a 401 HTTPException when the
    # cookie is missing or unknown)
    current_user = get_current_user_from_cookie(request)

    # Defensive guard in case the helper ever returns a falsy user
    if not current_user:
        return RedirectResponse(url="/login", status_code=303)

    try:
        user_creds = Credentials.from_authorized_user_info(current_user.youtube_credentials)

        # Build the API client once and share it between the moderator and
        # the video lookup instead of constructing two identical clients.
        youtube_service = googleapiclient.discovery.build(
            "youtube", "v3",
            credentials=user_creds
        )
        moderator = YouTubeCommentModerator(gambling_filter=filter_instance)
        moderator.youtube_service = youtube_service

        # Moderate comments for the video
        result = moderator.moderate_video_comments(video_id, threshold)

        # Fetch video details to pass to template
        video_response = youtube_service.videos().list(
            part="snippet",
            id=video_id
        ).execute()
        # Tolerate a response without "items" rather than failing the whole
        # page after moderation already succeeded.
        items = video_response.get("items") or []
        video_info = items[0].get("snippet", {}) if items else {}

        return templates.TemplateResponse("video_comments.html", {
            "request": request,
            "video": {
                "id": video_id,
                "title": video_info.get('title', 'Unknown Video')
            },
            "safe_comments": result.get('safe_comments', []),
            "flagged_comments": result.get('gambling_comments', []),
            "total_comments": result.get('total_comments', 0)
        })

    except Exception as e:
        # Include the traceback, matching the logging in api_keep_comment
        logging.error(f"Error refreshing comments: {e}", exc_info=True)
        return templates.TemplateResponse("error.html", {
            "request": request,
            "error": f"Failed to refresh comments: {str(e)}"
        })
892
+
893
+
894
+ # 4) Protected route to fetch current user from cookie
895
def get_current_user(token: str = Depends(oauth2_scheme)):
    """
    Resolve the OAuth2 token to a stored user.

    The token is used verbatim as the username; no decoding or signature
    validation is performed (a real app should validate it properly).

    :param token: Token supplied by the ``oauth2_scheme`` dependency
    :return: The user returned by ``UserDatabase.get_user``
    :raises HTTPException: 401 when no user matches the token
    """
    matched_user = UserDatabase.get_user(token)
    if matched_user:
        return matched_user
    raise HTTPException(status_code=401, detail="Invalid authentication credentials")
901
+
902
def get_current_user_from_cookie(request: Request):
    """
    Resolve the current user from the request's "token" cookie.

    :param request: Incoming request whose cookies are inspected
    :return: The user returned by ``UserDatabase.get_user`` for the cookie value
    :raises HTTPException: 401 when the cookie is absent/empty or when no
        user matches its value
    """
    session_token = request.cookies.get("token")
    if session_token:
        account = UserDatabase.get_user(session_token)
        if account:
            return account
        raise HTTPException(status_code=401, detail="Invalid authentication credentials")
    raise HTTPException(status_code=401, detail="Not authenticated")
910
+
911
@app.exception_handler(HTTPException)
async def auth_exception_handler(request: Request, exc: HTTPException):
    """
    Application-wide handler for ``HTTPException``.

    :param request: The request that triggered the exception
    :param exc: The raised HTTP exception
    :return: A redirect to /login for 401 errors, otherwise a JSON response
        carrying the exception's status code, detail and headers
    """
    if exc.status_code == 401:
        # Redirect the user to the login page. 303 forces the follow-up
        # request to be a GET; the default 307 would replay a POST body
        # against /login. This matches the other redirects in this module.
        return RedirectResponse(url="/login", status_code=303)
    # For other HTTP errors, return a JSON response and keep any headers the
    # raiser attached instead of silently dropping them.
    return JSONResponse(
        status_code=exc.status_code,
        content={"detail": exc.detail},
        headers=getattr(exc, "headers", None),
    )
921
+
922
+ # 5) List user's videos (GET /videos) - requires login
923
@app.get("/videos", response_class=HTMLResponse)
async def list_videos(request: Request, current_user: User = Depends(get_current_user_from_cookie)):
    """
    Render the logged-in user's channel videos.

    :param request: Request object handed to the template
    :param current_user: User resolved from the "token" cookie
    :return: Rendered "videos.html" with the user and their videos
    """
    # Rebuild OAuth credentials from the dictionary stored on the user
    oauth_creds = Credentials.from_authorized_user_info(current_user.youtube_credentials)

    # Moderator bound to a YouTube client for this user
    channel_moderator = YouTubeCommentModerator(gambling_filter=GamblingFilter())
    channel_moderator.youtube_service = googleapiclient.discovery.build(
        "youtube", "v3", credentials=oauth_creds
    )

    template_context = {
        "request": request,
        "current_user": current_user,
        "videos": channel_moderator.get_channel_videos(),
    }
    return templates.TemplateResponse("videos.html", template_context)
939
+
940
+
941
+
942
+ # 6) Moderate a specific video's comments (GET /video/{video_id}) - requires login
943
@app.get("/video/{video_id}", response_class=HTMLResponse)
async def moderate_video_comments(
    request: Request,
    video_id: str,
    current_user: User = Depends(get_current_user_from_cookie)
):
    """
    Moderate one video's comments and render the result.

    :param request: Request object handed to the template
    :param video_id: ID of the video whose comments are moderated
    :param current_user: User resolved from the "token" cookie
    :return: Rendered "video_comments.html" with safe and flagged comments
    """
    # Reconstruct the Credentials object from the stored dict
    user_creds = Credentials.from_authorized_user_info(current_user.youtube_credentials)
    youtube_service = googleapiclient.discovery.build(
        "youtube", "v3",
        credentials=user_creds
    )
    user_moderator = YouTubeCommentModerator(gambling_filter=GamblingFilter())
    user_moderator.youtube_service = youtube_service

    moderation_results = user_moderator.moderate_video_comments(video_id)

    # Show the real title instead of a hard-coded placeholder. The lookup is
    # best-effort: a failure here must not discard the moderation results.
    video_title = "Unknown Video"
    try:
        video_response = youtube_service.videos().list(
            part="snippet",
            id=video_id
        ).execute()
        items = video_response.get("items") or []
        if items:
            video_title = items[0].get("snippet", {}).get("title", video_title)
    except Exception as e:
        logging.warning(f"Could not fetch title for video {video_id}: {e}")

    return templates.TemplateResponse("video_comments.html", {
        "request": request,
        "current_user": current_user,
        "video": {"id": video_id, "title": video_title},
        "safe_comments": moderation_results.get('safe_comments', []),
        "flagged_comments": moderation_results.get('gambling_comments', []),
        # Same key the /refresh_comments route passes to this template
        "total_comments": moderation_results.get('total_comments', 0)
    })
966
+
967
+ # 7) Logout => remove token
968
@app.get("/logout")
async def logout():
    """
    Log the user out by expiring the "token" cookie and redirecting to /login.

    :return: Redirect response that also clears the cookie
    """
    # Mirror the attributes used when the cookie was set at login
    # (Secure + SameSite=None on Hugging Face Spaces, Lax otherwise).
    # Browsers may reject a deletion whose attributes are not acceptable in
    # the current context, which would leave the session cookie in place.
    on_hf_space = os.getenv("HF_SPACE") == "true"
    response = RedirectResponse(url="/login")
    response.delete_cookie(
        "token",
        httponly=True,
        secure=on_hf_space,
        samesite="none" if on_hf_space else "lax",
    )
    return response
973
+
974
+ from jinja2 import Undefined
975
+ import json
976
+
977
def pretty_json(value):
    """
    Jinja filter: serialise ``value`` as indented, non-ASCII-escaped JSON.

    :param value: Object to serialise, or a Jinja ``Undefined``
    :return: Empty string for ``Undefined`` values, otherwise the JSON text
    """
    is_missing = isinstance(value, Undefined)
    return "" if is_missing else json.dumps(value, ensure_ascii=False, indent=2)


# Expose the filter to templates as ``{{ value | pretty_json }}``
templates.env.filters["pretty_json"] = pretty_json
983
+
984
def _index_rules():
    """Return the filter's current rule sets as sorted lists, keyed by rule type."""
    return {
        "platform": sorted(filter_instance._platform_names),
        "gambling_term": sorted(filter_instance._gambling_terms),
        "safe_indicator": sorted(filter_instance._safe_indicators),
        "gambling_context": sorted(filter_instance._gambling_contexts),
        "ambiguous_term": sorted(filter_instance._ambiguous_terms),
    }


def _render_index(request, result=None, comment=""):
    """Render "index.html" with the given result, comment text and current rules."""
    return templates.TemplateResponse("index.html", {
        "request": request,
        "result": result,
        "comment": comment,
        "rules": _index_rules(),
    })


@app.get("/classify", response_class=HTMLResponse)
async def read_root(request: Request):
    """
    Show the classifier page with no result and an empty comment box.

    :param request: Request object handed to the template
    :return: Rendered "index.html"
    """
    return _render_index(request)


@app.post("/classify", response_class=HTMLResponse)
async def classify_comment(request: Request, comment: str = Form(...)):
    """
    Classify a single submitted comment and show the verdict.

    :param request: Request object handed to the template
    :param comment: Comment text from the form
    :return: Rendered "index.html" whose result holds ``is_gambling`` and ``metrics``
    """
    is_gambling, metrics = filter_instance.is_gambling_comment(comment)
    return _render_index(
        request,
        {"is_gambling": is_gambling, "metrics": metrics},
        comment,
    )


@app.post("/add_rule", response_class=HTMLResponse)
async def add_rule(request: Request, rule_type: str = Form(...), rule_value: str = Form(...)):
    """
    Add a rule to the shared filter and report the outcome.

    :param request: Request object handed to the template
    :param rule_type: Rule category from the form
    :param rule_value: Rule text from the form
    :return: Rendered "index.html" with a confirmation, or with the
        ``ValueError`` text when the filter rejects the rule
    """
    try:
        filter_instance.add_rule(rule_type, rule_value)
        message = f"Added rule '{rule_value}' as type '{rule_type}'."
    except ValueError as e:
        message = str(e)
    return _render_index(request, {"message": message})


@app.post("/upload", response_class=HTMLResponse)
async def upload_file(request: Request, file: UploadFile = File(...), column: str = Form("comment")):
    """
    Classify every value in one column of an uploaded CSV/Excel file.

    :param request: Request object handed to the template
    :param file: Uploaded .csv, .xls or .xlsx file
    :param column: Name of the column holding the comments
    :return: Rendered "index.html" with the filter results under
        ``upload_result``, or with an error message when the file cannot be
        read or the column is missing
    """
    content = await file.read()
    # Match extensions case-insensitively and tolerate a missing filename
    filename = (file.filename or "").lower()
    try:
        if filename.endswith(".csv"):
            df = pd.read_csv(io.BytesIO(content))
        elif filename.endswith((".xls", ".xlsx")):
            df = pd.read_excel(io.BytesIO(content))
        else:
            raise ValueError("Unsupported file type.")
    except Exception as e:
        # Parsing errors and the unsupported-type error share one message format
        return _render_index(request, {"message": f"Error reading file: {e}"})

    if column not in df.columns:
        return _render_index(request, {
            "message": f"Column '{column}' not found. Available columns: {list(df.columns)}"
        })

    comments = df[column].astype(str).tolist()
    results = filter_instance.filter_comments(comments)

    # Return the results as part of the template context.
    return _render_index(request, {"upload_result": results})


@app.post("/add_visual_char")
async def add_visual_char(request: Request,
                          char: str = Form(...),
                          ascii_equiv: str = Form(...)):
    """
    Add or overwrite one entry in ``VISUAL_MAP``.

    :param request: Request object handed to the template
    :param char: Key to store in ``VISUAL_MAP``
    :param ascii_equiv: Value stored for ``char``
    :return: Rendered "index.html" with a confirmation message
    """
    # Add a new mapping
    VISUAL_MAP[char] = ascii_equiv

    message = f"Added visual map entry '{char}' -> '{ascii_equiv}'."
    return _render_index(request, {"message": message})
1111
+
1112
+
1113
+
1114
+