ariansyahdedy commited on
Commit
15e12f6
·
1 Parent(s): 92c9bc0

First commit

Browse files
Files changed (5) hide show
  1. .gitignore +2 -0
  2. Dockerfile +16 -0
  3. app/main.py +404 -0
  4. requirements.txt +0 -0
  5. templates/index.html +519 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ venv
2
+ app/__pycache__
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
+ # you will also find guides on how best to write your Dockerfile
3
+
4
+ FROM python:3.9
5
+
6
+ RUN useradd -m -u 1000 user
7
+ USER user
8
+ ENV PATH="/home/user/.local/bin:$PATH"
9
+
10
+ WORKDIR /app
11
+
12
+ COPY --chown=user ./requirements.txt requirements.txt
13
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
14
+
15
+ COPY --chown=user . /app
16
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/main.py ADDED
@@ -0,0 +1,404 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # main.py
2
+ import re
3
+ import time
4
+ import os
5
+ import json
6
+ import pathlib
7
+ import logging
8
+ import unicodedata
9
+ import io
10
+ import traceback
11
+ import unidecode
12
+ import pandas as pd
13
+ from dotenv import load_dotenv
14
+
15
+
16
+ from fastapi import FastAPI, Request, Form, File, UploadFile, HTTPException, Depends
17
+ from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
18
+ from fastapi.templating import Jinja2Templates
19
+ from fastapi.staticfiles import StaticFiles
20
+ from fastapi.security import OAuth2PasswordBearer
21
+ from pydantic import BaseModel
22
+
23
+
24
+
25
+ load_dotenv()
26
+
27
+ # Configure logging at the top of the file
28
+ logging.basicConfig(
29
+ level=logging.INFO,
30
+ format='%(asctime)s - %(name)s - [%(levelname)s] %(message)s',
31
+ datefmt='%Y-%m-%d %H:%M:%S'
32
+ )
33
+ logger = logging.getLogger(__name__)
34
+
35
+ # Global visual map for replacing visually similar characters.
36
+ VISUAL_MAP = {
37
+ 'А': 'A', 'В': 'B', 'С': 'C', 'Е': 'E', 'Н': 'H', 'К': 'K', 'М': 'M',
38
+ 'О': 'O', 'Р': 'P', 'Т': 'T', 'Х': 'X',
39
+ 'а': 'a', 'в': 'b', 'с': 'c', 'е': 'e', 'о': 'o', 'р': 'p', 'х': 'x', 'у': 'y',
40
+ 'Я': 'R', 'я': 'r',
41
+ 'ρ': 'p',
42
+ 'Π': 'P',
43
+ # etc...
44
+ }
45
+
46
+
47
+ # --- GamblingFilter class (with rule updates) ---
48
class GamblingFilter:
    """
    Rule-based filter for detecting online gambling-related comments.

    Features include aggressive Unicode normalization, keyword matching, and
    pattern detection. A comment is scored from four signals (platform names,
    gambling terms, context regexes, "lucky" numbers) and flagged when the
    score reaches a threshold.
    """

    # Characters tolerated between the letters of a platform name, so that
    # spellings such as "d.o.r.a" or "a e r o 8 8" are still caught.
    _SEPARATOR = r'[^a-zA-Z0-9]{0,3}'

    # Maps the public rule-type names of the plain-term rules to the
    # attribute holding the corresponding set.
    _TERM_RULE_ATTRS = {
        'platform': '_platform_names',
        'gambling_term': '_gambling_terms',
        'safe_indicator': '_safe_indicators',
        'ambiguous_term': '_ambiguous_terms',
    }

    def __init__(self):
        """Load the built-in rule sets and compile the regexes derived from them."""
        logger.info("Initializing GamblingFilter")
        self._platform_names = {
            'agustoto', 'aero', 'aero88', 'dora', 'dora77', 'dewadora', 'pulau777', 'pulau', '777',
            'jptogel', 'mandalika', 'cnd88', 'axl', 'berkah99', 'weton88', 'garuda', 'hoki'
        }
        self._gambling_terms = {
            'jackpot', 'jp', 'wd', 'depo', 'cuan', 'gacor', 'gacir', 'jekpot', 'sultan',
            'rezeki nomplok', 'rezeki', 'menang', 'nomplok', 'deposit', 'withdraw', 'maxwin',
            'auto sultan', 'jepe', 'jepee', 'bikin nagih', 'berkah'
        }
        # Stored and exposed to the UI; not consulted by the scoring in this class.
        self._ambiguous_terms = {
            'auto', 'main', 'bermain', 'hasil', 'dapat', 'dapet', 'berkat'
        }
        # Any of these phrases short-circuits classification to "safe".
        self._safe_indicators = {
            'tidak mengandung', 'bukan perjudian', 'tanpa perjudian',
            'dokumentasi', 'profesional', 'pembelajaran'
        }
        self._gambling_contexts = [
            r'(main|bermain|coba).{1,30}(dapat|dapet|pro|jadi|langsung|menang|jp|cuan)',
            r'(modal|depo).{1,30}(jadi|langsung|wd|cuan)',
            r'(jp|jackpot|jekpot).{1,30}(gede|besar|pecah)',
            r'(berkat|dari).{1,30}(rezeki|menang|cuan|sultan)',
            r'(gacor|gacir).{1,30}(terus|parah|tiap|hari)',
            r'(rezeki|cuan).{1,30}(nomplok|datang|mengalir|lancar)',
            r'(hari ini).{1,30}(menang|cuan|rezeki|berkat)',
            r'(malah|eh).{1,30}(jadi|dapat|dapet|rezeki)',
            r'(auto).{1,30}(sultan|cuan|rezeki|kaya)',
            r'(0\d:[0-5]\d).{1,30}(menang|rezeki|cuan|gacor)',
            r'(iseng|coba).{1,30}(malah|jadi|eh|pro)',
            r'(deposit|depo|wd).{1,30}(jadi|langsung)',
            r'(langsung|auto).{1,30}(jp|cuan|sultan|rezeki)',
            r'bikin\s+nagih',
            r'gak\s+ada\s+duanya',
            r'berkah.{0,20}rezeki',
            r'puji\s+syukur'
        ]
        self._compiled_gambling_contexts = [
            re.compile(pattern, re.IGNORECASE | re.DOTALL)
            for pattern in self._gambling_contexts
        ]
        self._update_platform_pattern()
        self._number_pattern = re.compile(r'(88|777|77|99|7+)')

    def _update_platform_pattern(self):
        """Recompile the platform name regex based on current _platform_names."""
        alternatives = []
        # Longest names first: regex alternation takes the first alternative
        # that matches, so 'aero88' must be tried before its prefix 'aero'.
        # The secondary alphabetical key keeps the pattern deterministic.
        for platform in sorted(self._platform_names, key=lambda p: (-len(p), p)):
            if not platform:
                continue  # an empty name would match everywhere
            char_classes = []
            for c in platform:
                # dict.fromkeys de-duplicates digits/symbols (upper == lower)
                # while keeping order; re.escape keeps ']', '^', '-' and '\\'
                # literal inside the character class.
                variants = dict.fromkeys((c.upper(), c.lower()))
                char_classes.append('[' + ''.join(re.escape(v) for v in variants) + ']')
            alternatives.append(self._SEPARATOR.join(char_classes))
        # '(?!)' never matches; used when there is no platform name at all.
        source = '|'.join(alternatives) if alternatives else r'(?!)'
        self._platform_pattern = re.compile(source, re.DOTALL)

    def add_rule(self, rule_type: str, rule_value: str):
        """
        Add a new rule based on the rule type.

        Allowed types: 'platform', 'gambling_term', 'safe_indicator',
        'gambling_context', 'ambiguous_term'.

        Plain terms are stored lower-cased because they are compared against
        lower-cased text. A 'gambling_context' value is a regular expression
        and is stored verbatim (it is compiled case-insensitively).

        Raises:
            ValueError: if the rule type is unknown, the value is blank, or a
                'gambling_context' value is not a valid regular expression.
        """
        rule_type = rule_type.strip().lower()
        rule_value = rule_value.strip()
        if rule_type != 'gambling_context' and rule_type not in self._TERM_RULE_ATTRS:
            raise ValueError("Unsupported rule type")
        if not rule_value:
            # '' is a substring of every text and would flag every comment.
            raise ValueError("Rule value must not be empty")

        if rule_type == 'gambling_context':
            # Compile before storing so a bad pattern leaves both lists untouched.
            try:
                compiled = re.compile(rule_value, re.IGNORECASE | re.DOTALL)
            except re.error as err:
                raise ValueError(f"Invalid regular expression: {err}") from err
            self._gambling_contexts.append(rule_value)
            self._compiled_gambling_contexts.append(compiled)
            return

        getattr(self, self._TERM_RULE_ATTRS[rule_type]).add(rule_value.lower())
        if rule_type == 'platform':
            self._update_platform_pattern()

    def _strip_all_formatting(self, text: str) -> str:
        """Return text lower-cased with everything but alphanumerics and whitespace removed."""
        return ''.join(c.lower() for c in text if c.isalnum() or c.isspace())

    def _robust_normalize(self, text: str) -> str:
        """Map look-alike characters to ASCII and return the lower-cased result."""
        # Step 1: custom mapping for visually similar characters
        mapped_text = ''.join(VISUAL_MAP.get(ch, ch) for ch in text)
        # Step 2: Unicode normalization + unidecode
        decomposed = unicodedata.normalize('NFKD', mapped_text)
        ascii_equiv = unidecode.unidecode(decomposed)
        return ascii_equiv.lower()

    def _match_platform_pattern(self, text: str) -> list:
        """Return the distinct non-empty substrings of text matched by the platform regex."""
        hits = (m.group(0) for m in self._platform_pattern.finditer(text))
        # dict.fromkeys: de-duplicate while preserving first-seen order.
        return list(dict.fromkeys(hit for hit in hits if hit))

    def _extract_platform_names(self, text: str) -> list:
        """
        Collect platform evidence found in text.

        Combines regex hits on the raw text (which tolerate separators between
        letters), plain substring hits on the normalized / stripped text, and
        the bare markers '88' and '777'.
        """
        # Whole matched substrings; the pattern has no capture groups, so each
        # hit is a single string and must not be iterated character by character.
        matches = self._match_platform_pattern(text)
        normalized = self._robust_normalize(text)
        stripped = self._strip_all_formatting(text)
        for platform in sorted(self._platform_names):
            if platform in normalized or platform in stripped:
                if not any(platform in m.lower() for m in matches):
                    matches.append(platform)
        for marker in ('88', '777'):
            if marker in text or marker in normalized:
                if not any(marker in m for m in matches):
                    matches.append(marker)
        return matches

    def normalize_text(self, text: str) -> str:
        """Return NFKD-decomposed, lower-cased text with non-ASCII characters dropped."""
        normalized = unicodedata.normalize('NFKD', text)
        normalized = ''.join(c for c in normalized if ord(c) < 128 or c.isspace())
        return normalized.lower()

    def is_gambling_comment(self, text: str, threshold: float = 0.55) -> tuple:
        """
        Classify a single comment.

        Scoring: any platform evidence scores 1.0; each gambling term and each
        context match scores 0.2 (each capped at 0.4); a "lucky" number adds
        0.1. Signals are only summed when they corroborate each other,
        otherwise the strongest single signal is damped by 0.8.

        Args:
            text: the raw comment.
            threshold: minimum confidence for the comment to be flagged.

        Returns:
            (is_gambling, metrics) where metrics is a dict holding the matched
            evidence, 'confidence_score' and 'processing_time_ms'.
        """
        # perf_counter is monotonic, so the measured duration cannot go
        # negative if the wall clock is adjusted.
        start_time = time.perf_counter()
        logger.info("Analyzing comment for gambling content: %s...", text[:100])
        metrics = {
            'platform_matches': [],
            'gambling_term_matches': [],
            'context_matches': [],
            'safe_indicators': [],
            'has_numbers': False,
            'confidence_score': 0.0,
            'processing_time_ms': 0
        }
        normalized_text = self.normalize_text(text)
        stripped_text = self._strip_all_formatting(text)
        aggressive_text = self._robust_normalize(text)

        # A safe indicator wins outright: nothing else is evaluated.
        for indicator in self._safe_indicators:
            if indicator in normalized_text:
                metrics['safe_indicators'].append(indicator)
        if metrics['safe_indicators']:
            metrics['confidence_score'] = 0.0
            metrics['processing_time_ms'] = (time.perf_counter() - start_time) * 1000
            return False, metrics

        platform_matches = self._extract_platform_names(text)
        if platform_matches:
            metrics['platform_matches'] = platform_matches
        for term in self._gambling_terms:
            if term in normalized_text or term in stripped_text or term in aggressive_text:
                metrics['gambling_term_matches'].append(term)
        if self._number_pattern.search(normalized_text):
            metrics['has_numbers'] = True
        for pattern in self._compiled_gambling_contexts:
            match = pattern.search(normalized_text)
            if match:
                metrics['context_matches'].append(match.group(0))
            # Also try the look-alike-mapped text; skip identical hits.
            match = pattern.search(aggressive_text)
            if match and match.group(0) not in metrics['context_matches']:
                metrics['context_matches'].append(match.group(0))

        platform_score = min(len(metrics['platform_matches']) * 1.0, 1)
        term_score = min(len(metrics['gambling_term_matches']) * 0.2, 0.4)
        context_score = min(len(metrics['context_matches']) * 0.2, 0.4)
        number_score = 0.1 if metrics['has_numbers'] else 0
        if platform_score > 0 and (term_score > 0 or context_score > 0):
            total_score = platform_score + term_score + context_score + number_score
        elif context_score > 0.2 and term_score > 0:
            total_score = context_score + term_score + number_score
        else:
            total_score = max(platform_score, term_score, context_score) * 0.8
        metrics['confidence_score'] = min(total_score, 1.0)

        # Special cases: religious-sounding phrasing next to a platform name
        # is forced to at least 0.7.
        if ("berkah" in normalized_text or "berkah" in aggressive_text) and \
           ("rezeki" in normalized_text or "rezeki" in aggressive_text) and \
           metrics['platform_matches']:
            metrics['confidence_score'] = max(metrics['confidence_score'], 0.7)
            if "Special case: berkah+rezeki+platform" not in metrics['context_matches']:
                metrics['context_matches'].append("Special case: berkah+rezeki+platform")
        elif ("puji" in normalized_text or "puji" in aggressive_text) and \
             ("syukur" in normalized_text or "syukur" in aggressive_text) and \
             metrics['platform_matches']:
            metrics['confidence_score'] = max(metrics['confidence_score'], 0.7)
            if "Special case: puji+syukur+platform" not in metrics['context_matches']:
                metrics['context_matches'].append("Special case: puji+syukur+platform")

        metrics['processing_time_ms'] = (time.perf_counter() - start_time) * 1000
        is_gambling = metrics['confidence_score'] >= threshold
        return is_gambling, metrics

    def filter_comments(self, comments: list, threshold: float = 0.55) -> dict:
        """
        Classify every comment in comments.

        Returns:
            A dict with 'gambling_comments' and 'safe_comments' (the original
            strings) and 'metrics' (one metrics dict per comment, each extended
            with the comment under 'original_text').
        """
        result = {
            'gambling_comments': [],
            'safe_comments': [],
            'metrics': []
        }
        for comment in comments:
            is_gambling, metrics = self.is_gambling_comment(comment, threshold)
            if is_gambling:
                result['gambling_comments'].append(comment)
            else:
                result['safe_comments'].append(comment)
            metrics["original_text"] = comment
            result["metrics"].append(metrics)
        return result
247
+
248
+
249
+ # --- FastAPI application setup ---
250
+ app = FastAPI()
251
+
252
+ templates = Jinja2Templates(directory="templates")
253
+
254
+ # Create a single instance of the GamblingFilter
255
+ filter_instance = GamblingFilter()
256
+
257
+ from jinja2 import Undefined
258
def pretty_json(value):
    """
    Jinja filter: render value as indented JSON that is safe to embed in HTML.

    index.html pipes this filter's output through `| safe` inside a <pre>,
    and the serialized data contains user-supplied comment text. The JSON is
    therefore HTML-escaped here, otherwise a comment containing markup would
    be injected into the page verbatim.

    Returns an empty string for an undefined template variable.
    """
    import html  # local import keeps this block self-contained

    if isinstance(value, Undefined):
        return ""
    rendered = json.dumps(value, ensure_ascii=False, indent=2)
    # quote=False: the output is placed in element content, not in an
    # attribute, so only &, < and > need escaping.
    return html.escape(rendered, quote=False)
262
+ templates.env.filters["pretty_json"] = pretty_json
263
+
264
@app.get("/", response_class=HTMLResponse)
async def read_root(request: Request):
    """Serve the landing page: empty form, no result, current rule sets."""
    current_rules = {
        "platform": sorted(filter_instance._platform_names),
        "gambling_term": sorted(filter_instance._gambling_terms),
        "safe_indicator": sorted(filter_instance._safe_indicators),
        "gambling_context": sorted(filter_instance._gambling_contexts),
        "ambiguous_term": sorted(filter_instance._ambiguous_terms),
    }
    context = {
        "request": request,
        "result": None,
        "comment": "",
        "rules": current_rules,
    }
    return templates.TemplateResponse("index.html", context)
278
+
279
+
280
+
281
@app.get("/classify", response_class=HTMLResponse)
async def read_root(request: Request):
    """
    Serve the empty classification form on GET /classify.

    NOTE: this function reuses the name of the "/" handler. The decorator
    registers the route when the function is defined, so only the
    module-level name is rebound.
    """
    rule_sources = (
        ("platform", filter_instance._platform_names),
        ("gambling_term", filter_instance._gambling_terms),
        ("safe_indicator", filter_instance._safe_indicators),
        ("gambling_context", filter_instance._gambling_contexts),
        ("ambiguous_term", filter_instance._ambiguous_terms),
    )
    page_context = {
        "request": request,
        "result": None,
        "comment": "",
        "rules": {label: sorted(values) for label, values in rule_sources},
    }
    return templates.TemplateResponse("index.html", page_context)
295
+
296
@app.post("/classify", response_class=HTMLResponse)
async def classify_comment(request: Request, comment: str = Form(...)):
    """
    Classify the submitted comment and re-render the page with the verdict.

    The template receives `result` as {"is_gambling": bool, "metrics": dict}
    and the original comment so the textarea stays filled in.
    """
    is_gambling, metrics = filter_instance.is_gambling_comment(comment)
    result = {"is_gambling": is_gambling, "metrics": metrics}
    # Goes through the configured logger instead of a bare print to stdout.
    logger.debug("Classification metrics: %s", metrics)
    return templates.TemplateResponse("index.html", {
        "request": request,
        "result": result,
        "comment": comment,
        "rules": {
            "platform": sorted(filter_instance._platform_names),
            "gambling_term": sorted(filter_instance._gambling_terms),
            "safe_indicator": sorted(filter_instance._safe_indicators),
            "gambling_context": sorted(filter_instance._gambling_contexts),
            "ambiguous_term": sorted(filter_instance._ambiguous_terms)
        }
    })
313
+
314
@app.post("/add_rule", response_class=HTMLResponse)
async def add_rule(request: Request, rule_type: str = Form(...), rule_value: str = Form(...)):
    """
    Add a rule to the shared filter and re-render the page with a status message.

    A rejected rule is reported in the page rather than raised.
    """
    try:
        filter_instance.add_rule(rule_type, rule_value)
        message = f"Added rule '{rule_value}' as type '{rule_type}'."
    except (ValueError, re.error) as e:
        # re.error: a 'gambling_context' value is compiled as a regular
        # expression, and a malformed pattern must not turn into a 500.
        message = str(e)
    return templates.TemplateResponse("index.html", {
        "request": request,
        "result": {"message": message},
        "comment": "",
        "rules": {
            "platform": sorted(filter_instance._platform_names),
            "gambling_term": sorted(filter_instance._gambling_terms),
            "safe_indicator": sorted(filter_instance._safe_indicators),
            "gambling_context": sorted(filter_instance._gambling_contexts),
            "ambiguous_term": sorted(filter_instance._ambiguous_terms)
        }
    })
333
+
334
@app.post("/upload", response_class=HTMLResponse)
async def upload_file(request: Request, file: UploadFile = File(...), column: str = Form("comment")):
    """
    Classify every comment in one column of an uploaded CSV or Excel file.

    Args:
        file: the upload; its type is chosen from the filename extension
            (.csv, .xls, .xlsx; case-insensitive).
        column: name of the column holding the comments.

    Read errors and a missing column are reported in the rendered page.
    """
    def render(result):
        # Single place that builds the template context for every outcome.
        return templates.TemplateResponse("index.html", {
            "request": request,
            "result": result,
            "comment": "",
            "rules": {
                "platform": sorted(filter_instance._platform_names),
                "gambling_term": sorted(filter_instance._gambling_terms),
                "safe_indicator": sorted(filter_instance._safe_indicators),
                "gambling_context": sorted(filter_instance._gambling_contexts),
                "ambiguous_term": sorted(filter_instance._ambiguous_terms)
            }
        })

    content = await file.read()
    # The filename may be missing, and extensions such as ".CSV" are valid too.
    filename = (file.filename or "").lower()
    try:
        if filename.endswith('.csv'):
            df = pd.read_csv(io.BytesIO(content))
        elif filename.endswith(('.xls', '.xlsx')):
            df = pd.read_excel(io.BytesIO(content))
        else:
            raise ValueError("Unsupported file type.")
    except Exception as e:
        # Boundary handler: any parser failure is shown to the user.
        return render({"message": f"Error reading file: {e}"})

    if column not in df.columns:
        return render({"message": f"Column '{column}' not found. Available columns: {list(df.columns)}"})

    # Drop empty cells first; astype(str) alone would turn them into the
    # literal comment "nan" and count them as safe comments.
    comments = df[column].dropna().astype(str).tolist()
    results = filter_instance.filter_comments(comments)
    return render({"upload_result": results})
384
+
385
@app.post("/add_visual_char")
async def add_visual_char(request: Request, char: str = Form(...), ascii_equiv: str = Form(...)):
    """
    Register a look-alike character in VISUAL_MAP and re-render the page.

    Args:
        char: the single character to replace.
        ascii_equiv: the text it is replaced with during normalization.
    """
    # VISUAL_MAP is consulted one character at a time, so a key that is not
    # exactly one character long could never match; reject it instead of
    # storing a dead entry.
    if len(char) == 1:
        VISUAL_MAP[char] = ascii_equiv
        message = f"Added visual map entry '{char}' -> '{ascii_equiv}'."
    else:
        message = "Visual map key must be exactly one character."
    return templates.TemplateResponse("index.html", {
        "request": request,
        "result": {"message": message},
        "comment": "",
        "rules": {
            "platform": sorted(filter_instance._platform_names),
            "gambling_term": sorted(filter_instance._gambling_terms),
            "safe_indicator": sorted(filter_instance._safe_indicators),
            "gambling_context": sorted(filter_instance._gambling_contexts),
            "ambiguous_term": sorted(filter_instance._ambiguous_terms)
        }
    })
401
+
402
+ if __name__ == "__main__":
403
+ import uvicorn
404
+ uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
requirements.txt ADDED
Binary file (996 Bytes). View file
 
templates/index.html ADDED
@@ -0,0 +1,519 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Gambling Comment Filter</title>
7
+ <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
8
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
9
+ <style>
10
+ :root {
11
+ --primary: #4361ee;
12
+ --secondary: #3a0ca3;
13
+ --light: #f8f9fa;
14
+ --dark: #212529;
15
+ --success: #2dc653;
16
+ --danger: #e63946;
17
+ --warning: #ff9f1c;
18
+ --info: #90e0ef;
19
+ --border-radius: 0.5rem;
20
+ --box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
21
+ --transition: all 0.3s ease;
22
+ }
23
+
24
+ body {
25
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
26
+ background-color: #f5f7fa;
27
+ color: #333;
28
+ line-height: 1.6;
29
+ padding: 0;
30
+ margin: 0;
31
+ }
32
+
33
+ .navbar {
34
+ background-color: var(--primary);
35
+ box-shadow: var(--box-shadow);
36
+ padding: 1rem 0;
37
+ }
38
+
39
+ .navbar-brand {
40
+ color: white;
41
+ font-weight: 700;
42
+ font-size: 1.5rem;
43
+ padding-left: 1rem;
44
+ }
45
+
46
+ .app-container {
47
+ max-width: 1200px;
48
+ margin: 2rem auto;
49
+ padding: 0 1rem;
50
+ }
51
+
52
+ .card {
53
+ border: none;
54
+ border-radius: var(--border-radius);
55
+ box-shadow: var(--box-shadow);
56
+ margin-bottom: 2rem;
57
+ overflow: hidden;
58
+ transition: var(--transition);
59
+ }
60
+
61
+ .card:hover {
62
+ box-shadow: 0 10px 15px rgba(0, 0, 0, 0.1);
63
+ }
64
+
65
+ .card-header {
66
+ background-color: white;
67
+ border-bottom: 1px solid rgba(0, 0, 0, 0.1);
68
+ font-weight: 600;
69
+ padding: 1.25rem 1.5rem;
70
+ }
71
+
72
+ .card-body {
73
+ padding: 1.5rem;
74
+ }
75
+
76
+ .form-label {
77
+ font-weight: 500;
78
+ margin-bottom: 0.5rem;
79
+ color: #495057;
80
+ }
81
+
82
+ .form-control {
83
+ border-radius: var(--border-radius);
84
+ padding: 0.75rem 1rem;
85
+ border: 1px solid #ced4da;
86
+ transition: var(--transition);
87
+ }
88
+
89
+ .form-control:focus {
90
+ border-color: var(--primary);
91
+ box-shadow: 0 0 0 0.2rem rgba(67, 97, 238, 0.25);
92
+ }
93
+
94
+ .btn {
95
+ border-radius: var(--border-radius);
96
+ padding: 0.75rem 1.5rem;
97
+ font-weight: 500;
98
+ transition: var(--transition);
99
+ }
100
+
101
+ .btn-primary {
102
+ background-color: var(--primary);
103
+ border-color: var(--primary);
104
+ }
105
+
106
+ .btn-primary:hover {
107
+ background-color: var(--secondary);
108
+ border-color: var(--secondary);
109
+ }
110
+
111
+ .result {
112
+ background-color: #f8f9fa;
113
+ border-radius: var(--border-radius);
114
+ padding: 1.5rem;
115
+ margin-bottom: 2rem;
116
+ }
117
+
118
+ .result-header {
119
+ display: flex;
120
+ align-items: center;
121
+ margin-bottom: 1rem;
122
+ }
123
+
124
+ .result-icon {
125
+ font-size: 1.5rem;
126
+ margin-right: 0.75rem;
127
+ }
128
+
129
+ .result-title {
130
+ font-size: 1.25rem;
131
+ font-weight: 600;
132
+ margin: 0;
133
+ }
134
+
135
+ .badge {
136
+ font-size: 0.85rem;
137
+ padding: 0.5rem 0.75rem;
138
+ border-radius: 50px;
139
+ font-weight: 500;
140
+ }
141
+
142
+ .rules-container {
143
+ background-color: white;
144
+ border-radius: var(--border-radius);
145
+ box-shadow: var(--box-shadow);
146
+ padding: 1.5rem;
147
+ }
148
+
149
+ .rules-category {
150
+ margin-bottom: 2rem;
151
+ }
152
+
153
+ .rules-category h3 {
154
+ font-size: 1.1rem;
155
+ font-weight: 600;
156
+ margin-bottom: 1rem;
157
+ padding-bottom: 0.5rem;
158
+ border-bottom: 2px solid var(--primary);
159
+ color: var(--primary);
160
+ }
161
+
162
+ .rules-list {
163
+ list-style-type: none;
164
+ padding: 0;
165
+ margin: 0;
166
+ max-height: 200px;
167
+ overflow-y: auto;
168
+ }
169
+
170
+ .rules-list li {
171
+ padding: 0.5rem 0.75rem;
172
+ background-color: #f8f9fa;
173
+ margin-bottom: 0.5rem;
174
+ border-radius: var(--border-radius);
175
+ font-size: 0.9rem;
176
+ display: flex;
177
+ align-items: center;
178
+ }
179
+
180
+ .rules-list li:before {
181
+ content: "•";
182
+ color: var(--primary);
183
+ font-weight: bold;
184
+ margin-right: 0.5rem;
185
+ }
186
+
187
+ pre {
188
+ background-color: #f8f9fa;
189
+ padding: 1rem;
190
+ border-radius: var(--border-radius);
191
+ white-space: pre-wrap;
192
+ }
193
+
194
+ .tabs {
195
+ display: flex;
196
+ background-color: white;
197
+ border-radius: var(--border-radius);
198
+ box-shadow: var(--box-shadow);
199
+ margin-bottom: 2rem;
200
+ overflow: hidden;
201
+ }
202
+
203
+ .tab {
204
+ flex: 1;
205
+ text-align: center;
206
+ padding: 1rem;
207
+ cursor: pointer;
208
+ transition: var(--transition);
209
+ border-bottom: 3px solid transparent;
210
+ font-weight: 500;
211
+ }
212
+
213
+ .tab.active {
214
+ background-color: white;
215
+ color: var(--primary);
216
+ border-bottom: 3px solid var(--primary);
217
+ }
218
+
219
+ .tab-icon {
220
+ margin-right: 0.5rem;
221
+ }
222
+
223
+ .tab-content {
224
+ display: none;
225
+ }
226
+
227
+ .tab-content.active {
228
+ display: block;
229
+ }
230
+
231
+ .footer {
232
+ text-align: center;
233
+ padding: 2rem 0;
234
+ margin-top: 2rem;
235
+ background-color: white;
236
+ border-top: 1px solid rgba(0, 0, 0, 0.1);
237
+ }
238
+ </style>
239
+ </head>
240
+ <body>
241
+ <!-- Navbar -->
242
+ <nav class="navbar navbar-dark">
243
+ <div class="container">
244
+ <span class="navbar-brand">
245
+ <i class="fas fa-shield-alt me-2"></i>
246
+ Gambling Comment Filter
247
+ </span>
248
+ </div>
249
+ </nav>
250
+
251
+ <div class="app-container">
252
+ <!-- Results Section (if available) -->
253
+ {% if result %}
254
+ <div class="card mb-4">
255
+ <div class="card-header d-flex align-items-center">
256
+ <i class="fas fa-chart-bar me-2"></i>
257
+ Analysis Results
258
+ </div>
259
+ <div class="card-body">
260
+ {% if result.message %}
261
+ <div class="alert alert-success">
262
+ <i class="fas fa-check-circle me-2"></i>
263
+ {{ result.message }}
264
+ </div>
265
+ {% elif result.upload_result %}
266
+ <div class="result">
267
+ <div class="result-header">
268
+ <div class="result-icon text-primary">
269
+ <i class="fas fa-file-alt"></i>
270
+ </div>
271
+ <h5 class="result-title">File Upload Results</h5>
272
+ </div>
273
+ <div class="row mb-3">
274
+ <div class="col-md-6">
275
+ <div class="card bg-light">
276
+ <div class="card-body text-center">
277
+ <h3 class="text-danger mb-1">{{ result.upload_result.gambling_comments | length }}</h3>
278
+ <p class="mb-0">Gambling Comments Found</p>
279
+ </div>
280
+ </div>
281
+ </div>
282
+ <div class="col-md-6">
283
+ <div class="card bg-light">
284
+ <div class="card-body text-center">
285
+ <h3 class="text-success mb-1">{{ result.upload_result.safe_comments | length }}</h3>
286
+ <p class="mb-0">Safe Comments Found</p>
287
+ </div>
288
+ </div>
289
+ </div>
290
+ </div>
291
+ <details>
292
+ <summary class="mb-2 btn btn-sm btn-outline-secondary">View Detailed Results</summary>
293
+ <pre>{{ result.upload_result | pretty_json | safe }}</pre>
294
+ </details>
295
+ </div>
296
+ {% else %}
297
+ <div class="result">
298
+ <div class="result-header">
299
+ <div class="result-icon {% if result.is_gambling %}text-danger{% else %}text-success{% endif %}">
300
+ <i class="fas {% if result.is_gambling %}fa-exclamation-triangle{% else %}fa-check-circle{% endif %}"></i>
301
+ </div>
302
+ <h5 class="result-title">Classification Result</h5>
303
+ </div>
304
+ <div class="mb-3">
305
+ <span class="badge {% if result.is_gambling %}bg-danger{% else %}bg-success{% endif %} me-2">
306
+ <i class="fas {% if result.is_gambling %}fa-times{% else %}fa-check{% endif %} me-1"></i>
307
+ {{ "Gambling Comment" if result.is_gambling else "Safe Comment" }}
308
+ </span>
309
+ <span class="badge bg-info text-dark">
310
+ <i class="fas fa-chart-line me-1"></i>
311
+ Confidence: {{ result.metrics.confidence_score }}
312
+ </span>
313
+ </div>
314
+ <details>
315
+ <summary class="mb-2 btn btn-sm btn-outline-secondary">View Analysis Details</summary>
316
+ <pre>{{ result.metrics| pretty_json | safe }}</pre>
317
+ </details>
318
+ </div>
319
+ {% endif %}
320
+ </div>
321
+ </div>
322
+ {% endif %}
323
+
324
+ <!-- Tabs for Different Functions -->
325
+ <div class="tabs">
326
+ <div class="tab active" data-tab="classify">
327
+ <i class="fas fa-search tab-icon"></i>
328
+ Classify Comment
329
+ </div>
330
+ <div class="tab" data-tab="upload">
331
+ <i class="fas fa-upload tab-icon"></i>
332
+ Batch Upload
333
+ </div>
334
+ <div class="tab" data-tab="rules">
335
+ <i class="fas fa-cogs tab-icon"></i>
336
+ Manage Rules
337
+ </div>
338
+ </div>
339
+
340
+ <!-- Tab Contents -->
341
+ <div class="tab-content active" id="classify-tab">
342
+ <div class="card">
343
+ <div class="card-header">
344
+ <i class="fas fa-search me-2"></i>
345
+ Classify Single Comment
346
+ </div>
347
+ <div class="card-body">
348
+ <form action="/classify" method="post">
349
+ <div class="form-group mb-3">
350
+ <label for="comment" class="form-label">Enter your comment:</label>
351
+ <textarea class="form-control" name="comment" id="comment" rows="4" placeholder="Type or paste the comment here...">{{ comment }}</textarea>
352
+ </div>
353
+ <button type="submit" class="btn btn-primary">
354
+ <i class="fas fa-check-circle me-2"></i>
355
+ Analyze Comment
356
+ </button>
357
+ </form>
358
+ </div>
359
+ </div>
360
+ </div>
361
+
362
+ <div class="tab-content" id="upload-tab">
363
+ <div class="card">
364
+ <div class="card-header">
365
+ <i class="fas fa-upload me-2"></i>
366
+ Batch Upload &amp; Process
367
+ </div>
368
+ <div class="card-body">
369
+ <form action="/upload" method="post" enctype="multipart/form-data">
370
+ <div class="form-group mb-3">
371
+ <label for="file" class="form-label">Select File for Analysis:</label>
372
+ <input type="file" class="form-control" name="file" id="file">
373
+ <small class="form-text text-muted">Supported formats: CSV, Excel (.xls, .xlsx)</small>
374
+ </div>
375
+ <div class="form-group mb-3">
376
+ <label for="column" class="form-label">Column Name:</label>
377
+ <input type="text" class="form-control" name="column" id="column" value="comment" placeholder="Column containing comments">
378
+ <small class="form-text text-muted">Default is "comment"</small>
379
+ </div>
380
+ <button type="submit" class="btn btn-primary">
381
+ <i class="fas fa-file-import me-2"></i>
382
+ Upload &amp; Process File
383
+ </button>
384
+ </form>
385
+ </div>
386
+ </div>
387
+ </div>
388
+
389
+ <div class="tab-content" id="rules-tab">
390
+ <div class="card mb-4">
391
+ <div class="card-header">
392
+ <i class="fas fa-plus-circle me-2"></i>
393
+ Add New Rule
394
+ </div>
395
+ <div class="card-body">
396
+ <form action="/add_rule" method="post">
397
+ <div class="row">
398
+ <div class="col-md-6">
399
+ <div class="form-group mb-3">
400
+ <label for="rule_type" class="form-label">Rule Type:</label>
401
+ <select name="rule_type" id="rule_type" class="form-control">
402
+ <option value="platform">Platform Name</option>
403
+ <option value="gambling_term">Gambling Term</option>
404
+ <option value="safe_indicator">Safe Indicator</option>
405
+ <option value="gambling_context">Gambling Context</option>
406
+ <option value="ambiguous_term">Ambiguous Term</option>
407
+ </select>
408
+ </div>
409
+ </div>
410
+ <div class="col-md-6">
411
+ <div class="form-group mb-3">
412
+ <label for="rule_value" class="form-label">Rule Value:</label>
413
+ <input type="text" class="form-control" name="rule_value" id="rule_value" placeholder="Enter new rule value">
414
+ </div>
415
+ </div>
416
+ </div>
417
+ <button type="submit" class="btn btn-primary">
418
+ <i class="fas fa-plus me-2"></i>
419
+ Add Rule
420
+ </button>
421
+ </form>
422
+ </div>
423
+ </div>
424
+
425
+ <div class="rules-container">
426
+ <h2 class="mb-4">Current Rules</h2>
427
+ <div class="row">
428
+ <div class="col-md-4">
429
+ <div class="rules-category">
430
+ <h3><i class="fas fa-dice me-2"></i>Platform Names</h3>
431
+ <ul class="rules-list">
432
+ {% for rule in rules.platform %}
433
+ <li>{{ rule }}</li>
434
+ {% endfor %}
435
+ </ul>
436
+ </div>
437
+ </div>
438
+ <div class="col-md-4">
439
+ <div class="rules-category">
440
+ <h3><i class="fas fa-coins me-2"></i>Gambling Terms</h3>
441
+ <ul class="rules-list">
442
+ {% for rule in rules.gambling_term %}
443
+ <li>{{ rule }}</li>
444
+ {% endfor %}
445
+ </ul>
446
+ </div>
447
+ </div>
448
+ <div class="col-md-4">
449
+ <div class="rules-category">
450
+ <h3><i class="fas fa-shield-alt me-2"></i>Safe Indicators</h3>
451
+ <ul class="rules-list">
452
+ {% for rule in rules.safe_indicator %}
453
+ <li>{{ rule }}</li>
454
+ {% endfor %}
455
+ </ul>
456
+ </div>
457
+ </div>
458
+ </div>
459
+ <div class="row">
460
+ <div class="col-md-6">
461
+ <div class="rules-category">
462
+ <h3><i class="fas fa-comment-alt me-2"></i>Gambling Contexts</h3>
463
+ <ul class="rules-list">
464
+ {% for rule in rules.gambling_context %}
465
+ <li>{{ rule }}</li>
466
+ {% endfor %}
467
+ </ul>
468
+ </div>
469
+ </div>
470
+ <div class="col-md-6">
471
+ <div class="rules-category">
472
+ <h3><i class="fas fa-question-circle me-2"></i>Ambiguous Terms</h3>
473
+ <ul class="rules-list">
474
+ {% for rule in rules.ambiguous_term %}
475
+ <li>{{ rule }}</li>
476
+ {% endfor %}
477
+ </ul>
478
+ </div>
479
+ </div>
480
+ </div>
481
+ </div>
482
+ </div>
483
+ </div>
484
+
485
+ <footer class="footer">
486
+ <div class="container">
487
+ <p class="mb-0">Gambling Comment Filter &copy; 2025 | All Rights Reserved</p>
488
+ </div>
489
+ </footer>
490
+
491
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
492
+ <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
493
+ <script>
494
+ // Tab functionality
495
+ document.addEventListener('DOMContentLoaded', function() {
496
+ const tabs = document.querySelectorAll('.tab');
497
+
498
+ tabs.forEach(tab => {
499
+ tab.addEventListener('click', function() {
500
+ // Remove active class from all tabs
501
+ tabs.forEach(t => t.classList.remove('active'));
502
+
503
+ // Add active class to clicked tab
504
+ this.classList.add('active');
505
+
506
+ // Hide all tab contents
507
+ document.querySelectorAll('.tab-content').forEach(content => {
508
+ content.classList.remove('active');
509
+ });
510
+
511
+ // Show the corresponding tab content
512
+ const tabId = this.getAttribute('data-tab') + '-tab';
513
+ document.getElementById(tabId).classList.add('active');
514
+ });
515
+ });
516
+ });
517
+ </script>
518
+ </body>
519
+ </html>