xuandin committed on
Commit
a486265
·
verified ·
1 Parent(s): 6fc23f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -0
app.py CHANGED
@@ -18,6 +18,57 @@ os.environ["MKL_NUM_THREADS"] = str(psutil.cpu_count(logical=False))
18
  # Set device globally
19
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  # Set page configuration
22
  st.set_page_config(
23
  page_title="SemViQA - Hệ thống Kiểm chứng Thông tin Tiếng Việt",
 
18
  # Set device globally
19
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
20
 
21
@st.cache_data
def preprocess_text(text):
    """Return ``text`` with leading and trailing whitespace removed.

    This is the hook for any further text cleaning or normalization;
    at present stripping is the only step applied. The function is
    wrapped with ``st.cache_data``.
    """
    cleaned = text.strip()
    return cleaned
25
+
26
+ # Optimized function for evidence extraction and classification with better CPU performance
27
def perform_verification(claim, context, model_qatc, tokenizer_qatc, model_tc, tokenizer_tc,
                         model_bc, tokenizer_bc, tfidf_threshold, length_ratio_threshold):
    """Extract evidence for ``claim`` from ``context`` and classify the claim.

    Evidence is obtained with ``extract_evidence_tfidf_qatc`` using the QATC
    model/tokenizer. The claim is then scored by the three-class classifier
    (``model_tc``); the binary classifier (``model_bc``) is only invoked when
    the three-class prediction is not index 0 ("NEI").

    Args:
        claim: The statement to verify.
        context: The text the evidence is extracted from.
        model_qatc, tokenizer_qatc: Model/tokenizer passed to the evidence
            extractor.
        model_tc, tokenizer_tc: Three-class classifier and its tokenizer.
        model_bc, tokenizer_bc: Binary classifier and its tokenizer.
        tfidf_threshold: Forwarded to the extractor as
            ``confidence_threshold``.
        length_ratio_threshold: Forwarded to the extractor unchanged.

    Returns:
        dict with keys ``evidence``, ``verdict`` (one of "NEI", "SUPPORTED",
        "REFUTED"), ``evidence_time`` and ``verdict_time`` (elapsed seconds),
        ``prob3class`` / ``prob2class`` (converted with ``.item()`` when they
        are tensors), and the raw predictions ``pred_tc`` / ``pred_bc``.
        ``prob2class`` and ``pred_bc`` are both 0 when the binary classifier
        was skipped.
    """
    # perf_counter() is monotonic and high-resolution, so the measured
    # intervals cannot be skewed by system clock adjustments the way
    # time.time() differences can.
    evidence_start_time = time.perf_counter()
    evidence = extract_evidence_tfidf_qatc(
        claim, context, model_qatc, tokenizer_qatc,
        DEVICE,
        confidence_threshold=tfidf_threshold,
        length_ratio_threshold=length_ratio_threshold
    )
    evidence_time = time.perf_counter() - evidence_start_time

    # Explicit garbage collection after evidence extraction
    gc.collect()

    # Classify the claim
    verdict_start_time = time.perf_counter()
    verdict = "NEI"
    # Defaults reported when the binary classifier is not run.
    prob2class, pred_bc = 0, 0
    with torch.no_grad():
        prob3class, pred_tc = classify_claim(
            claim, evidence, model_tc, tokenizer_tc, DEVICE
        )

        # Only run the binary classifier when the three-class result is not NEI.
        if pred_tc != 0:
            prob2class, pred_bc = classify_claim(
                claim, evidence, model_bc, tokenizer_bc, DEVICE
            )
            # NOTE(review): a binary "SUPPORTED" is accepted without comparing
            # probabilities, whereas "REFUTED" requires the binary probability
            # to exceed the three-class one. This mirrors the precedence of
            # the original chained conditional expression -- confirm intended.
            if pred_bc == 0:
                verdict = "SUPPORTED"
            elif prob2class > prob3class:
                verdict = "REFUTED"
            else:
                verdict = ["NEI", "SUPPORTED", "REFUTED"][pred_tc]

    verdict_time = time.perf_counter() - verdict_start_time

    return {
        "evidence": evidence,
        "verdict": verdict,
        "evidence_time": evidence_time,
        "verdict_time": verdict_time,
        "prob3class": prob3class.item() if isinstance(prob3class, torch.Tensor) else prob3class,
        "pred_tc": pred_tc,
        "prob2class": prob2class.item() if isinstance(prob2class, torch.Tensor) else prob2class,
        "pred_bc": pred_bc
    }
70
+
71
+
72
  # Set page configuration
73
  st.set_page_config(
74
  page_title="SemViQA - Hệ thống Kiểm chứng Thông tin Tiếng Việt",