Hugging Face Spaces diff view — commit "Update app.py" (file: app.py, status: CHANGED)
@@ -18,6 +18,57 @@ os.environ["MKL_NUM_THREADS"] = str(psutil.cpu_count(logical=False))
|
|
18 |
# Set device globally
|
19 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
# Set page configuration
|
22 |
st.set_page_config(
|
23 |
page_title="SemViQA - Hệ thống Kiểm chứng Thông tin Tiếng Việt",
|
|
|
18 |
# Set device globally
|
19 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
20 |
|
21 |
+
@st.cache_data
def preprocess_text(text):
    """Normalize a raw input string before verification.

    Currently this only trims leading/trailing whitespace; the result is
    memoized by Streamlit via ``st.cache_data`` so repeated reruns with
    the same input skip the work.
    """
    # Placeholder spot for any future text cleaning / normalization steps.
    cleaned = text.strip()
    return cleaned
|
25 |
+
|
26 |
+
# Optimized function for evidence extraction and classification with better CPU performance
def perform_verification(claim, context, model_qatc, tokenizer_qatc, model_tc, tokenizer_tc,
                         model_bc, tokenizer_bc, tfidf_threshold, length_ratio_threshold):
    """Run the full verification pipeline for a single claim.

    Pipeline: (1) extract supporting evidence from *context* with the
    TF-IDF + QATC extractor, (2) run the 3-class claim classifier, and
    (3) invoke the binary classifier only when stage 2 did not predict
    class 0 (which maps to the "NEI" verdict).

    Returns a dict holding the evidence, the final verdict string
    ("NEI" / "SUPPORTED" / "REFUTED"), per-stage wall-clock timings, and
    the raw classifier probabilities/predictions (tensors converted to
    plain Python scalars).
    """
    # --- Stage 1: evidence extraction -------------------------------------
    t_evidence = time.time()
    evidence = extract_evidence_tfidf_qatc(
        claim, context, model_qatc, tokenizer_qatc,
        DEVICE,
        confidence_threshold=tfidf_threshold,
        length_ratio_threshold=length_ratio_threshold
    )
    evidence_time = time.time() - t_evidence

    # Release intermediates from extraction before classification starts.
    gc.collect()

    # --- Stages 2/3: claim classification ---------------------------------
    t_verdict = time.time()
    with torch.no_grad():
        verdict = "NEI"
        prob3class, pred_tc = classify_claim(
            claim, evidence, model_tc, tokenizer_tc, DEVICE
        )

        # The binary classifier only runs when the 3-class model did not
        # already settle on class 0.
        prob2class, pred_bc = 0, 0
        if pred_tc != 0:
            prob2class, pred_bc = classify_claim(
                claim, evidence, model_bc, tokenizer_bc, DEVICE
            )
            if pred_bc == 0:
                verdict = "SUPPORTED"
            elif prob2class > prob3class:
                verdict = "REFUTED"
            else:
                verdict = ["NEI", "SUPPORTED", "REFUTED"][pred_tc]

    verdict_time = time.time() - t_verdict

    def _scalar(p):
        # classify_claim may hand back a torch.Tensor; unwrap to a float.
        return p.item() if isinstance(p, torch.Tensor) else p

    return {
        "evidence": evidence,
        "verdict": verdict,
        "evidence_time": evidence_time,
        "verdict_time": verdict_time,
        "prob3class": _scalar(prob3class),
        "pred_tc": pred_tc,
        "prob2class": _scalar(prob2class),
        "pred_bc": pred_bc
    }
|
70 |
+
|
71 |
+
|
72 |
# Set page configuration
|
73 |
st.set_page_config(
|
74 |
page_title="SemViQA - Hệ thống Kiểm chứng Thông tin Tiếng Việt",
|