xuandin committed · verified
Commit 7725101 · 1 parent: 99fbeb9

Update app.py

Files changed (1): app.py +76 -21
app.py CHANGED
@@ -7,8 +7,18 @@ from semviqa.ser.ser_eval import extract_evidence_tfidf_qatc
 from semviqa.tvc.tvc_eval import classify_claim
 import time
 import pandas as pd
+import os
+import psutil
+import gc
+import threading
+from queue import Queue
+from concurrent.futures import ThreadPoolExecutor
 
-# Load models with caching and optimization
+# Set environment variables to optimize CPU performance
+os.environ["OMP_NUM_THREADS"] = str(psutil.cpu_count(logical=False))
+os.environ["MKL_NUM_THREADS"] = str(psutil.cpu_count(logical=False))
+
+# Load models with caching and CPU optimization
 @st.cache_resource()
 def load_model(model_name, model_class, is_bc=False, device=None):
     if device is None:
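The new environment variables pin the OpenMP and MKL thread pools to the number of physical cores, which usually beats the hyper-threaded count for dense inference kernels. One caveat: these variables are read when the native libraries initialize, so they only reliably take effect if set before torch is first imported (the semviqa imports above already pull it in). A minimal standalone sketch of the intended ordering; the print line is illustrative:

import os
import psutil

# Must run before the first `import torch` for OMP/MKL to pick the values up.
physical_cores = psutil.cpu_count(logical=False) or 1
os.environ["OMP_NUM_THREADS"] = str(physical_cores)
os.environ["MKL_NUM_THREADS"] = str(physical_cores)

import torch
print(f"{physical_cores} physical cores, torch intra-op threads: {torch.get_num_threads()}")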
@@ -17,13 +27,20 @@ def load_model(model_name, model_class, is_bc=False, device=None):
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = model_class.from_pretrained(model_name, num_labels=3 if not is_bc else 2)
     model.eval()
-    model.to(device)
 
-    # Enable CUDA optimizations if available
-    if device == "cuda":
-        if hasattr(model, 'half') and not model_name.startswith("SemViQA/bc-erniem") and not model_name.startswith("SemViQA/tc-erniem"):
-            model = model.half()  # Use FP16 for most models (except ERNIE which might not support it)
+    # CPU-specific optimizations
+    if device == "cpu":
+        # Use torch's quantization for CPU inference speed boost
+        try:
+            import torch.quantization
+            # Quantize the model to INT8
+            model = torch.quantization.quantize_dynamic(
+                model, {torch.nn.Linear}, dtype=torch.qint8
+            )
+        except Exception as e:
+            st.warning(f"Quantization failed, using default model: {e}")
 
+    model.to(device)
     return tokenizer, model
 
 # Set device globally
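The CUDA/FP16 branch gives way to dynamic INT8 quantization, which replaces every nn.Linear so that weights are stored as INT8 and activations are quantized on the fly; on transformer encoders this typically cuts CPU latency and memory at a small accuracy cost. A self-contained sketch on a toy model (not the SemViQA checkpoints):

import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(256, 128), nn.ReLU(), nn.Linear(128, 3)).eval()

# Dynamic quantization: INT8 weights, activations quantized per batch at runtime.
qmodel = torch.quantization.quantize_dynamic(model, {nn.Linear}, dtype=torch.qint8)

with torch.no_grad():
    out = qmodel(torch.randn(1, 256))
print(out.shape)  # torch.Size([1, 3])

Moving model.to(device) after the try/except is also consistent with this: quantize_dynamic returns a CPU model, and a .to("cpu") on it is a no-op.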
@@ -35,28 +52,52 @@ def preprocess_text(text):
     # Add any text cleaning or normalization here
     return text.strip()
 
-# Optimized function for evidence extraction and classification
-def perform_verification(claim, context, model_qatc, tokenizer_qatc, model_tc, tokenizer_tc,
-                         model_bc, tokenizer_bc, tfidf_threshold, length_ratio_threshold):
+# Function to extract evidence in a separate thread for better CPU utilization
+def extract_evidence_threaded(queue, claim, context, model_qatc, tokenizer_qatc, device,
+                              tfidf_threshold, length_ratio_threshold):
+    start_time = time.time()
     with torch.no_grad():
-        # Extract evidence
-        evidence_start_time = time.time()
         evidence = extract_evidence_tfidf_qatc(
             claim, context, model_qatc, tokenizer_qatc,
-            DEVICE,
+            device,
             confidence_threshold=tfidf_threshold,
             length_ratio_threshold=length_ratio_threshold
         )
-        evidence_time = time.time() - evidence_start_time
+    evidence_time = time.time() - start_time
+    queue.put((evidence, evidence_time))
+
+# Function to classify in a separate thread
+def classify_claim_threaded(queue, claim, evidence, model, tokenizer, device):
+    with torch.no_grad():
+        result = classify_claim(claim, evidence, model, tokenizer, device)
+    queue.put(result)
 
-        # Classify the claim
-        verdict_start_time = time.time()
+# Optimized function for evidence extraction and classification with better CPU performance
+def perform_verification(claim, context, model_qatc, tokenizer_qatc, model_tc, tokenizer_tc,
+                         model_bc, tokenizer_bc, tfidf_threshold, length_ratio_threshold):
+    # Use thread for evidence extraction to allow garbage collection in between
+    evidence_queue = Queue()
+    evidence_thread = threading.Thread(
+        target=extract_evidence_threaded,
+        args=(evidence_queue, claim, context, model_qatc, tokenizer_qatc, DEVICE,
+              tfidf_threshold, length_ratio_threshold)
+    )
+    evidence_thread.start()
+    evidence_thread.join()
+    evidence, evidence_time = evidence_queue.get()
+
+    # Explicit garbage collection after evidence extraction
+    gc.collect()
+
+    # Classify the claim
+    verdict_start_time = time.time()
+    with torch.no_grad():
         prob3class, pred_tc = classify_claim(
             claim, evidence, model_tc, tokenizer_tc, DEVICE
         )
 
         # Only run binary classifier if needed
-        prob2class, pred_bc = 0, "Not used"
+        prob2class, pred_bc = 0, 0
         if pred_tc != 0:
             prob2class, pred_bc = classify_claim(
                 claim, evidence, model_bc, tokenizer_bc, DEVICE
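perform_verification now runs evidence extraction in a worker thread that reports back through a Queue. Since the main thread calls join() immediately, execution is still sequential; the thread mainly fences off the allocation-heavy extraction step so the gc.collect() that follows can reclaim its temporaries. The Thread-plus-Queue pattern in isolation, with a stand-in work function rather than the real extractor:

import threading
import time
from queue import Queue

def worker(out_queue, n):
    # Stand-in for extract_evidence_tfidf_qatc: do some work, time it,
    # and hand (result, elapsed) back through the queue.
    start = time.time()
    result = sum(i * i for i in range(n))
    out_queue.put((result, time.time() - start))

q = Queue()
t = threading.Thread(target=worker, args=(q, 1_000_000))
t.start()
t.join()                      # wait for the worker: still sequential overall
result, elapsed = q.get()
print(result, f"{elapsed:.3f}s")

classify_claim_threaded and the ThreadPoolExecutor import are never called in this commit; they read as groundwork for running the two classifier heads concurrently later.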
@@ -65,7 +106,7 @@ def perform_verification(claim, context, model_qatc, tokenizer_qatc, model_tc, tokenizer_tc,
         else:
             verdict = "NEI"
 
-        verdict_time = time.time() - verdict_start_time
+    verdict_time = time.time() - verdict_start_time
 
     return {
         "evidence": evidence,
@@ -222,6 +263,13 @@ with st.container():
         "SemViQA/tc-erniem-isedsc01"
     ])
     show_details = st.checkbox("Show Probability Details", value=False)
+
+    # Add CPU optimization settings
+    st.subheader("CPU Performance Settings")
+    num_threads = st.slider("Number of CPU Threads", 1, psutil.cpu_count(),
+                            psutil.cpu_count(logical=False))
+    os.environ["OMP_NUM_THREADS"] = str(num_threads)
+    os.environ["MKL_NUM_THREADS"] = str(num_threads)
 
 # Store verification history
 if 'history' not in st.session_state:
@@ -238,6 +286,11 @@ with st.container():
         st.session_state.prev_models['tc'] != tc_model_name):
 
         with st.spinner("Loading models..."):
+            # Clear memory before loading new models
+            gc.collect()
+            if DEVICE == "cpu":
+                torch.set_num_threads(num_threads)
+
             tokenizer_qatc, model_qatc = load_model(qatc_model_name, QATCForQuestionAnswering, device=DEVICE)
             tokenizer_bc, model_bc = load_model(bc_model_name, ClaimModelForClassification, is_bc=True, device=DEVICE)
             tokenizer_tc, model_tc = load_model(tc_model_name, ClaimModelForClassification, device=DEVICE)
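Unlike the OMP/MKL environment variables, torch.set_num_threads applies at call time, so wiring it to the sidebar slider re-tunes intra-op parallelism without restarting the app. A minimal check:

import torch

torch.set_num_threads(4)        # intra-op threads used by CPU kernels (e.g. matmul)
print(torch.get_num_threads())  # 4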
@@ -286,6 +339,9 @@ with st.container():
     with st.spinner("Verifying..."):
         start_time = time.time()
 
+        # Clear memory before verification
+        gc.collect()
+
         # Use the optimized verification function
         result = perform_verification(
             preprocessed_claim, preprocessed_context,
@@ -304,7 +360,7 @@ with st.container():
         3-Class Probability: {result['prob3class'].item():.2f}
         3-Class Predicted Label: {['NEI', 'SUPPORTED', 'REFUTED'][result['pred_tc']]}
         2-Class Probability: {result['prob2class'].item():.2f}
-        2-Class Predicted Label: {['SUPPORTED', 'REFUTED'][result['pred_bc']] if result['pred_tc'] != 0 else 'Not used'}
+        2-Class Predicted Label: {['SUPPORTED', 'REFUTED'][result['pred_bc']] if isinstance(result['pred_bc'], int) and result['pred_tc'] != 0 else 'Not used'}
         """
 
         st.session_state.latest_result = {
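With the binary head's sentinel changed from the string 'Not used' to 0, indexing result['pred_bc'] is always type-safe, and the isinstance guard keeps the label at 'Not used' whenever the three-class model predicted NEI. The guard's behavior on illustrative values:

def bc_label(pred_tc, pred_bc):
    # Mirrors the f-string guard: only trust pred_bc when the three-class
    # head did not predict NEI (index 0) and pred_bc is a plain int.
    return ['SUPPORTED', 'REFUTED'][pred_bc] if isinstance(pred_bc, int) and pred_tc != 0 else 'Not used'

print(bc_label(1, 0))  # SUPPORTED
print(bc_label(0, 0))  # Not used (NEI case)

One caveat worth verifying: if classify_claim returns pred_bc as a tensor rather than a Python int, the isinstance check would route real predictions to 'Not used'.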
@@ -323,9 +379,8 @@ with st.container():
         # Add new result to history
         st.session_state.history.append(st.session_state.latest_result)
 
-        # Clear GPU cache to free memory
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
+        # Clear memory after processing
+        gc.collect()
 
     # Display the result after verification
     res = st.session_state.latest_result
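Swapping torch.cuda.empty_cache() for gc.collect() matches the CPU target: there is no CUDA caching allocator to flush, and what lingers on CPU are Python references to intermediate tensors. A device-agnostic cleanup helper would cover both cases; a minimal sketch (the helper name is mine, not from the commit):

import gc
import torch

def free_memory():
    # Release unreachable Python objects (covers CPU tensors), then
    # return cached CUDA blocks to the driver if a GPU is present.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()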
 