Nitin00043 committed
Commit 191e2cd · verified · 1 Parent(s): 6fe9380

Update app.py

Files changed (1)
  1. app.py +51 -35
app.py CHANGED
@@ -2,14 +2,27 @@ from transformers import pipeline, AutoModelForSequenceClassification, AutoToken
 import gradio as gr
 import torch
 from concurrent.futures import ThreadPoolExecutor
+from threading import Lock
 
-# Load models with quantization (8-bit) for faster inference
+# Global cache settings and lock for thread-safety
+CACHE_SIZE = 100
+prediction_cache = {}
+cache_lock = Lock()
+
+# Function to load models with 8-bit quantization
 def load_quantized_model(model_name):
-    model = AutoModelForSequenceClassification.from_pretrained(model_name, load_in_8bit=True)
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    return pipeline("text-classification", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
+    try:
+        model = AutoModelForSequenceClassification.from_pretrained(model_name, load_in_8bit=True)
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        device = 0 if torch.cuda.is_available() else -1
+        pipe = pipeline("text-classification", model=model, tokenizer=tokenizer, device=device)
+        print(f"Loaded model: {model_name}")
+        return pipe
+    except Exception as e:
+        print(f"Error loading model '{model_name}': {e}")
+        raise e
 
-# Load models in parallel during startup
+# Load both models concurrently at startup
 with ThreadPoolExecutor() as executor:
     sentiment_future = executor.submit(load_quantized_model, "cardiffnlp/twitter-roberta-base-sentiment")
     emotion_future = executor.submit(load_quantized_model, "bhadresh-savani/bert-base-uncased-emotion")
@@ -17,43 +30,46 @@ with ThreadPoolExecutor() as executor:
 sentiment_pipeline = sentiment_future.result()
 emotion_pipeline = emotion_future.result()
 
-# Cache recent predictions to avoid recomputation
-CACHE_SIZE = 100
-prediction_cache = {}
-
 def analyze_text(text):
-    # Check cache first
-    if text in prediction_cache:
-        return prediction_cache[text]
+    # Check cache first (using lock for thread-safety)
+    with cache_lock:
+        if text in prediction_cache:
+            return prediction_cache[text]
 
-    # Parallel model execution
-    with ThreadPoolExecutor() as executor:
-        sentiment_future = executor.submit(sentiment_pipeline, text)
-        emotion_future = executor.submit(emotion_pipeline, text)
+    try:
+        # Execute both model inferences in parallel
+        with ThreadPoolExecutor() as executor:
+            sentiment_future = executor.submit(sentiment_pipeline, text)
+            emotion_future = executor.submit(emotion_pipeline, text)
+
+        sentiment_result = sentiment_future.result()[0]
+        emotion_result = emotion_future.result()[0]
 
-    sentiment_result = sentiment_future.result()[0]
-    emotion_result = emotion_future.result()[0]
-
-    # Format response
-    result = {
-        "Sentiment": {sentiment_result['label']: round(sentiment_result['score'], 4)},
-        "Emotion": {emotion_result['label']: round(emotion_result['score'], 4)}
-    }
+        # Prepare a clear, rounded output
+        result = {
+            "Sentiment": {sentiment_result['label']: round(sentiment_result['score'], 4)},
+            "Emotion": {emotion_result['label']: round(emotion_result['score'], 4)}
+        }
+    except Exception as e:
+        result = {"error": str(e)}
 
-    # Update cache
-    if len(prediction_cache) >= CACHE_SIZE:
-        prediction_cache.pop(next(iter(prediction_cache)))
-    prediction_cache[text] = result
+    # Update cache with lock protection
+    with cache_lock:
+        if len(prediction_cache) >= CACHE_SIZE:
+            prediction_cache.pop(next(iter(prediction_cache)))
+        prediction_cache[text] = result
 
     return result
 
-# Optimized Gradio interface with batch processing
-demo = gr.Interface(
+# Gradio interface: using gr.JSON to display structured output
+
+
+demo = gr.Interface(
     fn=analyze_text,
     inputs=gr.Textbox(placeholder="Enter your text here...", label="Input Text"),
-    outputs=gr.Label(label="Analysis Results"),
+    outputs=gr.JSON(label="Analysis Results"),
     title="🚀 Fast Sentiment & Emotion Analysis",
-    description="Optimized version using quantized models and parallel processing",
+    description="An optimized application using 8-bit quantized models and parallel processing for fast inference.",
     examples=[
         ["I'm thrilled to start this new adventure!"],
         ["This situation is making me really frustrated."],
@@ -63,8 +79,8 @@ demo = gr.Interface(
     allow_flagging="never"
 )
 
-# Warm up models with sample input
-analyze_text("Warming up models...")
+# Warm up the models with a sample input to reduce first-call latency
+_ = analyze_text("Warming up models...")
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
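
For reference, a minimal sketch of how the updated analyze_text is expected to behave when called directly. This snippet is not part of the commit; the exact label strings and scores depend on each model's configuration and are shown only as illustrative placeholders.

# Hypothetical interactive check after both pipelines have loaded.
# A repeated call with the same text should be served from prediction_cache
# without re-running either pipeline.
result = analyze_text("I'm thrilled to start this new adventure!")
print(result)
# Expected shape (values illustrative):
# {"Sentiment": {"<sentiment_label>": 0.98}, "Emotion": {"<emotion_label>": 0.99}}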