Nitin00043 committed
Commit 5924948 · verified · 1 Parent(s): 1ee9cdc

Update app.py

Files changed (1)
  1. app.py +20 -13
app.py CHANGED
@@ -4,33 +4,36 @@ import torch
 from concurrent.futures import ThreadPoolExecutor
 from threading import Lock
 
-# Global cache and thread lock for thread-safe caching
+# Global cache and lock for thread-safety
 CACHE_SIZE = 100
 prediction_cache = {}
 cache_lock = Lock()
 
+# Mapping for sentiment labels from cardiffnlp/twitter-roberta-base-sentiment
+SENTIMENT_LABEL_MAPPING = {
+    "LABEL_0": "negative",
+    "LABEL_1": "neutral",
+    "LABEL_2": "positive"
+}
+
 def load_model(model_name):
     """
-    Loads the model with 8-bit quantization if a GPU is available.
-    On CPU, it loads the full model.
+    Loads the model with 8-bit quantization if a GPU is available;
+    otherwise, loads the full model.
     """
     if torch.cuda.is_available():
-        # Use 8-bit quantization and auto device mapping for GPU inference.
         model = AutoModelForSequenceClassification.from_pretrained(
             model_name, load_in_8bit=True, device_map="auto"
         )
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         device = 0  # GPU index
     else:
-        # CPU fallback: do not use quantization.
         model = AutoModelForSequenceClassification.from_pretrained(model_name)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         device = -1
-
     return pipeline("text-classification", model=model, tokenizer=tokenizer, device=device)
 
-# Load both models concurrently atartup.
-
+# Load both models concurrently at startup.
 with ThreadPoolExecutor() as executor:
     sentiment_future = executor.submit(load_model, "cardiffnlp/twitter-roberta-base-sentiment")
     emotion_future = executor.submit(load_model, "bhadresh-savani/bert-base-uncased-emotion")
@@ -52,15 +55,19 @@ def analyze_text(text):
         sentiment_result = future_sentiment.result()[0]
         emotion_result = future_emotion.result()[0]
 
+        # Remap the sentiment label to a human-readable format if available.
+        raw_sentiment_label = sentiment_result.get("label", "")
+        sentiment_label = SENTIMENT_LABEL_MAPPING.get(raw_sentiment_label, raw_sentiment_label)
+
         # Format the output with rounded scores.
         result = {
-            "Sentiment": {sentiment_result['label']: round(sentiment_result['score'], 4)},
+            "Sentiment": {sentiment_label: round(sentiment_result['score'], 4)},
             "Emotion": {emotion_result['label']: round(emotion_result['score'], 4)}
         }
     except Exception as e:
         result = {"error": str(e)}
 
-    # Update cache with protection.
+    # Update the cache in a thread-safe manner.
    with cache_lock:
         if len(prediction_cache) >= CACHE_SIZE:
             prediction_cache.pop(next(iter(prediction_cache)))
@@ -74,7 +81,7 @@ demo = gr.Interface(
     inputs=gr.Textbox(placeholder="Enter your text here...", label="Input Text"),
     outputs=gr.JSON(label="Analysis Results"),
     title="🚀 Fast Sentiment & Emotion Analysis",
-    description="An optimized application using quantized models (when available) and parallel processing for fast inference.",
+    description="Optimized application that remaps sentiment labels and uses parallel processing.",
     examples=[
         ["I'm thrilled to start this new adventure!"],
         ["This situation is making me really frustrated."],
@@ -84,9 +91,9 @@ demo = gr.Interface(
     allow_flagging="never"
 )
 
-# Warm up the models to reduce first-call latency.
+# Warm up the models with a sample input.
 _ = analyze_text("Warming up models...")
 
 if __name__ == "__main__":
-    # In Spaces, binding to 0.0.0.0 is required.
+    # Bind to all interfaces for Hugging Face Spaces.
     demo.launch(server_name="0.0.0.0", server_port=7860)
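
Note: the sketch below is a minimal, standalone rendition of the two patterns this commit adds to app.py: remapping raw classifier labels to readable names with a fallback, and evicting the oldest entry from the bounded, lock-protected cache. It runs without a GPU or the transformers package; the fake result dict, the demo-sized CACHE_SIZE, and the helper names remap_label and cache_result are assumptions for illustration, not code from app.py.

from threading import Lock

# Same mapping the commit introduces for cardiffnlp/twitter-roberta-base-sentiment.
SENTIMENT_LABEL_MAPPING = {
    "LABEL_0": "negative",
    "LABEL_1": "neutral",
    "LABEL_2": "positive",
}

CACHE_SIZE = 3  # demo-sized so eviction is visible; app.py uses 100
prediction_cache = {}
cache_lock = Lock()

def remap_label(result: dict) -> str:  # hypothetical helper, not in app.py
    """Return a readable label, falling back to the raw label if unmapped."""
    raw = result.get("label", "")
    return SENTIMENT_LABEL_MAPPING.get(raw, raw)

def cache_result(text: str, result: dict) -> None:  # hypothetical helper
    """Insert under the lock; dicts keep insertion order (Python 3.7+),
    so next(iter(...)) pops the oldest entry once the cache is full."""
    with cache_lock:
        if len(prediction_cache) >= CACHE_SIZE:
            prediction_cache.pop(next(iter(prediction_cache)))
        prediction_cache[text] = result

if __name__ == "__main__":
    fake = {"label": "LABEL_2", "score": 0.987654}   # stand-in pipeline output
    print(remap_label(fake))                         # -> positive
    print(remap_label({"label": "LABEL_9"}))         # -> LABEL_9 (fallback)
    for i in range(5):
        cache_result(f"text-{i}", {"i": i})
    print(list(prediction_cache))                    # -> ['text-2', 'text-3', 'text-4']

The eviction relies on plain dict insertion order, so it drops the oldest insertion, not the least recently used entry; collections.OrderedDict with move_to_end, or functools.lru_cache, would be the more idiomatic choice if true LRU behavior were needed.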