Spaces:

shrish191
/

sentiment-classifier-ui

Running

App Files Community

shrish191 committed 9 days ago

Commit

5a741e8

verified ·

1 Parent(s): 65d8742

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -18

app.py CHANGED Viewed

@@ -138,31 +138,42 @@ demo = gr.Interface(
 demo.launch()
 '''
-'''import gradio as gr
 from transformers import TFBertForSequenceClassification, BertTokenizer
 import tensorflow as tf
 import praw
 import os
-# Load model and tokenizer from Hugging Face
 model = TFBertForSequenceClassification.from_pretrained("shrish191/sentiment-bert")
 tokenizer = BertTokenizer.from_pretrained("shrish191/sentiment-bert")
-# Label mapping
 LABELS = {
     0: "Neutral",
     1: "Positive",
     2: "Negative"
 }
-# Reddit API setup (credentials loaded securely from secrets)
 reddit = praw.Reddit(
     client_id=os.getenv("REDDIT_CLIENT_ID"),
     client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
-    user_agent=os.getenv("REDDIT_USER_AGENT", "sentiment-classifier-script")
 )
-# Reddit post fetcher
 def fetch_reddit_text(reddit_url):
     try:
         submission = reddit.submission(url=reddit_url)
@@ -170,7 +181,15 @@ def fetch_reddit_text(reddit_url):
     except Exception as e:
         return f"Error fetching Reddit post: {str(e)}"
-# Main sentiment function
 def classify_sentiment(text_input, reddit_url):
     if reddit_url.strip():
         text = fetch_reddit_text(reddit_url)
@@ -186,13 +205,17 @@ def classify_sentiment(text_input, reddit_url):
         inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True)
         outputs = model(inputs)
         probs = tf.nn.softmax(outputs.logits, axis=1)
-        pred_label = tf.argmax(probs, axis=1).numpy()[0]
         confidence = float(tf.reduce_max(probs).numpy())
-        return f"Prediction: {LABELS[pred_label]} (Confidence: {confidence:.2f})"
     except Exception as e:
         return f"[!] Prediction error: {str(e)}"
-# Gradio UI
 demo = gr.Interface(
     fn=classify_sentiment,
     inputs=[
@@ -211,7 +234,7 @@ demo = gr.Interface(
     title="Sentiment Analyzer",
     description="🔍 Paste any text (including tweet content) OR a Reddit post URL to analyze sentiment.\n\n💡 Tweet URLs are not supported directly due to platform restrictions. Please paste tweet content manually."
 )
 demo.launch()
 '''
 import gradio as gr
@@ -219,13 +242,19 @@ from transformers import TFBertForSequenceClassification, BertTokenizer
 import tensorflow as tf
 import praw
 import os
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 from scipy.special import softmax
 model = TFBertForSequenceClassification.from_pretrained("shrish191/sentiment-bert")
 tokenizer = BertTokenizer.from_pretrained("shrish191/sentiment-bert")
@@ -235,12 +264,12 @@ LABELS = {
     2: "Negative"
 }
 fallback_model_name = "cardiffnlp/twitter-roberta-base-sentiment"
 fallback_tokenizer = AutoTokenizer.from_pretrained(fallback_model_name)
 fallback_model = AutoModelForSequenceClassification.from_pretrained(fallback_model_name)
-# Reddit API
 reddit = praw.Reddit(
     client_id=os.getenv("REDDIT_CLIENT_ID"),
     client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
@@ -254,7 +283,6 @@ def fetch_reddit_text(reddit_url):
     except Exception as e:
         return f"Error fetching Reddit post: {str(e)}"
 def fallback_classifier(text):
     encoded_input = fallback_tokenizer(text, return_tensors='pt', truncation=True, padding=True)
     with torch.no_grad():
@@ -263,13 +291,20 @@ def fallback_classifier(text):
     labels = ['Negative', 'Neutral', 'Positive']
     return f"Prediction: {labels[scores.argmax()]}"
-def classify_sentiment(text_input, reddit_url):
     if reddit_url.strip():
         text = fetch_reddit_text(reddit_url)
     elif text_input.strip():
         text = text_input
     else:
-        return "[!] Please enter some text or a Reddit post URL."
     if text.lower().startswith("error") or "Unable to extract" in text:
         return f"[!] {text}"
@@ -302,10 +337,14 @@ demo = gr.Interface(
             placeholder="Paste a Reddit post URL (optional)",
             lines=1
         ),
     ],
     outputs="text",
     title="Sentiment Analyzer",
-    description="🔍 Paste any text (including tweet content) OR a Reddit post URL to analyze sentiment.\n\n💡 Tweet URLs are not supported directly due to platform restrictions. Please paste tweet content manually."
 )
 demo.launch()

 demo.launch()
 '''
+'''
+import gradio as gr
 from transformers import TFBertForSequenceClassification, BertTokenizer
 import tensorflow as tf
 import praw
 import os
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+from scipy.special import softmax
 model = TFBertForSequenceClassification.from_pretrained("shrish191/sentiment-bert")
 tokenizer = BertTokenizer.from_pretrained("shrish191/sentiment-bert")
 LABELS = {
     0: "Neutral",
     1: "Positive",
     2: "Negative"
 }
+fallback_model_name = "cardiffnlp/twitter-roberta-base-sentiment"
+fallback_tokenizer = AutoTokenizer.from_pretrained(fallback_model_name)
+fallback_model = AutoModelForSequenceClassification.from_pretrained(fallback_model_name)
+# Reddit API
 reddit = praw.Reddit(
     client_id=os.getenv("REDDIT_CLIENT_ID"),
     client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
+    user_agent=os.getenv("REDDIT_USER_AGENT", "sentiment-classifier-ui")
 )
 def fetch_reddit_text(reddit_url):
     try:
         submission = reddit.submission(url=reddit_url)
     except Exception as e:
         return f"Error fetching Reddit post: {str(e)}"
+def fallback_classifier(text):
+    encoded_input = fallback_tokenizer(text, return_tensors='pt', truncation=True, padding=True)
+    with torch.no_grad():
+        output = fallback_model(**encoded_input)
+    scores = softmax(output.logits.numpy()[0])
+    labels = ['Negative', 'Neutral', 'Positive']
+    return f"Prediction: {labels[scores.argmax()]}"
 def classify_sentiment(text_input, reddit_url):
     if reddit_url.strip():
         text = fetch_reddit_text(reddit_url)
         inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True)
         outputs = model(inputs)
         probs = tf.nn.softmax(outputs.logits, axis=1)
         confidence = float(tf.reduce_max(probs).numpy())
+        pred_label = tf.argmax(probs, axis=1).numpy()[0]
+        if confidence < 0.5:
+            return fallback_classifier(text)
+        return f"Prediction: {LABELS[pred_label]}"
     except Exception as e:
         return f"[!] Prediction error: {str(e)}"
+# Gradio interface
 demo = gr.Interface(
     fn=classify_sentiment,
     inputs=[
     title="Sentiment Analyzer",
     description="🔍 Paste any text (including tweet content) OR a Reddit post URL to analyze sentiment.\n\n💡 Tweet URLs are not supported directly due to platform restrictions. Please paste tweet content manually."
 )
 demo.launch()
 '''
 import gradio as gr
 import tensorflow as tf
 import praw
 import os
+import pytesseract
+from PIL import Image
+import cv2
+import numpy as np
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 from scipy.special import softmax
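+# Install tesseract OCR at import time (this runs on every app start, not just once; listing tesseract-ocr in the Space's packages.txt is the supported alternative)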
+os.system("apt-get update && apt-get install -y tesseract-ocr")
+# Load main model
 model = TFBertForSequenceClassification.from_pretrained("shrish191/sentiment-bert")
 tokenizer = BertTokenizer.from_pretrained("shrish191/sentiment-bert")
     2: "Negative"
 }
+# Load fallback model
 fallback_model_name = "cardiffnlp/twitter-roberta-base-sentiment"
 fallback_tokenizer = AutoTokenizer.from_pretrained(fallback_model_name)
 fallback_model = AutoModelForSequenceClassification.from_pretrained(fallback_model_name)
+# Reddit API setup
 reddit = praw.Reddit(
     client_id=os.getenv("REDDIT_CLIENT_ID"),
     client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
     except Exception as e:
         return f"Error fetching Reddit post: {str(e)}"
 def fallback_classifier(text):
     encoded_input = fallback_tokenizer(text, return_tensors='pt', truncation=True, padding=True)
     with torch.no_grad():
     labels = ['Negative', 'Neutral', 'Positive']
     return f"Prediction: {labels[scores.argmax()]}"
+def classify_sentiment(text_input, reddit_url, image):
+    # Priority: Reddit > Image > Textbox
     if reddit_url.strip():
         text = fetch_reddit_text(reddit_url)
+    elif image is not None:
+        try:
+            img_array = np.array(image)
+            text = pytesseract.image_to_string(img_array)
+        except Exception as e:
+            return f"[!] OCR failed: {str(e)}"
     elif text_input.strip():
         text = text_input
     else:
+        return "[!] Please enter some text, upload an image, or provide a Reddit URL."
     if text.lower().startswith("error") or "Unable to extract" in text:
         return f"[!] {text}"
             placeholder="Paste a Reddit post URL (optional)",
             lines=1
         ),
+        gr.Image(
+            label="Upload Image (optional)",
+            type="pil"
+        )
     ],
     outputs="text",
     title="Sentiment Analyzer",
+    description="🔍 Paste any text, Reddit post URL, or upload an image containing text to analyze sentiment.\n\n💡 Tweet URLs are not supported. Please paste tweet content or screenshot instead."
 )
 demo.launch()