Spaces:

philipobiorah
/

bert-sentiment-analysis

Running

App Files Files Community

philipobiorah committed on Feb 9

Commit

237c9fa

verified ·

1 Parent(s): ea404d7

Upload main.py Docker , req

Browse files

Files changed (4) hide show

Dockerfile +30 -0
bert_imdb_model.bin +3 -0
main.py +91 -0
requirements.txt +51 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,30 @@

+# Use an official Python runtime as the base image
+FROM python:3.9-slim
+# Set the working directory inside the container
+WORKDIR /app
+# Install system dependencies for ML and data processing libraries.
+# --no-install-recommends keeps optional "recommended" packages out of the
+# image; the apt lists are removed in the same layer so they never persist.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    libopenblas-dev \
+    libomp-dev \
+    && rm -rf /var/lib/apt/lists/*
+# Upgrade pip to avoid dependency issues (no cache, so the layer stays small)
+RUN pip install --no-cache-dir --upgrade pip
+# Copy the dependencies file first for caching efficiency
+COPY requirements.txt /app/requirements.txt
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the application code
+COPY . /app
+# Expose port 7860 (required by Hugging Face Spaces)
+EXPOSE 7860
+# Command to run the Flask app
+CMD ["python", "main.py"]

bert_imdb_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b2bd216b42904c9382c0381c94b5852b099c8db7890b14dcf0ebd1f950c2218b
+size 438015111

main.py ADDED Viewed

	@@ -0,0 +1,91 @@

+from flask import Flask, request, render_template
+import pandas as pd
+import torch
+from transformers import BertTokenizer, BertForSequenceClassification
+from collections import Counter
+import matplotlib
+matplotlib.use('Agg')  # Set the backend before importing pyplot
+import matplotlib.pyplot as plt
+import base64
+from io import BytesIO
+import os
+app = Flask(__name__)
+# Load Model - Check if local model exists; otherwise, load from Hugging Face
+MODEL_PATH = "bert_imdb_model.bin"
+MODEL_HF_REPO = "philipobiorah/bert-imdb-model"  # Replace with your Hugging Face model repo
+if os.path.exists(MODEL_PATH):
+    print("Loading model from local file...")
+    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
+    model.load_state_dict(torch.load(MODEL_PATH, map_location=torch.device('cpu')))
+else:
+    print("Loading model from Hugging Face Hub...")
+    model = BertForSequenceClassification.from_pretrained(MODEL_HF_REPO)
+model.eval()
+tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+def predict_sentiment(text):
+    # Tokenize and split into chunks
+    tokens = tokenizer.encode(text, add_special_tokens=True)
+    chunks = [tokens[i:i + 512] for i in range(0, len(tokens), 512)]
+    # Predict sentiment for each chunk
+    sentiments = []
+    for chunk in chunks:
+        inputs = tokenizer.decode(chunk, skip_special_tokens=True, clean_up_tokenization_spaces=True)
+        inputs = tokenizer(inputs, return_tensors="pt", truncation=True, padding=True, max_length=512)
+        with torch.no_grad():
+            outputs = model(**inputs)
+        sentiments.append(outputs.logits.argmax(dim=1).item())
+    # Aggregate sentiment results (majority voting)
+    majority_sentiment = Counter(sentiments).most_common(1)[0][0]
+    return 'Positive' if majority_sentiment == 1 else 'Negative'
+@app.route('/')
+def upload_file():
+    return render_template('upload.html')
+@app.route('/analyze_text', methods=['POST'])
+def analyze_text():
+    text = request.form['text']
+    sentiment = predict_sentiment(text)
+    return render_template('upload.html', sentiment=sentiment)
+@app.route('/uploader', methods=['GET', 'POST'])
+def upload_file_post():
+    if request.method == 'POST':
+        f = request.files['file']
+        data = pd.read_csv(f)
+        # Predict sentiment for each review
+        data['sentiment'] = data['review'].apply(predict_sentiment)
+        # Sentiment Analysis Summary
+        sentiment_counts = data['sentiment'].value_counts().to_dict()
+        summary = f"Total Reviews: {len(data)}<br>" \
+                  f"Positive: {sentiment_counts.get('Positive', 0)}<br>" \
+                  f"Negative: {sentiment_counts.get('Negative', 0)}<br>"
+        # Generate bar chart
+        fig, ax = plt.subplots()
+        ax.bar(sentiment_counts.keys(), sentiment_counts.values(), color=['red', 'blue'])
+        ax.set_ylabel('Counts')
+        ax.set_title('Sentiment Analysis Summary')
+        # Convert plot to base64 for embedding
+        img = BytesIO()
+        plt.savefig(img, format='png', bbox_inches='tight')
+        img.seek(0)
+        plot_url = base64.b64encode(img.getvalue()).decode('utf8')
+        plt.close(fig)
+        return render_template('result.html', tables=[data.to_html(classes='data')], titles=data.columns.values, summary=summary, plot_url=plot_url)
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=7860, debug=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,51 @@

+astunparse==1.6.3
+attrs==24.2.0
+blinker==1.8.2
+certifi==2024.8.30
+charset-normalizer==3.3.2
+click==8.1.7
+cmake==3.30.3
+contourpy==1.2.1  # Compatible with Python 3.9
+cycler==0.12.1
+expecttest==0.2.1
+filelock==3.16.1
+Flask==3.0.3
+fonttools==4.56.0
+fsspec==2024.9.0
+huggingface-hub==0.28.1
+hypothesis==6.112.1
+idna==3.10
+itsdangerous==2.2.0
+Jinja2==3.1.4
+lintrunner==0.12.5
+MarkupSafe==2.1.5
+matplotlib
+mpmath==1.3.0
+ninja==1.11.1.1
+numpy
+optree==0.12.1
+packaging==24.1
+pandas==2.2.3
+pillow==11.1.0
+psutil==6.0.0
+pyparsing==3.2.1
+python-dateutil==2.9.0.post0
+pytz==2025.1
+PyYAML==6.0.2
+regex==2024.11.6
+requests==2.32.3
+safetensors==0.5.2
+six==1.16.0
+sortedcontainers==2.4.0
+sympy==1.13.1
+tokenizers==0.21.0
+torch
+tqdm==4.67.1
+transformers==4.48.3
+types-dataclasses==0.6.6
+typing_extensions==4.12.2
+tzdata==2025.1
+urllib3==2.2.3
+Werkzeug==3.0.4