Spaces:

philipobiorah
/

bert-sentiment-analysis

Running

App Files Files Community

philipobiorah committed on Feb 9

Commit

a7328ec

verified ·

1 Parent(s): 2584f82

modified Dockerfile to Create Cache Directories

Browse files

Dockerfile to Create Cache Directories
Create writable cache directories

Files changed (2) hide show

Dockerfile +37 -0
main.py +96 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,37 @@

+# Use an official Python runtime as the base image
+FROM python:3.9-slim
+# Set the working directory inside the container
+WORKDIR /app
+# Install system dependencies for ML and data processing libraries
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    libopenblas-dev \
+    libomp-dev \
+    && rm -rf /var/lib/apt/lists/*
+# Upgrade pip to avoid dependency issues (no pip cache kept in the image layer)
+RUN pip install --no-cache-dir --upgrade pip
+# Create the cache directories and make them world-writable.
+# They are created by root at build time, but Hugging Face Spaces runs the
+# container as a non-root user (UID 1000); without the chmod the application
+# cannot write model or font caches into them.
+RUN mkdir -p /tmp/huggingface_cache /tmp/matplotlib \
+    && chmod -R 777 /tmp/huggingface_cache /tmp/matplotlib
+# Point Hugging Face libraries and Matplotlib at those directories
+ENV TRANSFORMERS_CACHE=/tmp/huggingface_cache
+ENV HF_HOME=/tmp/huggingface_cache
+ENV MPLCONFIGDIR=/tmp/matplotlib
+# Copy the dependencies file first for caching efficiency
+COPY requirements.txt /app/requirements.txt
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the application code
+COPY . /app
+# Expose port 7860 (required by Hugging Face Spaces)
+EXPOSE 7860
+# Command to run the Flask app
+CMD ["python", "main.py"]

main.py ADDED Viewed

	@@ -0,0 +1,96 @@

+from flask import Flask, request, render_template
+import pandas as pd
+import torch
+from transformers import BertTokenizer, BertForSequenceClassification
+from collections import Counter
+import matplotlib
+matplotlib.use('Agg')  # Set the backend before importing pyplot
+import matplotlib.pyplot as plt
+import base64
+from io import BytesIO
+import os
+# Set writable cache directories for Hugging Face and Matplotlib
+os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface_cache"
+os.environ["HF_HOME"] = "/tmp/huggingface_cache"
+os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
+app = Flask(__name__)
+# Load Model - Check if local model exists; otherwise, load from Hugging Face
+MODEL_PATH = "bert_imdb_model.bin"
+MODEL_HF_REPO = "philipobiorah/bert-imdb-model"  # Replace with your Hugging Face model repo
+if os.path.exists(MODEL_PATH):
+    print("Loading model from local file...")
+    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
+    model.load_state_dict(torch.load(MODEL_PATH, map_location=torch.device('cpu')))
+else:
+    print("Loading model from Hugging Face Hub...")
+    model = BertForSequenceClassification.from_pretrained(MODEL_HF_REPO)
+model.eval()
+tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+def predict_sentiment(text):
+    # Tokenize and split into chunks
+    tokens = tokenizer.encode(text, add_special_tokens=True)
+    chunks = [tokens[i:i + 512] for i in range(0, len(tokens), 512)]
+    # Predict sentiment for each chunk
+    sentiments = []
+    for chunk in chunks:
+        inputs = tokenizer.decode(chunk, skip_special_tokens=True, clean_up_tokenization_spaces=True)
+        inputs = tokenizer(inputs, return_tensors="pt", truncation=True, padding=True, max_length=512)
+        with torch.no_grad():
+            outputs = model(**inputs)
+        sentiments.append(outputs.logits.argmax(dim=1).item())
+    # Aggregate sentiment results (majority voting)
+    majority_sentiment = Counter(sentiments).most_common(1)[0][0]
+    return 'Positive' if majority_sentiment == 1 else 'Negative'
+@app.route('/')
+def upload_file():
+    return render_template('upload.html')
+@app.route('/analyze_text', methods=['POST'])
+def analyze_text():
+    text = request.form['text']
+    sentiment = predict_sentiment(text)
+    return render_template('upload.html', sentiment=sentiment)
+@app.route('/uploader', methods=['GET', 'POST'])
+def upload_file_post():
+    if request.method == 'POST':
+        f = request.files['file']
+        data = pd.read_csv(f)
+        # Predict sentiment for each review
+        data['sentiment'] = data['review'].apply(predict_sentiment)
+        # Sentiment Analysis Summary
+        sentiment_counts = data['sentiment'].value_counts().to_dict()
+        summary = f"Total Reviews: {len(data)}<br>" \
+                  f"Positive: {sentiment_counts.get('Positive', 0)}<br>" \
+                  f"Negative: {sentiment_counts.get('Negative', 0)}<br>"
+        # Generate bar chart
+        fig, ax = plt.subplots()
+        ax.bar(sentiment_counts.keys(), sentiment_counts.values(), color=['red', 'blue'])
+        ax.set_ylabel('Counts')
+        ax.set_title('Sentiment Analysis Summary')
+        # Convert plot to base64 for embedding
+        img = BytesIO()
+        plt.savefig(img, format='png', bbox_inches='tight')
+        img.seek(0)
+        plot_url = base64.b64encode(img.getvalue()).decode('utf8')
+        plt.close(fig)
+        return render_template('result.html', tables=[data.to_html(classes='data')], titles=data.columns.values, summary=summary, plot_url=plot_url)
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=7860, debug=True)