philipobiorah committed on
Commit
a7328ec
·
verified ·
1 Parent(s): 2584f82

modified Dockerfile to Create Cache Directories

Browse files

Dockerfile to Create Cache Directories
Create writable cache directories

Files changed (2) hide show
  1. Dockerfile +37 -0
  2. main.py +96 -0
Dockerfile ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as the base image
FROM python:3.9-slim

# Set the working directory inside the container
WORKDIR /app

# Install system dependencies for ML and data processing libraries.
# --no-install-recommends keeps optional packages out of the image, and the
# apt lists are removed in the same layer so they are never stored.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    libopenblas-dev \
    libomp-dev \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip to avoid dependency issues
RUN pip install --no-cache-dir --upgrade pip

# Create writable cache directories.
# mkdir runs as root at build time, so without the chmod the directories are
# only writable by root; a container started under a non-root user (as
# Hugging Face Spaces does) could then not write its caches there.
RUN mkdir -p /tmp/huggingface_cache /tmp/matplotlib \
    && chmod -R 777 /tmp/huggingface_cache /tmp/matplotlib
ENV TRANSFORMERS_CACHE=/tmp/huggingface_cache
ENV HF_HOME=/tmp/huggingface_cache
ENV MPLCONFIGDIR=/tmp/matplotlib

# Copy the dependencies file first for caching efficiency
COPY requirements.txt /app/requirements.txt

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code
COPY . /app

# Expose port 7860 (required by Hugging Face Spaces)
EXPOSE 7860

# Command to run the Flask app
CMD ["python", "main.py"]
main.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, render_template
2
+ import pandas as pd
3
+ import torch
4
+ from transformers import BertTokenizer, BertForSequenceClassification
5
+ from collections import Counter
6
+ import matplotlib
7
+ matplotlib.use('Agg') # Set the backend before importing pyplot
8
+ import matplotlib.pyplot as plt
9
+ import base64
10
+ from io import BytesIO
11
+ import os
12
+
13
# Writable cache locations for Hugging Face and Matplotlib.
# NOTE: these assignments run after `transformers` and `matplotlib` have
# already been imported at the top of the file, so a library that reads its
# cache settings at import time may not pick them up. The Hugging Face cache
# location is therefore also passed explicitly via `cache_dir` below.
HF_CACHE_DIR = "/tmp/huggingface_cache"
os.environ["TRANSFORMERS_CACHE"] = HF_CACHE_DIR
os.environ["HF_HOME"] = HF_CACHE_DIR
os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"

app = Flask(__name__)

# Load Model - Check if local model exists; otherwise, load from Hugging Face
MODEL_PATH = "bert_imdb_model.bin"
MODEL_HF_REPO = "philipobiorah/bert-imdb-model"  # Replace with your Hugging Face model repo

if os.path.exists(MODEL_PATH):
    print("Loading model from local file...")
    # Build the base architecture with a 2-class head, then overwrite its
    # weights with the fine-tuned state dict stored in MODEL_PATH.
    model = BertForSequenceClassification.from_pretrained(
        'bert-base-uncased', num_labels=2, cache_dir=HF_CACHE_DIR
    )
    # NOTE(review): torch.load unpickles the file; only ship a MODEL_PATH
    # that comes from a trusted source.
    model.load_state_dict(torch.load(MODEL_PATH, map_location=torch.device('cpu')))
else:
    print("Loading model from Hugging Face Hub...")
    model = BertForSequenceClassification.from_pretrained(
        MODEL_HF_REPO, cache_dir=HF_CACHE_DIR
    )

# Inference only: switch off dropout and other training-time behaviour.
model.eval()
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', cache_dir=HF_CACHE_DIR)
34
+
35
def predict_sentiment(text, max_length=512):
    """Classify ``text`` as ``'Positive'`` or ``'Negative'``.

    The text is tokenized once, split into chunks that fit the model's input
    window, and each chunk is classified separately. The final label is the
    majority vote over the per-chunk predictions (class index 1 is reported
    as 'Positive', anything else as 'Negative').

    Args:
        text: The review to classify. ``None``/NaN (e.g. an empty CSV cell
            read by pandas) is treated as an empty string; any other
            non-string value is converted with ``str()``.
        max_length: Maximum number of token ids fed to the model per chunk,
            including the two special tokens. Defaults to 512.

    Returns:
        The string ``'Positive'`` or ``'Negative'``.

    Raises:
        ValueError: If ``max_length`` leaves no room for content tokens.
    """
    if not isinstance(text, str):
        # NaN compares unequal to itself, which identifies missing cells
        # without needing an extra import.
        is_missing = text is None or text != text
        text = "" if is_missing else str(text)

    # Two positions of every chunk are reserved for [CLS] and [SEP]. Slicing
    # at the full window size would push each full chunk over the limit once
    # the special tokens are added, and tokens would be lost to truncation.
    chunk_size = max_length - 2
    if chunk_size < 1:
        raise ValueError("max_length must be at least 3")

    token_ids = tokenizer.encode(text, add_special_tokens=False)
    chunks = [
        token_ids[start:start + chunk_size]
        for start in range(0, len(token_ids), chunk_size)
    ]
    if not chunks:
        # Empty text: still run the model once on a bare [CLS] [SEP] input
        # so the majority vote below always has at least one entry.
        chunks = [[]]

    # Predict sentiment for each chunk
    votes = []
    for chunk in chunks:
        # Feed the ids directly instead of decoding back to text and
        # re-tokenizing, which could alter the tokens at chunk boundaries.
        input_ids = torch.tensor(
            [[tokenizer.cls_token_id, *chunk, tokenizer.sep_token_id]]
        )
        with torch.no_grad():
            outputs = model(input_ids=input_ids)
        votes.append(outputs.logits.argmax(dim=1).item())

    # Aggregate sentiment results (majority voting)
    majority_sentiment = Counter(votes).most_common(1)[0][0]
    return 'Positive' if majority_sentiment == 1 else 'Negative'
54
+
55
@app.route('/')
def upload_file():
    """Render and return the ``upload.html`` template for the site root."""
    page = render_template('upload.html')
    return page
58
+
59
@app.route('/analyze_text', methods=['POST'])
def analyze_text():
    """Classify the posted ``text`` form field and re-render ``upload.html``.

    The predicted label is passed to the template as ``sentiment``.
    """
    label = predict_sentiment(request.form['text'])
    return render_template('upload.html', sentiment=label)
64
+
65
@app.route('/uploader', methods=['GET', 'POST'])
def upload_file_post():
    """Classify every review in an uploaded CSV and render the results page.

    On POST the uploaded ``file`` is parsed as CSV, each value in its
    ``review`` column is classified with ``predict_sentiment``, and
    ``result.html`` is rendered with the annotated table, a text summary and
    a base64-encoded PNG bar chart of the label counts.

    On GET the upload form is rendered instead, so the view always returns a
    valid response.

    Returns:
        The rendered template, or a ``(message, 400)`` tuple when the upload
        is missing, cannot be parsed, or has no ``review`` column.
    """
    if request.method != 'POST':
        # Without this branch a GET would fall through and return None,
        # which Flask rejects as an invalid response.
        return render_template('upload.html')

    uploaded = request.files.get('file')
    if uploaded is None or not uploaded.filename:
        return "No CSV file was uploaded.", 400

    try:
        data = pd.read_csv(uploaded)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError):
        return "The uploaded file could not be parsed as CSV.", 400

    if 'review' not in data.columns:
        return "The uploaded CSV must contain a 'review' column.", 400

    # Predict sentiment for each review
    data['sentiment'] = data['review'].apply(predict_sentiment)

    # Sentiment Analysis Summary
    sentiment_counts = data['sentiment'].value_counts().to_dict()
    summary = f"Total Reviews: {len(data)}<br>" \
              f"Positive: {sentiment_counts.get('Positive', 0)}<br>" \
              f"Negative: {sentiment_counts.get('Negative', 0)}<br>"

    # Generate bar chart. Labels are listed in a fixed order so each label
    # always gets the same colour, regardless of which one is more frequent.
    labels = ['Negative', 'Positive']
    counts = [sentiment_counts.get(label, 0) for label in labels]
    fig, ax = plt.subplots()
    try:
        ax.bar(labels, counts, color=['red', 'blue'])
        ax.set_ylabel('Counts')
        ax.set_title('Sentiment Analysis Summary')

        # Convert plot to base64 for embedding. Saving through `fig` avoids
        # relying on pyplot's implicit "current figure".
        img = BytesIO()
        fig.savefig(img, format='png', bbox_inches='tight')
        plot_url = base64.b64encode(img.getvalue()).decode('utf8')
    finally:
        # Always release the figure, even if rendering fails.
        plt.close(fig)

    return render_template(
        'result.html',
        tables=[data.to_html(classes='data')],
        titles=data.columns.values,
        summary=summary,
        plot_url=plot_url,
    )
94
+
95
if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger, which allows
    # arbitrary code execution; with the server bound to 0.0.0.0 it must stay
    # off unless explicitly requested through the FLASK_DEBUG variable.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {'1', 'true', 'yes'}
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)