philipobiorah committed on
Commit
237c9fa
·
verified ·
1 Parent(s): ea404d7

Upload main.py, Dockerfile, and requirements.txt

Browse files
Files changed (4) hide show
  1. Dockerfile +30 -0
  2. bert_imdb_model.bin +3 -0
  3. main.py +91 -0
  4. requirements.txt +51 -0
Dockerfile ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as the base image
FROM python:3.9-slim

# Send Python output straight to the container log instead of buffering it,
# so the "Loading model..." messages and tracebacks show up immediately.
ENV PYTHONUNBUFFERED=1

# Set the working directory inside the container
WORKDIR /app

# Install system dependencies for ML and data processing libraries.
# --no-install-recommends keeps optional extras out of the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        libopenblas-dev \
        libomp-dev \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip to avoid dependency issues (no cache, to keep the layer small)
RUN pip install --no-cache-dir --upgrade pip

# Copy the dependencies file first for caching efficiency
COPY requirements.txt /app/requirements.txt

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Run as an unprivileged user with a writable home directory.
# Hugging Face Spaces runs the container as UID 1000; without a matching user
# and HOME, the Hugging Face and matplotlib caches have nowhere to write.
RUN useradd --create-home --uid 1000 user
ENV HOME=/home/user

# Copy the rest of the application code, owned by the runtime user
COPY --chown=user:user . /app

USER user

# Expose port 7860 (required by Hugging Face Spaces)
EXPOSE 7860

# Command to run the Flask app
CMD ["python", "main.py"]
bert_imdb_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2bd216b42904c9382c0381c94b5852b099c8db7890b14dcf0ebd1f950c2218b
3
+ size 438015111
main.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, render_template
2
+ import pandas as pd
3
+ import torch
4
+ from transformers import BertTokenizer, BertForSequenceClassification
5
+ from collections import Counter
6
+ import matplotlib
7
+ matplotlib.use('Agg') # Set the backend before importing pyplot
8
+ import matplotlib.pyplot as plt
9
+ import base64
10
+ from io import BytesIO
11
+ import os
12
+
13
app = Flask(__name__)

# Load Model - Check if local model exists; otherwise, load from Hugging Face
MODEL_PATH = "bert_imdb_model.bin"
MODEL_HF_REPO = "philipobiorah/bert-imdb-model"  # Replace with your Hugging Face model repo

if os.path.exists(MODEL_PATH):
    print("Loading model from local file...")
    # Build the architecture first, then overwrite its weights with the
    # fine-tuned state dict stored next to the application.
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
    # weights_only=True limits unpickling to tensors and plain containers, so
    # a tampered checkpoint file cannot execute arbitrary code while loading.
    state_dict = torch.load(
        MODEL_PATH,
        map_location=torch.device('cpu'),
        weights_only=True,
    )
    model.load_state_dict(state_dict)
else:
    print("Loading model from Hugging Face Hub...")
    model = BertForSequenceClassification.from_pretrained(MODEL_HF_REPO)

# Inference only: switch off dropout and other training-time behaviour.
model.eval()
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
29
+
30
def predict_sentiment(text):
    """Classify *text* as 'Positive' or 'Negative'.

    The text is tokenized once and split into chunks that fit the model's
    512-token window. Each chunk is classified on its own and the label
    predicted for the most chunks wins; on a tie, the label that was
    predicted first wins.

    Args:
        text: The review text to classify.

    Returns:
        'Positive' if the winning class index is 1, otherwise 'Negative'.
    """
    max_length = 512
    # Reserve two positions for the [CLS] and [SEP] tokens added per chunk.
    # Slicing at 512 and re-tokenizing with truncation silently dropped the
    # last two tokens of every full chunk.
    body_length = max_length - 2

    token_ids = tokenizer.encode(text, add_special_tokens=False)
    chunks = [
        token_ids[start:start + body_length]
        for start in range(0, len(token_ids), body_length)
    ]
    if not chunks:
        # Empty text still gets one ([CLS] [SEP]) pass so a label is returned.
        chunks = [[]]

    # Predict sentiment for each chunk
    sentiments = []
    for chunk in chunks:
        # Feed the ids directly instead of decoding back to text and
        # tokenizing again, which is slower and not guaranteed to round-trip.
        input_ids = torch.tensor([tokenizer.build_inputs_with_special_tokens(chunk)])

        with torch.no_grad():
            outputs = model(input_ids=input_ids)

        sentiments.append(outputs.logits.argmax(dim=1).item())

    # Aggregate sentiment results (majority voting)
    majority_sentiment = Counter(sentiments).most_common(1)[0][0]
    return 'Positive' if majority_sentiment == 1 else 'Negative'
49
+
50
@app.route('/')
def upload_file():
    """Render the `upload.html` template for the site root."""
    landing_page = render_template('upload.html')
    return landing_page
53
+
54
@app.route('/analyze_text', methods=['POST'])
def analyze_text():
    """Classify the posted `text` form field and re-render `upload.html`.

    The predicted label is passed to the template as `sentiment`.
    """
    return render_template(
        'upload.html',
        sentiment=predict_sentiment(request.form['text']),
    )
59
+
60
@app.route('/uploader', methods=['GET', 'POST'])
def upload_file_post():
    """Classify every review in an uploaded CSV and render the results page.

    The uploaded file (form field `file`) must be a CSV with a `review`
    column. Each review is labelled with `predict_sentiment`, and the page
    receives the labelled table, a short HTML summary and a base64-encoded
    PNG bar chart of the label counts.

    Returns:
        The rendered `result.html` page on success, the upload form for a
        plain GET, or a 400 response when the CSV cannot be used.
    """
    if request.method != 'POST':
        # A GET used to fall through and return None, which Flask turns into
        # a 500. Show the upload form instead.
        return render_template('upload.html')

    f = request.files['file']
    try:
        data = pd.read_csv(f)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError):
        return "The uploaded file could not be read as a CSV.", 400

    if 'review' not in data.columns:
        return "The uploaded CSV must contain a 'review' column.", 400

    # Predict sentiment for each review. Empty cells are read as NaN (a
    # float), which the tokenizer rejects, so treat them as empty strings.
    reviews = data['review'].fillna('').astype(str)
    data['sentiment'] = reviews.apply(predict_sentiment)

    # Sentiment Analysis Summary
    sentiment_counts = data['sentiment'].value_counts().to_dict()
    summary = f"Total Reviews: {len(data)}<br>" \
              f"Positive: {sentiment_counts.get('Positive', 0)}<br>" \
              f"Negative: {sentiment_counts.get('Negative', 0)}<br>"

    # Generate bar chart. Fixed label order keeps each sentiment on the same
    # colour; value_counts() order depends on which label is more frequent.
    labels = ['Positive', 'Negative']
    counts = [sentiment_counts.get(label, 0) for label in labels]
    fig, ax = plt.subplots()
    try:
        ax.bar(labels, counts, color=['blue', 'red'])
        ax.set_ylabel('Counts')
        ax.set_title('Sentiment Analysis Summary')

        # Convert plot to base64 for embedding. Save through the figure
        # object rather than pyplot's global "current figure".
        img = BytesIO()
        fig.savefig(img, format='png', bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting fails.
        plt.close(fig)
    plot_url = base64.b64encode(img.getvalue()).decode('utf8')

    return render_template(
        'result.html',
        tables=[data.to_html(classes='data')],
        titles=data.columns.values,
        summary=summary,
        plot_url=plot_url,
    )
89
+
90
if __name__ == '__main__':
    # The Werkzeug debugger lets anyone who can reach the port run arbitrary
    # Python, and this server binds to every interface. Keep it off unless it
    # is explicitly requested through the environment.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)
requirements.txt ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Every package is pinned so image builds are reproducible.
# NOTE(review): matplotlib, numpy and torch were previously unpinned. The
# versions below were chosen to work with Python 3.9 (the Docker base image)
# and with the existing sympy==1.13.1 pin - confirm against the environment
# the model was trained and tested in.
astunparse==1.6.3
attrs==24.2.0
blinker==1.8.2
certifi==2024.8.30
charset-normalizer==3.3.2
click==8.1.7
cmake==3.30.3
contourpy==1.2.1  # Compatible with Python 3.9
cycler==0.12.1
expecttest==0.2.1
filelock==3.16.1
Flask==3.0.3
fonttools==4.56.0
fsspec==2024.9.0
huggingface-hub==0.28.1
hypothesis==6.112.1
idna==3.10
itsdangerous==2.2.0
Jinja2==3.1.4
lintrunner==0.12.5
MarkupSafe==2.1.5
matplotlib==3.9.4
mpmath==1.3.0
ninja==1.11.1.1
numpy==2.0.2
optree==0.12.1
packaging==24.1
pandas==2.2.3
pillow==11.1.0
psutil==6.0.0
pyparsing==3.2.1
python-dateutil==2.9.0.post0
pytz==2025.1
PyYAML==6.0.2
regex==2024.11.6
requests==2.32.3
safetensors==0.5.2
six==1.16.0
sortedcontainers==2.4.0
sympy==1.13.1
tokenizers==0.21.0
torch==2.5.1
tqdm==4.67.1
transformers==4.48.3
types-dataclasses==0.6.6
typing_extensions==4.12.2
tzdata==2025.1
urllib3==2.2.3
Werkzeug==3.0.4