philipobiorah committed (verified)
Commit 9e7ff90 · 1 Parent(s): ab74064

Update main.py

Files changed (1): main.py (+38 -32)
main.py CHANGED
@@ -1,69 +1,62 @@
- import os
  from flask import Flask, request, render_template
  import pandas as pd
  import torch
  from transformers import BertTokenizer, BertForSequenceClassification
  from collections import Counter
  import matplotlib
- matplotlib.use('Agg')  # Prevents GUI issues for Matplotlib
+ matplotlib.use('Agg')  # Set the backend before importing pyplot
  import matplotlib.pyplot as plt
+
  import base64
  from io import BytesIO

- # Set writable cache directories within /tmp
- os.environ["HF_HOME"] = "/tmp/huggingface_cache"  # Replaces TRANSFORMERS_CACHE
- os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
-
- # Create directories if they don't exist
- os.makedirs(os.environ["HF_HOME"], exist_ok=True)
- os.makedirs(os.environ["MPLCONFIGDIR"], exist_ok=True)

  app = Flask(__name__)

- # Load Model from Local Directory
- MODEL_PATH = "bert_imdb_model.bin"
- TOKENIZER_PATH = "bert-base-uncased"
+ model_name = "philipobiorah/bert-imdb-model"

- if os.path.exists(MODEL_PATH):
-     print("Loading model from local file...")
-     model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
-     model.load_state_dict(torch.load(MODEL_PATH, map_location=torch.device('cpu')))
- else:
-     print(f"Error: Model file {MODEL_PATH} not found.")
-     exit(1)
+ # Load tokenizer and model
+ tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
+ model = BertForSequenceClassification.from_pretrained(model_name)

- model.eval()
- tokenizer = BertTokenizer.from_pretrained(TOKENIZER_PATH)
+ model.eval()
+
+ tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

- # ... rest of your code (keep the rest unchanged) ...
-
  def predict_sentiment(text):
-     tokens = tokenizer.encode(text, add_special_tokens=True)
-     chunks = [tokens[i:i + 512] for i in range(0, len(tokens), 512)]
+     # Split the text into chunks of 512 tokens
+     tokenized_text = tokenizer.encode(text, add_special_tokens=True)
+     chunks = [tokenized_text[i:i + 512] for i in range(0, len(tokenized_text), 512)]

+     # Predict sentiment for each chunk
      sentiments = []
      for chunk in chunks:
-         inputs = tokenizer.decode(chunk, skip_special_tokens=True, clean_up_tokenization_spaces=True)
+         # inputs = tokenizer.decode(chunk, skip_special_tokens=True)
+         inputs = tokenizer.decode(chunk, skip_special_tokens=True, clean_up_tokenization_spaces=True)  # Explicitly set clean_up_tokenization_spaces
          inputs = tokenizer(inputs, return_tensors="pt", truncation=True, padding=True, max_length=512)
-
          with torch.no_grad():
              outputs = model(**inputs)
-
          sentiments.append(outputs.logits.argmax(dim=1).item())

-     majority_sentiment = Counter(sentiments).most_common(1)[0][0]
+     # Aggregate the predictions (majority voting)
+     sentiment_counts = Counter(sentiments)
+     majority_sentiment = sentiment_counts.most_common(1)[0][0]
      return 'Positive' if majority_sentiment == 1 else 'Negative'

  @app.route('/')
  def upload_file():
      return render_template('upload.html')

+
  @app.route('/analyze_text', methods=['POST'])
  def analyze_text():
-     text = request.form['text']
-     sentiment = predict_sentiment(text)
-     return render_template('upload.html', sentiment=sentiment)
+     if request.method == 'POST':
+         text = request.form['text']
+         sentiment = predict_sentiment(text)
+         return render_template('upload.html', sentiment=sentiment)
+

  @app.route('/uploader', methods=['GET', 'POST'])
  def upload_file_post():
@@ -71,25 +64,38 @@ def upload_file_post():
      f = request.files['file']
      data = pd.read_csv(f)

+     # Predict sentiment for each review
      data['sentiment'] = data['review'].apply(predict_sentiment)

+     # Sentiment Analysis Summary
      sentiment_counts = data['sentiment'].value_counts().to_dict()
      summary = f"Total Reviews: {len(data)}<br>" \
                f"Positive: {sentiment_counts.get('Positive', 0)}<br>" \
                f"Negative: {sentiment_counts.get('Negative', 0)}<br>"

+     # Generate plot
      fig, ax = plt.subplots()
      ax.bar(sentiment_counts.keys(), sentiment_counts.values(), color=['red', 'blue'])
      ax.set_ylabel('Counts')
      ax.set_title('Sentiment Analysis Summary')
-
+
+     # Save the plot to a BytesIO object
      img = BytesIO()
      plt.savefig(img, format='png', bbox_inches='tight')
      img.seek(0)
+
+     # Encode the image in base64 and decode it to UTF-8
      plot_url = base64.b64encode(img.getvalue()).decode('utf8')
+
+     # Close the plot to free memory
      plt.close(fig)

      return render_template('result.html', tables=[data.to_html(classes='data')], titles=data.columns.values, summary=summary, plot_url=plot_url)

+
+
  if __name__ == '__main__':
      app.run(host='0.0.0.0', port=7860, debug=True)
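The substantive change in this commit is that main.py no longer loads a local bert_imdb_model.bin state dict; it now fetches the fine-tuned weights from the Hub repo philipobiorah/bert-imdb-model, so no checkpoint file has to ship with the app. A quick way to exercise the updated routes end to end is a small HTTP smoke test. The sketch below is not part of the commit: the route names, the 7860 port, the 'file' field, and the 'review' column come from main.py, while the requests dependency, BASE_URL, and the sample reviews are assumptions for illustration.

# Hypothetical smoke test (not part of the commit). Assumes main.py is already
# running locally on port 7860 and that the 'requests' package is installed.
import io
import requests

BASE_URL = "http://localhost:7860"  # host/port taken from app.run() in main.py

# Single review via the /analyze_text form endpoint.
resp = requests.post(
    f"{BASE_URL}/analyze_text",
    data={"text": "A wonderful, moving film with great performances."},
)
print("/analyze_text ->", resp.status_code)

# Batch of reviews via /uploader; the CSV must contain a 'review' column,
# which is what upload_file_post() reads before calling predict_sentiment().
csv_bytes = io.BytesIO(b"review\nLoved every minute of it.\nDull and far too long.\n")
resp = requests.post(
    f"{BASE_URL}/uploader",
    files={"file": ("reviews.csv", csv_bytes, "text/csv")},
)
print("/uploader ->", resp.status_code)

Both endpoints render HTML templates (upload.html and result.html), so the status code is the main thing to check here: a 200 indicates the model loaded from the Hub and predict_sentiment ran without error.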