Zasha1 committed on
Commit
e089e5b
·
verified ·
1 Parent(s): 46e8866

Update sentiment_analysis.py

Browse files
Files changed (1) hide show
  1. sentiment_analysis.py +66 -87
sentiment_analysis.py CHANGED
@@ -1,8 +1,7 @@
1
  import os
2
  import json
3
  import time
4
- import speech_recognition as sr
5
- from vosk import Model, KaldiRecognizer
6
  from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
7
  from huggingface_hub import login
8
  from product_recommender import ProductRecommender
@@ -28,35 +27,10 @@ model = AutoModelForSequenceClassification.from_pretrained(model_name)
28
  tokenizer = AutoTokenizer.from_pretrained(model_name)
29
  sentiment_analyzer = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
30
 
31
- # Vosk Speech Recognition Model
32
- vosk_model_path = config["vosk_model_path"]
33
-
34
- if not vosk_model_path:
35
- raise ValueError("Error: vosk_model_path is not set in the .env file.")
36
-
37
- try:
38
- vosk_model = Model(vosk_model_path)
39
- print("Vosk model loaded successfully.")
40
- except Exception as e:
41
- raise ValueError(f"Failed to load Vosk model: {e}")
42
-
43
- recognizer = KaldiRecognizer(vosk_model, 16000)
44
- audio = pyaudio.PyAudio()
45
-
46
- stream = audio.open(format=pyaudio.paInt16,
47
- channels=1,
48
- rate=16000,
49
- input=True,
50
- frames_per_buffer=4000)
51
- stream.start_stream()
52
 
53
  # Function to analyze sentiment
54
- def preprocess_text(text):
55
- """Preprocess text for better sentiment analysis."""
56
- # Strip whitespace and convert to lowercase
57
- processed = text.strip().lower()
58
- return processed
59
-
60
  def preprocess_text(text):
61
  """Preprocess text for better sentiment analysis."""
62
  return text.strip().lower()
@@ -104,74 +78,79 @@ def transcribe_with_chunks(objections_dict):
104
  model = SentenceTransformer('all-MiniLM-L6-v2')
105
 
106
  try:
107
- while True:
108
- data = stream.read(4000, exception_on_overflow=False)
109
-
110
- if recognizer.AcceptWaveform(data):
111
- result = recognizer.Result()
112
- text = json.loads(result)["text"]
113
-
114
- if "start listening" in text.lower():
115
- is_listening = True
116
- print("Listening started. Speak into the microphone.")
117
- continue
118
- elif "stop listening" in text.lower():
119
- is_listening = False
120
- print("Listening stopped.")
121
- if current_chunk:
122
- chunk_text = " ".join(current_chunk)
123
- sentiment, score = analyze_sentiment(chunk_text)
124
- chunks.append((chunk_text, sentiment, score))
125
- current_chunk = []
126
- continue
127
-
128
- if is_listening and text.strip():
129
- print(f"Transcription: {text}")
130
- current_chunk.append(text)
131
-
132
- if time.time() - chunk_start_time > 3:
133
  if current_chunk:
134
  chunk_text = " ".join(current_chunk)
135
-
136
- # Always process sentiment
137
  sentiment, score = analyze_sentiment(chunk_text)
138
  chunks.append((chunk_text, sentiment, score))
139
-
140
- # Get objection responses and check similarity score
141
- query_embedding = model.encode([chunk_text])
142
- distances, indices = objection_handler.index.search(query_embedding, 1)
143
-
144
- # If similarity is high enough, show objection response
145
- if distances[0][0] < 1.5: # Threshold for similarity
146
- responses = objection_handler.handle_objection(chunk_text)
147
- if responses:
148
- print("\nSuggested Response:")
149
- for response in responses:
150
- print(f"→ {response}")
151
-
152
- # Get product recommendations and check similarity score
153
- distances, indices = product_recommender.index.search(query_embedding, 1)
154
-
155
- # If similarity is high enough, show recommendations
156
- if distances[0][0] < 1.5: # Threshold for similarity
157
- recommendations = product_recommender.get_recommendations(chunk_text)
158
- if recommendations:
159
- print(f"\nRecommendations for this response:")
160
- for idx, rec in enumerate(recommendations, 1):
161
- print(f"{idx}. {rec}")
162
-
163
- print("\n")
164
  current_chunk = []
165
- chunk_start_time = time.time()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  except KeyboardInterrupt:
168
  print("\nExiting...")
169
- stream.stop_stream()
170
-
171
- return chunks
172
 
173
  if __name__ == "__main__":
174
  objections_file_path = r"C:\Users\shaik\Downloads\Sales Calls Transcriptions - Sheet3.csv"
175
  objections_dict = load_objections(objections_file_path)
176
  transcribed_chunks = transcribe_with_chunks(objections_dict)
177
- print("Final transcriptions and sentiments:", transcribed_chunks)
 
1
  import os
2
  import json
3
  import time
4
+ from speech_recognition import Recognizer, Microphone, AudioData, UnknownValueError, RequestError
 
5
  from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
6
  from huggingface_hub import login
7
  from product_recommender import ProductRecommender
 
27
  tokenizer = AutoTokenizer.from_pretrained(model_name)
28
  sentiment_analyzer = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
29
 
30
+ # Speech Recognition Setup
31
+ recognizer = Recognizer()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  # Function to analyze sentiment
 
 
 
 
 
 
34
  def preprocess_text(text):
35
  """Preprocess text for better sentiment analysis."""
36
  return text.strip().lower()
 
78
  model = SentenceTransformer('all-MiniLM-L6-v2')
79
 
80
  try:
81
+ with Microphone() as source:
82
+ recognizer.adjust_for_ambient_noise(source)
83
+ print("Microphone calibrated. Please speak.")
84
+
85
+ while True:
86
+ print("Listening for speech...")
87
+ try:
88
+ audio_data = recognizer.listen(source, timeout=5)
89
+ text = recognizer.recognize_google(audio_data)
90
+
91
+ if "start listening" in text.lower():
92
+ is_listening = True
93
+ print("Listening started. Speak into the microphone.")
94
+ continue
95
+ elif "stop listening" in text.lower():
96
+ is_listening = False
97
+ print("Listening stopped.")
 
 
 
 
 
 
 
 
 
98
  if current_chunk:
99
  chunk_text = " ".join(current_chunk)
 
 
100
  sentiment, score = analyze_sentiment(chunk_text)
101
  chunks.append((chunk_text, sentiment, score))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  current_chunk = []
103
+ continue
104
+
105
+ if is_listening and text.strip():
106
+ print(f"Transcription: {text}")
107
+ current_chunk.append(text)
108
+
109
+ if time.time() - chunk_start_time > 3:
110
+ if current_chunk:
111
+ chunk_text = " ".join(current_chunk)
112
+
113
+ # Always process sentiment
114
+ sentiment, score = analyze_sentiment(chunk_text)
115
+ chunks.append((chunk_text, sentiment, score))
116
+
117
+ # Get objection responses and check similarity score
118
+ query_embedding = model.encode([chunk_text])
119
+ distances, indices = objection_handler.index.search(query_embedding, 1)
120
+
121
+ # If similarity is high enough, show objection response
122
+ if distances[0][0] < 1.5: # Threshold for similarity
123
+ responses = objection_handler.handle_objection(chunk_text)
124
+ if responses:
125
+ print("\nSuggested Response:")
126
+ for response in responses:
127
+ print(f"→ {response}")
128
+
129
+ # Get product recommendations and check similarity score
130
+ distances, indices = product_recommender.index.search(query_embedding, 1)
131
+
132
+ # If similarity is high enough, show recommendations
133
+ if distances[0][0] < 1.5: # Threshold for similarity
134
+ recommendations = product_recommender.get_recommendations(chunk_text)
135
+ if recommendations:
136
+ print(f"\nRecommendations for this response:")
137
+ for idx, rec in enumerate(recommendations, 1):
138
+ print(f"{idx}. {rec}")
139
+
140
+ print("\n")
141
+ current_chunk = []
142
+ chunk_start_time = time.time()
143
+ except UnknownValueError:
144
+ print("Could not understand the audio.")
145
+ except RequestError as e:
146
+ print(f"Could not request results from Google Speech Recognition service; {e}")
147
 
148
  except KeyboardInterrupt:
149
  print("\nExiting...")
150
+ return chunks
 
 
151
 
152
  if __name__ == "__main__":
153
  objections_file_path = r"C:\Users\shaik\Downloads\Sales Calls Transcriptions - Sheet3.csv"
154
  objections_dict = load_objections(objections_file_path)
155
  transcribed_chunks = transcribe_with_chunks(objections_dict)
156
+ print("Final transcriptions and sentiments:", transcribed_chunks)