aryachakraborty committed on
Commit e8b0736 · verified · 1 Parent(s): 266070f

Upload 6 files

Files changed (6)
  1. Dockerfile +16 -0
  2. README.md +35 -12
  3. main.py +65 -0
  4. requirements.txt +5 -0
  5. templates/index.html +580 -0
  6. web_search_tool.py +5 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
+ # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+ # you will also find guides on how best to write your Dockerfile
+
+ FROM python:3.9
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+ CMD ["gunicorn", "-b", "0.0.0.0:7860", "main:app"]
README.md CHANGED
@@ -1,12 +1,35 @@
- ---
- title: Voice Assistant SmolLM2
- emoji: 🏒
- colorFrom: pink
- colorTo: blue
- sdk: docker
- pinned: false
- license: apache-2.0
- short_description: This is a voice assistant powered by smolLM2-130M-IT
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # ARIA – AI-Responsive Interactive Assistant
+
+ ARIA is an AI-powered voice assistant that provides intelligent, web-enhanced answers to user queries. Built on a lightweight Hugging Face model, it integrates real-time web search and responds in a professional tone.
+
+ ---
+
+ ## 🚀 Features
+
+ - Uses `SmolLM2-135M-Instruct` for fast, efficient responses.
+ - Integrates web context for better accuracy using a custom search tool.
+ - Hosted with Flask + Gunicorn in a Hugging Face Space (Docker-based).
+ - Clean web UI with voice interaction (frontend via `index.html`).
+
+ ---
+
+ ## 🧱 Project Structure
+
+ ├── Dockerfile            # For Hugging Face Space deployment
+ ├── main.py               # Flask application
+ ├── requirements.txt      # Python dependencies
+ ├── web_search_tool.py    # Web search context integration
+ └── templates/
+     └── index.html        # Web UI
+
+ ---
+
+ ## 🐳 Running Locally with Docker
+
+ ```bash
+ # Build the Docker image
+ docker build -t aria-assistant .
+
+ # Run the container
+ docker run -p 7860:7860 aria-assistant
+ ```
+
+ ## 🤖 Model Used
+
+ SmolLM2-135M-Instruct
+
+ ## 📄 License
+
+ This project is under the MIT License.
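
Once the container is up, the app serves the UI on port 7860. A quick end-to-end check from Python (a sketch; it assumes the container was started as above, and uses `requests`, which is not among this project's dependencies) can POST the same `query` form field the web UI submits:

```python
# Smoke-test sketch against a locally running aria-assistant container.
import requests

resp = requests.post(
    "http://localhost:7860/",
    data={"query": "What is the capital of France?"},  # form field used by main.py
    timeout=120,  # the first request can be slow while the model loads and generates
)
resp.raise_for_status()
print(resp.text[:300])  # rendered HTML; the assistant's reply is embedded in it
```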
main.py ADDED
@@ -0,0 +1,65 @@
+ ### ARIA – AI-Responsive Interactive Assistant
+
+ from flask import Flask, request, render_template
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+ from web_search_tool import web_search
+ import warnings
+ warnings.filterwarnings("ignore", message="Failed to load image Python extension")
+
+
+ app = Flask(__name__)
+
+ # Load model and tokenizer
+ checkpoint = "HuggingFaceTB/SmolLM2-135M-Instruct"
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+ model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
+
+ @app.route("/", methods=["GET", "POST"])
+ def index():
+     response = ""
+     if request.method == "POST":
+         user_query = request.form["query"]
+
+         # Get context from the web
+         try:
+             context = web_search(user_query)
+         except Exception as e:
+             context = "No additional context could be retrieved."
+             print("Web search failed:", e)
+
+         # System prompt setup with context included
+         messages = [
+             {"role": "system", "content": (
+                 "You are a voice assistant that answers in a polite and professional tone. "
+                 "Use the following context to help answer the question:\n"
+                 f"{context}\n"
+                 "If the context is insufficient, still try to give the best possible answer."
+             )},
+             {"role": "user", "content": user_query}
+         ]
+
+         input_text = tokenizer.apply_chat_template(messages, tokenize=False)
+         inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
+
+         # Note: with do_sample=False generation is greedy, so the
+         # temperature/top_p values below have no effect.
+         outputs = model.generate(
+             inputs,
+             max_new_tokens=128,
+             temperature=0.1,
+             top_p=0.9,
+             do_sample=False
+         )
+
+         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+         # Extract only the assistant's reply: the decoded text contains the
+         # whole chat transcript, so split on the last "assistant" role marker
+         if "assistant" in response:
+             response = response.split("assistant")[-1].strip(": ").strip()
+         else:
+             response = "Sorry, couldn't understand your query. Can you ask again?"
+
+     return render_template("index.html", response=response)
+
+ if __name__ == "__main__":
+     app.run(debug=True)
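
One subtlety in the prompt-building step above: `apply_chat_template` is called without `add_generation_prompt=True`, so the rendered string ends right after the user turn rather than at the opening of an assistant turn. A standalone sketch (not part of the commit) of the difference:

```python
# Sketch: how add_generation_prompt changes the rendered chat prompt.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-135M-Instruct")
messages = [
    {"role": "system", "content": "You are a voice assistant."},
    {"role": "user", "content": "Hello!"},
]

# As called in main.py: the rendered prompt stops after the user message.
print(tokenizer.apply_chat_template(messages, tokenize=False))

# With add_generation_prompt=True, the template also appends the opening
# tokens of an assistant turn, which typically steers instruct models to
# answer directly instead of continuing the user's text.
print(tokenizer.apply_chat_template(messages, tokenize=False,
                                    add_generation_prompt=True))
```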
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ duckduckgo-search>7.1.1
+ gunicorn
+ transformers
+ flask
+ torch
templates/index.html ADDED
@@ -0,0 +1,580 @@
+ <!DOCTYPE html>
+ <html lang="en">
+ <head>
+     <meta charset="UTF-8">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>AI Voice Assistant</title>
+     <script src="https://cdn.tailwindcss.com"></script>
+     <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
+     <style>
+         :root {
+             --primary: #00f7ff;
+             --secondary: #7b2cbf;
+             --accent: #ff00e4;
+             --bg-dark: #0f172a;
+             --bg-darker: #0b1120;
+             --text-light: #e2e8f0;
+         }
+
+         body {
+             background-color: var(--bg-dark);
+             color: var(--text-light);
+             transition: all 0.3s ease;
+             font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+         }
+
+         .glass-effect {
+             background: rgba(15, 23, 42, 0.7);
+             backdrop-filter: blur(10px);
+             -webkit-backdrop-filter: blur(10px);
+             border: 1px solid rgba(255, 255, 255, 0.1);
+         }
+
+         .neon-glow {
+             text-shadow: 0 0 8px var(--primary),
+                          0 0 16px var(--primary);
+         }
+
+         .pulse {
+             animation: pulse 2s infinite;
+         }
+
+         @keyframes pulse {
+             0% {
+                 box-shadow: 0 0 0 0 rgba(0, 247, 255, 0.7);
+             }
+             70% {
+                 box-shadow: 0 0 0 15px rgba(0, 247, 255, 0);
+             }
+             100% {
+                 box-shadow: 0 0 0 0 rgba(0, 247, 255, 0);
+             }
+         }
+
+         .wave {
+             position: relative;
+             height: 80px;
+             width: 80px;
+             display: flex;
+             justify-content: center;
+             align-items: center;
+         }
+
+         .wave .dot {
+             display: inline-block;
+             width: 8px;
+             height: 8px;
+             border-radius: 50%;
+             margin-right: 3px;
+             background: var(--primary);
+             animation: wave 1.3s linear infinite;
+         }
+
+         .wave .dot:nth-child(2) {
+             animation-delay: -1.1s;
+         }
+
+         .wave .dot:nth-child(3) {
+             animation-delay: -0.9s;
+         }
+
+         @keyframes wave {
+             0%, 60%, 100% {
+                 transform: initial;
+             }
+             30% {
+                 transform: translateY(-15px);
+             }
+         }
+
+         .voice-btn {
+             transition: all 0.3s ease;
+             box-shadow: 0 0 20px rgba(0, 247, 255, 0.3);
+         }
+
+         .voice-btn.active {
+             background: var(--primary);
+             color: var(--bg-darker);
+             box-shadow: 0 0 30px var(--primary);
+         }
+
+         .response-text {
+             border-left: 3px solid var(--primary);
+             animation: textAppear 0.5s ease-out;
+         }
+
+         @keyframes textAppear {
+             from {
+                 opacity: 0;
+                 transform: translateY(10px);
+             }
+             to {
+                 opacity: 1;
+                 transform: translateY(0);
+             }
+         }
+
+         .floating-orb {
+             position: absolute;
+             width: 200px;
+             height: 200px;
+             border-radius: 50%;
+             background: radial-gradient(circle at 30% 30%, var(--primary), transparent 60%);
+             filter: blur(30px);
+             opacity: 0.3;
+             z-index: -1;
+             animation: float 15s infinite ease-in-out;
+         }
+
+         @keyframes float {
+             0%, 100% {
+                 transform: translate(0, 0);
+             }
+             25% {
+                 transform: translate(50px, 50px);
+             }
+             50% {
+                 transform: translate(0, 100px);
+             }
+             75% {
+                 transform: translate(-50px, 50px);
+             }
+         }
+
+         .typing-cursor {
+             display: inline-block;
+             width: 8px;
+             height: 20px;
+             background: var(--primary);
+             animation: blink 1s infinite;
+         }
+
+         @keyframes blink {
+             0%, 100% {
+                 opacity: 1;
+             }
+             50% {
+                 opacity: 0;
+             }
+         }
+     </style>
+ </head>
+ <body class="overflow-hidden">
+     <!-- Floating orbs for background effect -->
+     <div class="floating-orb" style="top: 20%; left: 10%;"></div>
+     <div class="floating-orb" style="top: 70%; left: 80%; animation-delay: 5s;"></div>
+     <div class="floating-orb" style="top: 30%; left: 60%; animation-delay: 10s; width: 150px; height: 150px;"></div>
+
+     <div class="min-h-screen flex flex-col items-center justify-center p-4">
+         <div class="glass-effect rounded-2xl p-8 w-full max-w-3xl shadow-2xl">
+             <!-- Header -->
+             <div class="text-center mb-8">
+                 <h1 class="text-4xl font-bold mb-2 neon-glow" style="color: var(--primary)">A.R.I.A</h1>
+                 <p class="text-gray-400">Your futuristic voice-controlled companion, powered by smolLM2</p>
+             </div>
+
+             <!-- Status indicator -->
+             <div class="flex justify-center mb-8">
+                 <div class="bg-gray-800 rounded-full px-4 py-2 flex items-center">
+                     <div id="statusIndicator" class="w-3 h-3 rounded-full bg-gray-500 mr-2"></div>
+                     <span id="statusText" class="text-sm">Ready</span>
+                 </div>
+             </div>
+
+             <!-- Voice visualization -->
+             <div class="flex justify-center mb-8">
+                 <div id="voiceVisualization" class="wave hidden">
+                     <div class="dot"></div>
+                     <div class="dot"></div>
+                     <div class="dot"></div>
+                 </div>
+             </div>
+
+             <!-- Response area -->
+             <div id="responseArea" class="glass-effect rounded-xl p-6 mb-8 min-h-32 {{ 'hidden' if not response }}">
+                 <div class="flex items-start">
+                     <div class="flex-shrink-0 h-10 w-10 rounded-full bg-cyan-900 flex items-center justify-center mr-3">
+                         <i class="fas fa-robot text-cyan-300"></i>
+                     </div>
+                     <div class="flex-1">
+                         <p class="font-semibold mb-2" style="color: var(--primary)">AI Response</p>
+                         <div id="responseText" class="response-text pl-4">
+                             <!-- Response will appear here -->
+                             {% if response %}
+                                 {{ response }}
+                             {% endif %}
+                         </div>
+                     </div>
+                 </div>
+             </div>
+
+             <!-- Voice button -->
+             <div class="flex justify-center">
+                 <button id="voiceButton" class="voice-btn w-24 h-24 rounded-full bg-gray-800 border-2 border-cyan-500 flex items-center justify-center text-3xl text-cyan-400 pulse">
+                     <i class="fas fa-microphone"></i>
+                 </button>
+             </div>
+
+             <!-- Instructions -->
+             <div class="mt-8 text-center text-gray-400 text-sm">
+                 <p>Press and hold the microphone button to speak</p>
+                 <p class="mt-1">Release to send your voice command</p>
+             </div>
+
+             <!-- Hidden form for Flask communication -->
+             <form id="queryForm" action="/" method="POST" class="hidden">
+                 <input type="text" id="queryInput" name="query">
+             </form>
+
+             <!-- Fallback text input -->
+             <div id="textInputFallback" class="mt-6">
+                 <div class="glass-effect rounded-xl p-4">
+                     <input type="text" id="textCommand" placeholder="Type your command instead..."
+                            class="w-full bg-gray-800 border border-cyan-700 rounded px-4 py-2 text-white">
+                     <button id="sendText" class="mt-2 bg-cyan-700 hover:bg-cyan-600 text-white py-2 px-4 rounded">
+                         Send
+                     </button>
+                 </div>
+             </div>
+         </div>
+
+         <!-- Footer -->
+         <div class="mt-8 text-center text-gray-500 text-sm">
+             <p>© 2023 AI Voice Assistant | Futuristic Interface</p>
+         </div>
+     </div>
+
+     <script>
+         // DOM Elements
+         const voiceButton = document.getElementById('voiceButton');
+         const responseArea = document.getElementById('responseArea');
+         const responseText = document.getElementById('responseText');
+         const statusIndicator = document.getElementById('statusIndicator');
+         const statusText = document.getElementById('statusText');
+         const voiceVisualization = document.getElementById('voiceVisualization');
+         const queryForm = document.getElementById('queryForm');
+         const queryInput = document.getElementById('queryInput');
+         const textInputFallback = document.getElementById('textInputFallback');
+         const textCommand = document.getElementById('textCommand');
+         const sendText = document.getElementById('sendText');
+
+         // Speech recognition setup
+         let recognition;
+         let isListening = false;
+         let finalTranscript = '';
+         let speechSynthesis = window.speechSynthesis;
+         let recognitionTimeout;
+         let recognitionRetries = 0;
+         const MAX_RETRIES = 3;
+
+         const checkSpeechSupport = () => {
+             return 'SpeechRecognition' in window || 'webkitSpeechRecognition' in window;
+         };
+
+         // Function to initialize speech recognition
+         function initSpeechRecognition() {
+             if (checkSpeechSupport()) {
+                 const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
+                 recognition = new SpeechRecognition();
+                 recognition.continuous = false;
+                 recognition.interimResults = true;
+                 recognition.lang = 'en-US'; // Set language explicitly
+
+                 recognition.onstart = () => {
+                     clearTimeout(recognitionTimeout); // Clear timeout when properly started
+                     isListening = true;
+                     voiceButton.classList.add('active');
+                     statusIndicator.classList.remove('bg-gray-500', 'bg-red-500');
+                     statusIndicator.classList.add('bg-green-500');
+                     statusText.textContent = 'Listening...';
+                     voiceVisualization.classList.remove('hidden');
+                     finalTranscript = '';
+                     recognitionRetries = 0; // Reset retries counter
+                 };
+
+                 recognition.onresult = (event) => {
+                     let interimTranscript = '';
+
+                     for (let i = event.resultIndex; i < event.results.length; i++) {
+                         const transcript = event.results[i][0].transcript;
+                         if (event.results[i].isFinal) {
+                             finalTranscript += transcript;
+                         } else {
+                             interimTranscript += transcript;
+                         }
+                     }
+
+                     // Show interim results
+                     if (interimTranscript || finalTranscript) {
+                         responseArea.classList.remove('hidden');
+                         responseText.innerHTML = `<span class="text-gray-400">${interimTranscript || finalTranscript}</span>${interimTranscript ? '<span class="typing-cursor"></span>' : ''}`;
+                     }
+                 };
+
+                 recognition.onerror = (event) => {
+                     console.error('Speech recognition error', event.error);
+                     isListening = false;
+                     voiceButton.classList.remove('active');
+                     voiceVisualization.classList.add('hidden');
+                     statusIndicator.classList.remove('bg-green-500');
+                     statusIndicator.classList.add('bg-red-500');
+
+                     // Special handling for permission errors
+                     if (event.error === 'not-allowed') {
+                         statusText.textContent = 'Microphone permission denied';
+                     } else if (event.error === 'network') {
+                         statusText.textContent = 'Network error. Check your connection.';
+                     } else {
+                         statusText.textContent = 'Error: ' + event.error;
+                     }
+
+                     setTimeout(resetStatus, 3000);
+                 };
+
+                 recognition.onend = () => {
+                     clearTimeout(recognitionTimeout);
+                     isListening = false;
+                     voiceButton.classList.remove('active');
+                     voiceVisualization.classList.add('hidden');
+
+                     if (finalTranscript) {
+                         processVoiceCommand(finalTranscript);
+                     } else if (recognitionRetries < MAX_RETRIES) {
+                         // Recognition ended without results, try again
+                         recognitionRetries++;
+                         statusText.textContent = `No speech detected, retrying (${recognitionRetries}/${MAX_RETRIES})...`;
+                         setTimeout(() => {
+                             try {
+                                 recognition.start();
+                             } catch (err) {
+                                 console.error('Failed to restart recognition:', err);
+                                 resetStatus();
+                             }
+                         }, 1000);
+                     } else {
+                         statusText.textContent = 'No speech detected. Please try again.';
+                         setTimeout(resetStatus, 2000);
+                     }
+                 };
+
+                 // Button event handlers
+                 voiceButton.addEventListener('mousedown', startListening);
+                 voiceButton.addEventListener('touchstart', startListening);
+                 voiceButton.addEventListener('mouseup', stopListening);
+                 voiceButton.addEventListener('touchend', stopListening);
+                 voiceButton.addEventListener('mouseleave', stopListening);
+
+                 return true;
+             } else {
+                 console.error('Speech recognition not supported in this browser');
+                 voiceButton.disabled = true;
+                 voiceButton.innerHTML = '<i class="fas fa-microphone-slash"></i>';
+                 statusIndicator.classList.remove('bg-gray-500');
+                 statusIndicator.classList.add('bg-red-500');
+                 statusText.textContent = 'Voice not supported';
+                 return false;
+             }
+         }
+
+         // Initialize speech recognition when page loads
+         let speechInitialized = false;
+         window.addEventListener('DOMContentLoaded', () => {
+             speechInitialized = initSpeechRecognition();
+
+             // Check for existing response from Flask and speak it
+             const existingResponse = responseText.innerText.trim();
+             if (existingResponse && !existingResponse.startsWith('You said:')) {
+                 speakResponse(existingResponse);
+             }
+
+             // Try to test speech recognition without actually listening
+             if (speechInitialized) {
+                 try {
+                     // Just ping the recognition system to trigger permission requests
+                     const testRecognition = new (window.SpeechRecognition || window.webkitSpeechRecognition)();
+                     testRecognition.continuous = false;
+                     testRecognition.interimResults = false;
+                     testRecognition.maxAlternatives = 1;
+
+                     let testTimeout = setTimeout(() => {
+                         try { testRecognition.stop(); } catch(e) {}
+                     }, 1000);
+
+                     testRecognition.onstart = () => {
+                         clearTimeout(testTimeout);
+                         setTimeout(() => {
+                             try { testRecognition.stop(); } catch(e) {}
+                         }, 100);
+                     };
+
+                     testRecognition.start();
+                 } catch(e) {
+                     console.warn('Speech recognition test failed:', e);
+                 }
+             }
+         });
+
+         function startListening(e) {
+             e.preventDefault();
+             if (!isListening && recognition) {
+                 try {
+                     recognition.start();
+                     // Set timeout in case recognition doesn't trigger onstart
+                     recognitionTimeout = setTimeout(() => {
+                         if (!isListening) {
+                             console.warn("Recognition didn't start properly, retrying...");
+                             try {
+                                 recognition.stop();
+                                 setTimeout(() => {
+                                     try {
+                                         recognition.start();
+                                     } catch(err) {
+                                         console.error('Failed to restart recognition:', err);
+                                         resetStatus();
+                                     }
+                                 }, 300);
+                             } catch (err) {
+                                 console.error('Failed to stop non-started recognition:', err);
+                                 resetStatus();
+                             }
+                         }
+                     }, 2000);
+                 } catch (err) {
+                     console.error('Recognition error:', err);
+                     statusIndicator.classList.remove('bg-gray-500');
+                     statusIndicator.classList.add('bg-red-500');
+                     statusText.textContent = 'Error starting recognition';
+                     setTimeout(resetStatus, 3000);
+                 }
+             }
+         }
+
+         function stopListening(e) {
+             e.preventDefault();
+             if (recognition) {
+                 try {
+                     recognition.stop();
+                 } catch (err) {
+                     console.error('Error stopping recognition:', err);
+                 }
+             }
+         }
+
+         function resetStatus() {
+             statusIndicator.classList.remove('bg-green-500', 'bg-red-500', 'bg-yellow-500', 'bg-blue-500');
+             statusIndicator.classList.add('bg-gray-500');
+             statusText.textContent = 'Ready';
+         }
+
+         function processVoiceCommand(command) {
+             // Show recognized text
+             responseArea.classList.remove('hidden');
+             responseText.innerHTML = `<span class="text-gray-400">You said: "${command}"</span>`;
+
+             // Set query in form and submit to Flask
+             queryInput.value = command;
+
+             // Simulate AI thinking
+             statusIndicator.classList.remove('bg-green-500');
+             statusIndicator.classList.add('bg-yellow-500');
+             statusText.textContent = 'Processing...';
+
+             // Submit form to Flask backend
+             queryForm.submit();
+         }
+
+         // Text input fallback handlers
+         sendText.addEventListener('click', () => {
+             const command = textCommand.value;
+             if (command.trim()) {
+                 processVoiceCommand(command);
+                 textCommand.value = '';
+             }
+         });
+
+         textCommand.addEventListener('keypress', (e) => {
+             if (e.key === 'Enter') {
+                 const command = textCommand.value;
+                 if (command.trim()) {
+                     processVoiceCommand(command);
+                     textCommand.value = '';
+                 }
+             }
+         });
+
+         function speakResponse(text) {
+             if (speechSynthesis) {
+                 // Cancel any ongoing speech
+                 speechSynthesis.cancel();
+
+                 const utterance = new SpeechSynthesisUtterance(text);
+
+                 // Get available voices
+                 let voices = speechSynthesis.getVoices();
+
+                 // If voices array is empty, wait for the onvoiceschanged event
+                 if (voices.length === 0) {
+                     speechSynthesis.onvoiceschanged = () => {
+                         voices = speechSynthesis.getVoices();
+                         setVoiceAndSpeak();
+                     };
+                 } else {
+                     setVoiceAndSpeak();
+                 }
+
+                 function setVoiceAndSpeak() {
+                     // Find a good English voice
+                     const preferredVoice = voices.find(voice =>
+                         voice.name.includes('Google US English') ||
+                         voice.name.includes('Samantha') ||
+                         voice.name.includes('Google UK English Female') ||
+                         voice.name.includes('en-US')
+                     ) || voices.find(voice => voice.lang.includes('en')) || voices[0];
+
+                     if (preferredVoice) {
+                         utterance.voice = preferredVoice;
+                     }
+
+                     utterance.rate = 1.1;
+                     utterance.pitch = 1.1;
+
+                     // Start speaking
+                     speechSynthesis.speak(utterance);
+
+                     // Visual feedback
+                     statusIndicator.classList.remove('bg-yellow-500');
+                     statusIndicator.classList.add('bg-blue-500');
+                     statusText.textContent = 'Speaking';
+
+                     utterance.onend = () => {
+                         resetStatus();
+                     };
+                 }
+             }
+         }
+     </script>
+     <script>
+         window.addEventListener("DOMContentLoaded", () => {
+             const responseText = {{ response|tojson }};
+
+             if ('speechSynthesis' in window && responseText.trim().length > 0) {
+                 const utterance = new SpeechSynthesisUtterance(responseText);
+                 utterance.lang = 'en-US';
+
+                 // Cancel any ongoing speech
+                 window.speechSynthesis.cancel();
+
+                 // Optional: add debug listeners
+                 utterance.onend = () => console.log("✅ Speech finished");
+                 utterance.onerror = (e) => console.error("❌ Speech error:", e);
+
+                 window.speechSynthesis.speak(utterance);
+             } else {
+                 console.log("⚠️ Speech synthesis not triggered: either not supported or empty response.");
+             }
+         });
+     </script>
+
+ </body>
+ </html>
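
The second script block relies on Jinja's `tojson` filter to inline the Flask `response` string safely inside JavaScript. A small sketch (not part of the commit) of what that filter emits:

```python
# Sketch: Flask's tojson filter JSON-encodes the value and additionally
# escapes <, >, &, and ' so the literal is safe inside a <script> block.
from flask import Flask, render_template_string

app = Flask(__name__)

with app.app_context():
    rendered = render_template_string(
        "const responseText = {{ response|tojson }};",
        response='He said "hi" </script>',
    )
    print(rendered)
    # Roughly: const responseText = "He said \"hi\" \u003c/script\u003e";
```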
web_search_tool.py ADDED
@@ -0,0 +1,5 @@
+ from duckduckgo_search import DDGS
+
+ def web_search(query):
+     # Return the snippet ("body") of the top DuckDuckGo hit as context.
+     # Raises IndexError when there are no results; main.py catches that.
+     results = DDGS().text(query, max_results=1)
+     return results[0]['body']
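
Because `web_search` raises on an empty result set, main.py wraps it in a broad try/except. A slightly more defensive variant (a sketch, not in the commit; `web_search_safe` is a hypothetical name) folds that case into the function itself:

```python
# Sketch: return an empty string instead of raising when there are no hits,
# and optionally join several snippets for richer context.
from duckduckgo_search import DDGS

def web_search_safe(query: str, max_results: int = 3) -> str:
    results = DDGS().text(query, max_results=max_results)
    return " ".join(r["body"] for r in results) if results else ""
```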