Spaces:
Running
Running
File size: 5,577 Bytes
698c6c9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
# app.py
from fastapi import FastAPI, Request, Form
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from main import process_query
from voice.speech_to_text import SpeechToText
import os
import asyncio
import pyaudio
import wave
import logging
# Configure root logging at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# The FastAPI application object that all route decorators below attach to.
app = FastAPI()
# Serve files from the local "static" directory under the /static URL prefix
# (for CSS, JS, etc.).
app.mount("/static", StaticFiles(directory="static"), name="static")
# Jinja2 templates are loaded from the local "templates" directory.
templates = Jinja2Templates(directory="templates")
# Path of the Vosk model passed to SpeechToText and process_query, and the
# WAV file that each finished recording is written to (overwritten each time).
vosk_model_path = "./vosk-model-small-en-us-0.15"
audio_file_path = "voice/temp_audio.wav"
# Create the directory that holds audio_file_path at import time so the
# first save does not fail on a missing folder.
os.makedirs("voice", exist_ok=True)
# Single shared speech-to-text instance, constructed once at import time.
# NOTE(review): presumably this loads the Vosk model eagerly - confirm in
# voice/speech_to_text.py.
stt = SpeechToText(model_path=vosk_model_path)
# Process-wide recording state shared by the recording coroutine and the
# /start_recording and /stop_recording handlers:
#   recording      - flag polled by the capture loop; set True to start, False to stop
#   audio_frames   - raw audio chunks captured so far; reset on each start
#   recording_task - the asyncio task running the capture loop, or None
recording = False
audio_frames = []
recording_task = None
def save_audio_to_wav(frames, sample_rate=16000):
    """Save audio frames to the module's temporary WAV file.

    The data is written to ``audio_file_path`` as mono, 16-bit PCM; any
    existing file at that path is overwritten.

    Args:
        frames: Sequence of ``bytes`` chunks of raw audio, concatenated in
            order to form the sample data.
        sample_rate: Sampling frequency in Hz recorded in the WAV header.

    Raises:
        Exception: Any error raised while opening or writing the file is
            logged and then re-raised unchanged for the caller to handle.
    """
    try:
        logger.info(f"Saving audio to {audio_file_path} with {len(frames)} frames")
        # The context manager closes the writer even when a setter or
        # writeframes() raises, so a failed save no longer leaks the handle.
        with wave.open(audio_file_path, 'wb') as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)  # 16-bit
            wf.setframerate(sample_rate)
            wf.writeframes(b''.join(frames))
        if os.path.exists(audio_file_path):
            logger.info(f"WAV file saved successfully: {os.path.getsize(audio_file_path)} bytes")
        else:
            logger.error("WAV file was not created")
    except Exception as e:
        logger.error(f"Error saving WAV file: {str(e)}")
        raise
async def record_audio():
    """Background task that captures microphone audio into ``audio_frames``.

    Opens a mono, 16-bit, 16 kHz PyAudio input stream and appends 1024-frame
    chunks to the module-level ``audio_frames`` list for as long as the
    module-level ``recording`` flag is true. Errors are logged, not raised,
    so awaiting the task never propagates a capture failure.
    """
    global audio_frames
    loop = asyncio.get_running_loop()
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024)
        try:
            stream.start_stream()
            logger.info("Recording started...")
            while recording:
                # stream.read() blocks until a full buffer is available; run it
                # in the default executor so the event loop keeps serving
                # requests (including /stop_recording) in the meantime.
                data = await loop.run_in_executor(
                    None,
                    lambda: stream.read(1024, exception_on_overflow=False),
                )
                audio_frames.append(data)
        finally:
            # Always release the input stream, even if a read failed.
            stream.stop_stream()
            stream.close()
        logger.info(f"Recording stopped, captured {len(audio_frames)} frames")
    except Exception as e:
        logger.error(f"Error during recording: {str(e)}")
    finally:
        p.terminate()
@app.get("/", response_class=HTMLResponse)
async def get_index(request: Request):
    """Render the landing page from the ``index.html`` template."""
    page_context = {"request": request}
    return templates.TemplateResponse("index.html", page_context)
@app.post("/start_recording", response_class=JSONResponse)
async def start_recording():
    """Start the background capture task unless one is already running.

    Returns a JSON status object: ``"Recording started"`` when a new capture
    task was scheduled, ``"Already recording"`` when the flag was already set.
    """
    global recording, audio_frames, recording_task

    # Guard clause: a capture is already active, so leave its state untouched.
    if recording:
        logger.warning("Recording already in progress")
        return {"status": "Already recording"}

    # Raise the flag and clear the buffer before scheduling the capture
    # coroutine, which polls the flag and appends to the buffer.
    recording = True
    audio_frames = []
    recording_task = asyncio.create_task(record_audio())
    logger.info("Started recording task")
    return {"status": "Recording started"}
@app.post("/stop_recording", response_class=HTMLResponse)
async def stop_recording(request: Request):
    """Stop the active capture, save it to WAV, and render its transcription.

    Always responds with the ``index.html`` template. On success the context
    carries ``transcribed_text``; otherwise it carries an ``error`` message
    (no capture active, save failure, empty transcription, or a
    transcription exception).
    """
    global recording, recording_task

    def render_index(**extra):
        # Build the template context: the request first, then the extras.
        page_context = {"request": request}
        page_context.update(extra)
        return templates.TemplateResponse("index.html", page_context)

    if not recording:
        logger.warning("No recording in progress")
        return render_index(error="No recording in progress.")

    # Lower the flag so the capture loop exits, then wait for it to finish
    # so every captured frame is in the buffer before saving.
    recording = False
    if recording_task:
        await recording_task
        recording_task = None

    # Persist the captured frames to the temporary WAV file.
    try:
        save_audio_to_wav(audio_frames)
    except Exception as save_err:
        logger.error(f"Failed to save audio: {str(save_err)}")
        return render_index(error=f"Failed to save audio: {str(save_err)}")

    # Run speech-to-text on the file that was just written.
    try:
        transcript = stt.transcribe_audio(audio_file_path)
        logger.info(f"Transcription result: '{transcript}'")
        if transcript:
            return render_index(transcribed_text=transcript)
        logger.warning("Transcription returned no text")
        return render_index(error="Could not understand the audio.")
    except Exception as stt_err:
        logger.error(f"Transcription error: {str(stt_err)}")
        return render_index(error=f"Transcription error: {str(stt_err)}")
@app.post("/query", response_class=HTMLResponse)
async def handle_query(request: Request, query_text: str = Form(...), use_retriever: str = Form("no")):
    """Process a submitted text query and render the results page.

    The ``use_retriever`` form field is treated as enabled when it equals
    ``"yes"`` or ``"y"`` (case-insensitive). The dict returned by
    ``process_query`` is copied field by field into the template context.
    """
    retriever_enabled = use_retriever.lower() in ("yes", "y")
    outcome = await process_query(
        vosk_model_path,
        query_text=query_text,
        use_retriever=retriever_enabled,
    )

    # Template variable name -> key in the process_query result, in the
    # order the template context is populated.
    field_map = (
        ("Intent", "intent"),
        ("Entities", "entities"),
        ("API_Response", "base_response"),
        ("RAG_Response", "retriever_response"),
        ("Web_Search_Response", "web_search_response"),
        ("Final_Response", "final_response"),
        ("Error", "error"),
    )
    page_context = {"request": request, "User_Query": query_text}
    for template_key, result_key in field_map:
        page_context[template_key] = outcome[result_key]
    return templates.TemplateResponse("index.html", page_context)