Spaces:

pradeepsengarr
/

Voice_Assistant

Sleeping

App Files Files Community

pradeepsengarr committed 5 days ago

Commit

1d02232

verified ·

1 Parent(s): 8ae45bd

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -53

app.py CHANGED Viewed

@@ -1,65 +1,73 @@
-import streamlit as st
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import whisper
-from streamlit_webrtc import webrtc_streamer, AudioProcessorBase
 import torch
-# ----------------------------- SETUP -----------------------------
-st.set_page_config(page_title="🧠 Talkative AI Bot", layout="centered")
-# ----------------------------- LOAD MODELS -----------------------------
-# Load Whisper model for speech-to-text
-@st.cache_resource
-def load_whisper():
-    try:
-        model = whisper.load_model("base")
-        return model
-    except Exception as e:
-        st.error(f"An error occurred while loading Whisper model: {e}")
-        return None
-# Load DistilGPT-2 model for generating responses
-@st.cache_resource
-def load_language_model():
     try:
-        tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
-        model = AutoModelForCausalLM.from_pretrained("distilgpt2")
-        return model, tokenizer
-    except Exception as e:
-        st.error(f"An error occurred while loading Language model: {e}")
-        return None, None
-# ----------------------------- FUNCTION TO HANDLE SPEECH -----------------------------
-class AudioProcessor(AudioProcessorBase):
-    def __init__(self):
-        self.whisper_model = load_whisper()
-    def transform(self, audio_frame):
-        # Convert audio frame to audio file and get text transcription
-        result = self.whisper_model.transcribe(audio_frame)
-        return result['text']
-# ----------------------------- FUNCTION TO GENERATE RESPONSE -----------------------------
-def generate_response(user_input):
-    model, tokenizer = load_language_model()
-    if model and tokenizer:
-        inputs = tokenizer(user_input, return_tensors="pt")
-        outputs = model.generate(inputs['input_ids'], max_length=100)
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        return response
-    return "Sorry, I couldn't process that."
-# ----------------------------- STREAMLIT UI -----------------------------
-st.title("🧠 Talkative AI Bot")
-st.write("Talk to the bot using your microphone, and it will respond!")
-# Streamlit WebRTC for speech-to-text
-webrtc_streamer(key="example", audio_processor_factory=AudioProcessor)
-# Input text for chatbot
-user_input = st.text_input("Type something for the bot:")
-# Handle text input and generate response
-if user_input:
-    response = generate_response(user_input)
-    st.write(f"Bot: {response}")

+# app.py
 import torch
+import whisper
+import gradio as gr
+from gtts import gTTS
+from pydub import AudioSegment
+import tempfile
+import os
+from transformers import AutoTokenizer, AutoModelForCausalLM
+# Speech-to-text: Whisper "base" checkpoint, loaded once at import time so
+# every request reuses the same model instance.
+whisper_model = whisper.load_model("base")
+# Text generation: Qwen2.5 1.5B causal language model.
+# No explicit auth flag is passed: the checkpoint is public, and the deprecated
+# `use_auth_token=True` makes loading fail when no Hugging Face token is stored
+# on the machine.
+model_name = "Qwen/Qwen2.5-1.5B"
+# Choose the device once; prefer the GPU when one is available.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
+print(f"Model loaded on: {'GPU' if next(model.parameters()).is_cuda else 'CPU'}")
+def respond(prompt_text, audio_file):
+    """Answer a typed or spoken prompt with generated text and speech.
+
+    Typed text takes priority; the audio file is transcribed only when the
+    text box is empty or whitespace.
+
+    Args:
+        prompt_text: Text typed by the user (may be None or empty).
+        audio_file: Path to a recorded/uploaded audio file (may be None).
+
+    Returns:
+        A 3-tuple ``(input_info, text_response, audio_path)``:
+        ``input_info`` is the transcription, ``"Typed input used"``, or a
+        status/error message; ``text_response`` is the generated text (empty
+        on failure); ``audio_path`` is the path of an MP3 with the spoken
+        response, or None when nothing was synthesized.
+    """
+    transcription = None
     try:
+        if prompt_text and prompt_text.strip():
+            final_prompt = prompt_text.strip()
+        elif audio_file:
+            sound = AudioSegment.from_file(audio_file)
+            # Reserve a path for the WAV copy, then close the handle before
+            # writing to it so the export and Whisper can reopen the file.
+            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpwav:
+                wav_path = tmpwav.name
+            try:
+                sound.export(wav_path, format="wav")
+                transcription = whisper_model.transcribe(wav_path)["text"].strip()
+            finally:
+                # The WAV is only an intermediate; remove it so repeated
+                # requests do not fill the temp directory.
+                os.remove(wav_path)
+            if not transcription:
+                # Silence / unintelligible audio: there is no prompt to answer.
+                return "No speech detected", "", None
+            final_prompt = transcription
+        else:
+            return "No prompt provided", "", None
+        inputs = tokenizer(final_prompt, return_tensors="pt").to(model.device)
+        outputs = model.generate(**inputs, max_new_tokens=100, do_sample=True, temperature=0.7)
+        # The generated sequence starts with the prompt tokens; decode only the
+        # newly generated part so the reply does not echo the user's prompt.
+        prompt_length = inputs["input_ids"].shape[-1]
+        text_response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()
+        input_info = transcription if transcription else "Typed input used"
+        if not text_response:
+            # gTTS rejects empty text; report the empty reply without audio.
+            return input_info, "", None
+        tts = gTTS(text_response)
+        # The MP3 must outlive this call so the UI can play it, hence
+        # delete=False and no cleanup here.
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
+            audio_path = fp.name
+        tts.save(audio_path)
+        return input_info, text_response, audio_path
+    except Exception as e:
+        # UI boundary: surface the failure in the first output box instead of
+        # crashing the request.
+        return f"Error: {e}", "", None
+# Build the Gradio interface: a header, one row of inputs, one row of outputs,
+# and a submit button wired to `respond`.
+with gr.Blocks(theme=gr.themes.Soft(), title="Chat with Vidhya") as demo:
+    # Introductory header shown at the top of the page.
+    gr.Markdown("""
+        # 🧠 Chat with Vidhya
+        **An AI assistant that understands your voice or typed input, and responds in speech + text.**
+        💡 Try asking about:
+        - Technology trends
+        - Motorbikes & automobiles
+        - Finance and money tips
+        - Gaming news or strategies
+    """)
+    # Input row: typed prompt and/or an audio prompt passed to `respond` as a file path.
+    with gr.Row():
+        txt_input = gr.Textbox(lines=2, label="Type your prompt (optional)")
+        audio_input = gr.Audio(type="filepath", label="Or speak your prompt")
+    # Output row: one component per element of the 3-tuple returned by `respond`.
+    with gr.Row():
+        transcription_output = gr.Textbox(label="Transcribed Speech")
+        text_output = gr.Textbox(label="Model's Response")
+        audio_output = gr.Audio(type="filepath", label="Spoken Response")
+    submit_btn = gr.Button("Submit")
+    # Input/output order here must match `respond`'s parameter order and return order.
+    submit_btn.click(fn=respond, inputs=[txt_input, audio_input], outputs=[transcription_output, text_output, audio_output])
+# Start the app when the module is executed.
+demo.launch()