Spaces:

Pontonkid
/

AI-Voice-system

Sleeping

App Files Community

Pontonkid committed on Feb 28

Commit

ac36bdc

verified ·

1 Parent(s): b0073f1

Create app.py

Browse files

Files changed (1) hide show

app.py +117 -0

app.py ADDED Viewed

	@@ -0,0 +1,117 @@

+import whisper as openai_whisper
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from TTS.api import TTS
+import gradio as gr
+import torch
+import os
+# 1. Speech-to-Text (STT) Implementation
+def setup_stt():
+    """Load and return the OpenAI Whisper "base" speech-to-text model."""
+    # Imported as `openai_whisper` so it is explicit which Whisper package is used.
+    return openai_whisper.load_model("base")
+def transcribe_audio(model, audio_file):
+    """Transcribe ``audio_file`` with ``model`` and return the recognised text.
+
+    Args:
+        model: Object exposing ``transcribe(audio_file)`` that returns a
+            mapping with a ``'text'`` entry (the model from ``setup_stt``).
+        audio_file: Audio input forwarded unchanged to ``model.transcribe``.
+
+    Returns:
+        The ``'text'`` entry of the transcription result.
+    """
+    transcript = model.transcribe(audio_file)['text']
+    # Echo the transcript to stdout so it shows up in the application log.
+    print("Transcription:", transcript)
+    return transcript
+# 2. Natural Language Processing (NLP) Implementation
+def setup_nlp():
+    """Load the pretrained "gpt2" tokenizer and causal language model.
+
+    Returns:
+        A ``(tokenizer, model)`` tuple.
+    """
+    checkpoint = "gpt2"
+    # Tuple elements are evaluated left to right: tokenizer first, then model.
+    return (
+        AutoTokenizer.from_pretrained(checkpoint),
+        AutoModelForCausalLM.from_pretrained(checkpoint),
+    )
+def generate_response(tokenizer, model, input_text):
+    """Generate the assistant's reply to ``input_text`` with a causal LM.
+
+    The text is wrapped in a "User: ...\\nAssistant:" prompt, the model
+    continues it, and only the newly generated continuation is returned.
+
+    Args:
+        tokenizer: Tokenizer providing ``encode``, ``decode`` and
+            ``eos_token_id`` (the tokenizer from ``setup_nlp``).
+        model: Causal language model providing ``generate`` and ``device``
+            (the model from ``setup_nlp``).
+        input_text: The user's utterance.
+
+    Returns:
+        The generated reply as a string, without the prompt echoed back and
+        with surrounding whitespace removed.
+    """
+    prompt = f"User: {input_text}\nAssistant:"
+    # Keep the prompt on the same device as the model; the caller may have
+    # moved the model to CUDA, and generate() rejects mixed-device inputs.
+    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
+    prompt_length = input_ids.shape[-1]
+    output_ids = model.generate(
+        input_ids,
+        # Single un-padded sequence: every position is a real token.
+        attention_mask=torch.ones_like(input_ids),
+        # Budget the reply itself; max_length would also count prompt tokens
+        # and leave no room for a reply after a long transcription.
+        max_new_tokens=150,
+        num_return_sequences=1,
+        # temperature / top_p only take effect when sampling is enabled.
+        do_sample=True,
+        temperature=0.7,
+        top_p=0.9,
+        pad_token_id=tokenizer.eos_token_id,
+        no_repeat_ngram_size=2,
+    )
+    # generate() returns prompt + continuation; drop the prompt tokens so the
+    # caller does not display (or speak) "User: ... Assistant:" back.
+    reply_ids = output_ids[0][prompt_length:]
+    return tokenizer.decode(reply_ids, skip_special_tokens=True).strip()
+# 3. Text-to-Speech (TTS) Implementation
+def setup_tts():
+    """Create and return the text-to-speech engine.
+
+    Uses the ``tts_models/en/ljspeech/tacotron2-DDC`` model.
+    """
+    return TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")
+def generate_speech(tts, text, file_path="output.wav"):
+    """Synthesise ``text`` to an audio file and return the file's path.
+
+    Args:
+        tts: Engine exposing ``tts_to_file(text, file_path=...)``
+            (the object from ``setup_tts``).
+        text: Text to speak.
+        file_path: Destination path for the audio; defaults to "output.wav".
+
+    Returns:
+        ``file_path``, unchanged.
+    """
+    destination = file_path
+    tts.tts_to_file(text, file_path=destination)
+    return destination
+# 4. Voice AI System Class
+class VoiceAISystem:
+    """Speech-in / speech-out pipeline: transcribe, generate a reply, speak it.
+
+    Holds the models returned by ``setup_stt``, ``setup_nlp`` and
+    ``setup_tts`` and chains them together in ``process_audio``.
+    """
+    def __init__(self):
+        """Load all three models and move the language model to ``self.device``."""
+        print("Initializing Voice AI System...")
+        print("Loading STT model...")
+        self.stt_model = setup_stt()
+        print("Loading NLP model...")
+        self.tokenizer, self.nlp_model = setup_nlp()
+        print("Loading TTS model...")
+        self.tts_model = setup_tts()
+        # GPU Optimization: use CUDA when available, otherwise stay on CPU.
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        print(f"Using device: {self.device}")
+        self.nlp_model = self.nlp_model.to(self.device)
+        print("System initialization complete!")
+    def process_audio(self, audio_file):
+        """Run one full voice turn for ``audio_file``.
+
+        Args:
+            audio_file: Audio input forwarded to ``transcribe_audio``.
+
+        Returns:
+            A 3-tuple ``(audio_path, transcript, response_text)``.
+            ``audio_path`` is ``None`` when no speech was recognised or when
+            any step raised; in the latter case the second element carries
+            the error message.
+        """
+        # Top-level boundary for the UI callback: any failure is reported in
+        # the output fields instead of propagating to the caller.
+        try:
+            os.makedirs("tmp", exist_ok=True)
+            print("Transcribing audio...")
+            text = transcribe_audio(self.stt_model, audio_file)
+            # Nothing recognised: skip generation rather than letting the
+            # language model continue an empty prompt and voicing the result.
+            if not text or not text.strip():
+                return None, "No speech detected", "No response generated"
+            print("Generating response...")
+            # torch.autocast replaces the deprecated torch.cuda.amp.autocast;
+            # mixed precision is enabled only when the model runs on CUDA.
+            use_cuda = self.device.type == "cuda"
+            with torch.autocast(device_type=self.device.type, enabled=use_cuda):
+                response = generate_response(self.tokenizer, self.nlp_model, text)
+            print("Converting response to speech...")
+            output_path = os.path.join("tmp", "response.wav")
+            audio_response = generate_speech(self.tts_model, response, output_path)
+            return audio_response, text, response
+        except Exception as e:
+            print(f"Error during processing: {str(e)}")
+            return None, f"Error: {str(e)}", "Error processing request"
+# 5. Gradio UI Integration
+def create_voice_ai_interface():
+    """Build the Gradio interface around a newly created ``VoiceAISystem``.
+
+    Returns:
+        A ``gr.Interface`` with one audio input and three outputs: the spoken
+        reply, the transcribed text and the reply text.
+    """
+    system = VoiceAISystem()
+    def chat(audio):
+        # Only run the pipeline when a recording was actually supplied.
+        if audio is not None:
+            return system.process_audio(audio)
+        return None, "No audio provided", "No response generated"
+    recorder = gr.Audio(type="filepath", label="Speak here")
+    result_components = [
+        gr.Audio(label="AI Response"),
+        gr.Textbox(label="Transcribed Text"),
+        gr.Textbox(label="AI Response Text"),
+    ]
+    return gr.Interface(
+        fn=chat,
+        inputs=[recorder],
+        outputs=result_components,
+        title="Voice AI System",
+        description="Click to record your voice and interact with the AI",
+    )
+# Launch the interface
+if __name__ == "__main__":
+    # Build the UI (which constructs VoiceAISystem and loads its models),
+    # then serve it with Gradio's share option turned on.
+    create_voice_ai_interface().launch(share=True)