Spaces:

sagar007
/

shuka_audio

Sleeping

App Files Community

sagar007 committed on Aug 24, 2024

Commit

e3a075e

verified ·

1 Parent(s): 6c4a628

Update app.py

Browse files

Files changed (1) hide show

app.py +124 -19

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ from transformers import pipeline, WhisperProcessor, WhisperForConditionalGenera
 from gtts import gTTS
 import gradio as gr
 import spaces
 print("Using GPU for operations when available")
@@ -51,15 +52,29 @@ def process_audio_input(audio, whisper_processor, whisper_model):
 # Generate response within a GPU-decorated function
 @spaces.GPU
 def text_to_speech(text, lang='hi'):
     try:
         # Use a better TTS engine for Indic languages
         if lang in ['hi', 'bn', 'gu', 'kn', 'ml', 'mr', 'or', 'pa', 'ta', 'te']:
-            # You might want to use a different TTS library here
-            # For example, you could use the Google Cloud Text-to-Speech API
-            # or a specialized Indic language TTS library
-            # This is a placeholder for a better Indic TTS solution
             tts = gTTS(text=text, lang=lang, tld='co.in')  # Use Indian TLD
         else:
             tts = gTTS(text=text, lang=lang)
@@ -70,7 +85,7 @@ def text_to_speech(text, lang='hi'):
         print(f"Error in text-to-speech: {str(e)}")
         return None
-# Replace the existing detect_language function with this improved version
 def detect_language(text):
     lang_codes = {
         'bn': 'Bengali', 'gu': 'Gujarati', 'hi': 'Hindi', 'kn': 'Kannada',
@@ -87,17 +102,6 @@ def detect_language(text):
             if any(ord(char) >= 0x0900 and ord(char) <= 0x097F for char in text):  # Devanagari script
                 return 'hi'
         return 'en'  # Default to English if no Indic script is detected
-@spaces.GPU
-def generate_response(transcription, sarvam_pipe):
-    if sarvam_pipe is None:
-        return "Error: Text generation model is not available."
-    try:
-        # Generate response using the sarvam-2b model
-        response = sarvam_pipe(transcription, max_length=100, num_return_sequences=1)[0]['generated_text']
-        return response
-    except Exception as e:
-        return f"Error generating response: {str(e)}"
 @spaces.GPU
 def indic_language_assistant(input_type, audio_input, text_input):
@@ -121,13 +125,111 @@ def indic_language_assistant(input_type, audio_input, text_input):
     except Exception as e:
         error_message = f"An error occurred: {str(e)}"
         return error_message, error_message, None
 # Create Gradio interface
 iface = gr.Interface(
     fn=indic_language_assistant,
     inputs=[
         gr.Radio(["audio", "text"], label="Input Type", value="audio"),
         gr.Audio(type="filepath", label="Speak (if audio input selected)"),
-        gr.Textbox(label="Type your message (if text input selected)")
     ],
     outputs=[
         gr.Textbox(label="Transcription/Input"),
@@ -135,7 +237,10 @@ iface = gr.Interface(
         gr.Audio(label="Audio Response")
     ],
     title="Indic Language Virtual Assistant",
-    description="Speak or type in any supported Indic language or English. The assistant will respond in text and audio."
 )
 # Launch the app

 from gtts import gTTS
 import gradio as gr
 import spaces
+from langdetect import detect
 print("Using GPU for operations when available")
 # Generate response within a GPU-decorated function
 @spaces.GPU
+def generate_response(transcription, sarvam_pipe):
+    if sarvam_pipe is None:
+        return "Error: Text generation model is not available."
+    try:
+        # Prepare the prompt
+        prompt = f"Human: {transcription}\n\nAssistant:"
+        # Generate response using the sarvam-2b model
+        response = sarvam_pipe(prompt, max_length=200, num_return_sequences=1, do_sample=True, temperature=0.7)[0]['generated_text']
+        # Extract the assistant's response
+        assistant_response = response.split("Assistant:")[-1].strip()
+        return assistant_response
+    except Exception as e:
+        return f"Error generating response: {str(e)}"
+# Text-to-speech function
 def text_to_speech(text, lang='hi'):
     try:
         # Use a better TTS engine for Indic languages
         if lang in ['hi', 'bn', 'gu', 'kn', 'ml', 'mr', 'or', 'pa', 'ta', 'te']:
             tts = gTTS(text=text, lang=lang, tld='co.in')  # Use Indian TLD
         else:
             tts = gTTS(text=text, lang=lang)
         print(f"Error in text-to-speech: {str(e)}")
         return None
+# Language detection function
 def detect_language(text):
     lang_codes = {
         'bn': 'Bengali', 'gu': 'Gujarati', 'hi': 'Hindi', 'kn': 'Kannada',
             if any(ord(char) >= 0x0900 and ord(char) <= 0x097F for char in text):  # Devanagari script
                 return 'hi'
         return 'en'  # Default to English if no Indic script is detected
 @spaces.GPU
 def indic_language_assistant(input_type, audio_input, text_input):
     except Exception as e:
         error_message = f"An error occurred: {str(e)}"
         return error_message, error_message, None
+# Custom CSS
+custom_css = """
+body {
+    background-color: #1a1a1a;
+    color: #ffffff;
+    font-family: Arial, sans-serif;
+}
+.container {
+    max-width: 800px;
+    margin: 0 auto;
+    padding: 20px;
+}
+h1 {
+    font-size: 2.5em;
+    background: linear-gradient(45deg, #4a90e2, #f48fb1);
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
+    margin-bottom: 10px;
+}
+h2 {
+    color: #a0a0a0;
+    font-weight: normal;
+}
+.task-container {
+    display: flex;
+    justify-content: space-between;
+    flex-wrap: wrap;
+    margin-top: 30px;
+}
+.task-card {
+    background-color: #2a2a2a;
+    border-radius: 10px;
+    padding: 15px;
+    margin: 10px 0;
+    width: calc(50% - 10px);
+    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+    transition: transform 0.3s ease;
+}
+.task-card:hover {
+    transform: translateY(-5px);
+}
+.task-icon {
+    font-size: 24px;
+    margin-bottom: 10px;
+}
+.input-box {
+    width: 100%;
+    padding: 10px;
+    border-radius: 20px;
+    border: none;
+    background-color: #333;
+    color: #fff;
+    margin-top: 20px;
+}
+.submit-btn {
+    background-color: #4a90e2;
+    color: white;
+    border: none;
+    padding: 10px 20px;
+    border-radius: 20px;
+    cursor: pointer;
+    margin-top: 10px;
+    transition: background-color 0.3s ease;
+}
+.submit-btn:hover {
+    background-color: #3a7bd5;
+}
+"""
+# Custom HTML
+custom_html = """
+<div class="container">
+    <h1>Hello, User</h1>
+    <h2>How can I help you today?</h2>
+    <div class="task-container">
+        <div class="task-card">
+            <div class="task-icon">🎤</div>
+            <p>Speak in any Indic language</p>
+        </div>
+        <div class="task-card">
+            <div class="task-icon">⌨️</div>
+            <p>Type in any Indic language</p>
+        </div>
+    </div>
+</div>
+"""
 # Create Gradio interface
 iface = gr.Interface(
     fn=indic_language_assistant,
     inputs=[
         gr.Radio(["audio", "text"], label="Input Type", value="audio"),
         gr.Audio(type="filepath", label="Speak (if audio input selected)"),
+        gr.Textbox(label="Type your message (if text input selected)", elem_classes="input-box")
     ],
     outputs=[
         gr.Textbox(label="Transcription/Input"),
         gr.Audio(label="Audio Response")
     ],
     title="Indic Language Virtual Assistant",
+    description="Speak or type in any supported Indic language or English. The assistant will respond in text and audio.",
+    css=custom_css,
+    elem_id="indic-assistant",
+    theme="dark"
 )
 # Launch the app