Spaces:

sagar007
/

shuka_audio

Sleeping

App Files Community

sagar007 committed on Aug 18, 2024

Commit

5c15933

verified ·

1 Parent(s): 80a4b52

Create app.py

Browse files

Files changed (1) hide show

app.py +43 -0

app.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import os
+
+import gradio as gr
+import librosa
+import torch
+import transformers
+# Load the model pipeline
+pipe = transformers.pipeline(model='sarvamai/shuka_v1', trust_remote_code=True, device=0 if torch.cuda.is_available() else -1, torch_dtype=torch.bfloat16)
+def process_audio(audio_file, system_prompt, user_prompt):
+    # Load and preprocess the audio
+    audio, sr = librosa.load(audio_file.name, sr=16000)
+    # Prepare the conversation turns
+    turns = [
+        {'role': 'system', 'content': system_prompt},
+        {'role': 'user', 'content': f'<|audio|>{user_prompt}'}
+    ]
+    # Generate response
+    result = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=512)
+    return result[0]['generated_text']
+# Create the Gradio interface
+iface = gr.Interface(
+    fn=process_audio,
+    inputs=[
+        gr.Audio(type="filepath", label="Upload Audio (Indic language)"),
+        gr.Textbox(label="System Prompt", value="Respond naturally and informatively."),
+        gr.Textbox(label="User Prompt (optional)", value="")
+    ],
+    outputs=gr.Textbox(label="Shuka v1 Response"),
+    title="Shuka v1 Demo: Multilingual Audio Understanding",
+    description="Upload an audio file in any Indic language, and Shuka v1 will process and respond to it.",
+    examples=[
+        ["path/to/hindi_sample.wav", "Respond naturally and informatively.", "What is the main topic of this audio?"],
+        ["path/to/tamil_sample.wav", "Translate the audio content to English.", ""],
+        ["path/to/bengali_sample.wav", "Summarize the key points discussed in the audio.", ""]
+    ]
+)
+# Launch the app
+iface.launch()