sagar007 committed on
Commit
5c15933
·
verified ·
1 Parent(s): 80a4b52

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -0
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import gradio as gr
import librosa
import torch
import transformers
5
+
6
# Build the inference pipeline once at import time so every request reuses it.
# The model repository ships custom pipeline code, hence trust_remote_code=True.
pipe = transformers.pipeline(
    model='sarvamai/shuka_v1',
    trust_remote_code=True,
    # First CUDA device when one is visible, otherwise -1 (CPU).
    device=0 if torch.cuda.is_available() else -1,
    torch_dtype=torch.bfloat16,
)
8
+
9
def process_audio(audio_file, system_prompt, user_prompt):
    """Run the Shuka pipeline on an audio clip and return the generated reply.

    Args:
        audio_file: Path to the audio file as a string (what
            ``gr.Audio(type="filepath")`` supplies), or an object exposing
            the path through a ``.name`` attribute. ``None`` means nothing
            was uploaded.
        system_prompt: Text used as the content of the system turn.
        user_prompt: Optional text placed after the ``<|audio|>`` placeholder
            in the user turn.

    Returns:
        The text produced by the pipeline.

    Raises:
        gr.Error: If no audio was provided.
    """
    if audio_file is None:
        raise gr.Error("Please upload an audio file first.")

    # A "filepath" audio input arrives as a plain string, which has no .name
    # attribute; only fall back to .name for file-like wrapper objects.
    audio_path = audio_file if isinstance(audio_file, str) else audio_file.name

    # Load the clip resampled to 16 kHz.
    audio, sr = librosa.load(audio_path, sr=16000)

    # Prepare the conversation turns; an absent user prompt must not be
    # rendered as the literal text "None".
    turns = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': f'<|audio|>{user_prompt or ""}'},
    ]

    # Generate response
    result = pipe(
        {'audio': audio, 'turns': turns, 'sampling_rate': sr},
        max_new_tokens=512,
    )

    # NOTE(review): the pipeline is remote code; it may return the text
    # directly instead of a list of {'generated_text': ...} dicts -- confirm
    # against the model repository. Both shapes are accepted here.
    if isinstance(result, str):
        return result
    return result[0]['generated_text']
23
+
24
# Candidate example rows, each laid out as [audio path, system prompt, user prompt].
_EXAMPLE_ROWS = [
    ["path/to/hindi_sample.wav", "Respond naturally and informatively.", "What is the main topic of this audio?"],
    ["path/to/tamil_sample.wav", "Translate the audio content to English.", ""],
    ["path/to/bengali_sample.wav", "Summarize the key points discussed in the audio.", ""],
]

# Only advertise examples whose audio file is actually present on disk; a row
# pointing at a missing file would make example processing fail.
_AVAILABLE_EXAMPLES = [row for row in _EXAMPLE_ROWS if os.path.isfile(row[0])]

# Create the Gradio interface
iface = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio (Indic language)"),
        gr.Textbox(label="System Prompt", value="Respond naturally and informatively."),
        gr.Textbox(label="User Prompt (optional)", value=""),
    ],
    outputs=gr.Textbox(label="Shuka v1 Response"),
    title="Shuka v1 Demo: Multilingual Audio Understanding",
    description="Upload an audio file in any Indic language, and Shuka v1 will process and respond to it.",
    # Pass None (no examples section) when no sample file is available.
    examples=_AVAILABLE_EXAMPLES or None,
)

# Launch the app
iface.launch()