Gijs Wijngaard
committed on
Commit
·
5ee12ec
1
Parent(s):
fbe7912
Retry
Browse files
app.py
CHANGED
@@ -105,22 +105,29 @@ def process_audio(audio_file):
|
|
105 |
|
106 |
# Decode the output
|
107 |
generated_text = processor.tokenizer.decode(outputs[0], skip_special_tokens=False)
|
108 |
-
assistant_text = generated_text.split("
|
109 |
|
110 |
-
# Extract
|
111 |
-
|
|
|
|
|
112 |
|
113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
|
115 |
# Create Gradio interface
|
116 |
demo = gr.Interface(
|
117 |
fn=process_audio,
|
118 |
inputs=gr.Audio(type="filepath", label="Upload Audio"),
|
119 |
-
outputs=
|
120 |
-
gr.Textbox(label="Thinking Process", lines=10),
|
121 |
-
gr.Textbox(label="Semantic Elements", lines=5),
|
122 |
-
gr.Textbox(label="Answer", lines=5)
|
123 |
-
],
|
124 |
title="Qwen2Audio Audio Description Demo",
|
125 |
description="Upload an audio file and the model will provide detailed analysis and description.",
|
126 |
examples=[], # Add example files here if available
|
|
|
105 |
|
106 |
# Decode the output
|
107 |
generated_text = processor.tokenizer.decode(outputs[0], skip_special_tokens=False)
|
108 |
+
assistant_text = generated_text.split("\nassistant\n")[1]
|
109 |
|
110 |
+
# Extract sections from the response
|
111 |
+
# Add newlines before XML tags if they exist
|
112 |
+
if "<think>" in assistant_text:
|
113 |
+
assistant_text = assistant_text.replace("<think>", "\n<think>")
|
114 |
|
115 |
+
if "<semantic_elements>" in assistant_text:
|
116 |
+
assistant_text = assistant_text.replace("<semantic_elements>", "\n<semantic_elements>")
|
117 |
+
|
118 |
+
if "<answer>" in assistant_text:
|
119 |
+
assistant_text = assistant_text.replace("<answer>", "\n<answer>")
|
120 |
+
|
121 |
+
|
122 |
+
# Combine all components into a single output
|
123 |
+
|
124 |
+
return assistant_text
|
125 |
|
126 |
# Create Gradio interface
|
127 |
demo = gr.Interface(
|
128 |
fn=process_audio,
|
129 |
inputs=gr.Audio(type="filepath", label="Upload Audio"),
|
130 |
+
outputs=gr.Textbox(label="Analysis Result", lines=20),
|
|
|
|
|
|
|
|
|
131 |
title="Qwen2Audio Audio Description Demo",
|
132 |
description="Upload an audio file and the model will provide detailed analysis and description.",
|
133 |
examples=[], # Add example files here if available
|