Gijs Wijngaard committed on
Commit
5ee12ec
·
1 Parent(s): fbe7912
Files changed (1) hide show
  1. app.py +16 -9
app.py CHANGED
@@ -105,22 +105,29 @@ def process_audio(audio_file):
105
 
106
  # Decode the output
107
  generated_text = processor.tokenizer.decode(outputs[0], skip_special_tokens=False)
108
- assistant_text = generated_text.split("<|im_start|>assistant\n")[-1].split("<|im_end|>")[0].strip()
109
 
110
- # Extract components
111
- thinking, semantic, answer = extract_components(assistant_text)
 
 
112
 
113
- return thinking, semantic, answer
 
 
 
 
 
 
 
 
 
114
 
115
  # Create Gradio interface
116
  demo = gr.Interface(
117
  fn=process_audio,
118
  inputs=gr.Audio(type="filepath", label="Upload Audio"),
119
- outputs=[
120
- gr.Textbox(label="Thinking Process", lines=10),
121
- gr.Textbox(label="Semantic Elements", lines=5),
122
- gr.Textbox(label="Answer", lines=5)
123
- ],
124
  title="Qwen2Audio Audio Description Demo",
125
  description="Upload an audio file and the model will provide detailed analysis and description.",
126
  examples=[], # Add example files here if available
 
105
 
106
  # Decode the output
107
  generated_text = processor.tokenizer.decode(outputs[0], skip_special_tokens=False)
108
+ assistant_text = generated_text.split("\nassistant\n")[1]
109
 
110
+ # Extract sections from the response
111
+ # Add newlines before XML tags if they exist
112
+ if "<think>" in assistant_text:
113
+ assistant_text = assistant_text.replace("<think>", "\n<think>")
114
 
115
+ if "<semantic_elements>" in assistant_text:
116
+ assistant_text = assistant_text.replace("<semantic_elements>", "\n<semantic_elements>")
117
+
118
+ if "<answer>" in assistant_text:
119
+ assistant_text = assistant_text.replace("<answer>", "\n<answer>")
120
+
121
+
122
+ # Combine all components into a single output
123
+
124
+ return assistant_text
125
 
126
  # Create Gradio interface
127
  demo = gr.Interface(
128
  fn=process_audio,
129
  inputs=gr.Audio(type="filepath", label="Upload Audio"),
130
+ outputs=gr.Textbox(label="Analysis Result", lines=20),
 
 
 
 
131
  title="Qwen2Audio Audio Description Demo",
132
  description="Upload an audio file and the model will provide detailed analysis and description.",
133
  examples=[], # Add example files here if available