Spaces:

gijs
/

SemThink

Running

Gijs Wijngaard committed on Mar 11

Commit

5ee12ec

1 Parent(s): fbe7912

Retry

Files changed (1) hide show

app.py CHANGED Viewed

@@ -105,22 +105,29 @@ def process_audio(audio_file):
     # Decode the output
     generated_text = processor.tokenizer.decode(outputs[0], skip_special_tokens=False)
-    assistant_text = generated_text.split("<|im_start|>assistant\n")[-1].split("<|im_end|>")[0].strip()
-    # Extract components
-    thinking, semantic, answer = extract_components(assistant_text)
-    return thinking, semantic, answer
 # Create Gradio interface
 demo = gr.Interface(
     fn=process_audio,
     inputs=gr.Audio(type="filepath", label="Upload Audio"),
-    outputs=[
-        gr.Textbox(label="Thinking Process", lines=10),
-        gr.Textbox(label="Semantic Elements", lines=5),
-        gr.Textbox(label="Answer", lines=5)
-    ],
     title="Qwen2Audio Audio Description Demo",
     description="Upload an audio file and the model will provide detailed analysis and description.",
     examples=[],  # Add example files here if available

     # Decode the output
     generated_text = processor.tokenizer.decode(outputs[0], skip_special_tokens=False)
+    assistant_text = generated_text.split("\nassistant\n")[1]
+    # Extract sections from the response
+    # Add newlines before XML tags if they exist
+    if "<think>" in assistant_text:
+        assistant_text = assistant_text.replace("<think>", "\n<think>")
+    if "<semantic_elements>" in assistant_text:
+        assistant_text = assistant_text.replace("<semantic_elements>", "\n<semantic_elements>")
+    if "<answer>" in assistant_text:
+        assistant_text = assistant_text.replace("<answer>", "\n<answer>")
+    # Combine all components into a single output
+    return assistant_text
 # Create Gradio interface
 demo = gr.Interface(
     fn=process_audio,
     inputs=gr.Audio(type="filepath", label="Upload Audio"),
+    outputs=gr.Textbox(label="Analysis Result", lines=20),
     title="Qwen2Audio Audio Description Demo",
     description="Upload an audio file and the model will provide detailed analysis and description.",
     examples=[],  # Add example files here if available