Gijs Wijngaard committed on
Commit 39d1328 · 1 Parent(s): 044a34e
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md DELETED
@@ -1,12 +0,0 @@
----
-title: SemThink
-emoji: 🐨
-colorFrom: red
-colorTo: gray
-sdk: gradio
-sdk_version: 5.20.1
-app_file: app.py
-pinned: false
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,120 @@
+import os
+import re
+import gradio as gr
+import librosa
+import torch
+from transformers import AutoProcessor
+from qwen import Qwen2AudioForConditionalGeneration
+from peft import PeftModel, PeftConfig
+
+# Model path and configuration
+model_path = "./model"
+base_model_id = "Qwen/Qwen2-Audio-7B-Instruct"
+
+# Load the model and processor
+def load_model():
+    # Load the processor from the base model
+    processor = AutoProcessor.from_pretrained(
+        base_model_id,
+        trust_remote_code=True,
+    )
+
+    # Load the base model
+    base_model = Qwen2AudioForConditionalGeneration.from_pretrained(
+        base_model_id,
+        torch_dtype=torch.bfloat16,
+        trust_remote_code=True,
+        device_map="auto",
+    )
+
+    # Load the LoRA adapter
+    model = PeftModel.from_pretrained(base_model, model_path)
+
+    return model, processor
+
+# Initialize model and processor
+model, processor = load_model()
+
+# Function to extract components from model output
+def extract_components(text):
+    thinking = ""
+    semantic = ""
+    answer = ""
+
+    # Extract thinking
+    think_match = re.search(r"<think>(.*?)</think>", text, re.DOTALL)
+    if think_match:
+        thinking = think_match.group(1).strip()
+
+    # Extract semantic elements
+    semantic_match = re.search(r"<semantic_elements>(.*?)</semantic_elements>", text, re.DOTALL)
+    if semantic_match:
+        semantic = semantic_match.group(1).strip()
+
+    # Extract answer
+    answer_match = re.search(r"<answer>(.*?)</answer>", text, re.DOTALL)
+    if answer_match:
+        answer = answer_match.group(1).strip()
+
+    return thinking, semantic, answer
+
+# Function to process audio and return components
+def process_audio(audio_file):
+    # Decode the upload to a raw waveform at the feature extractor's sampling rate
+    sampling_rate = processor.feature_extractor.sampling_rate
+    audio, _ = librosa.load(audio_file, sr=sampling_rate)
+
+    # Create conversation format
+    conversation = [
+        {"role": "user", "content": [
+            {"type": "audio", "audio": audio_file},
+            {"type": "text", "text": "Describe the audio in detail."}
+        ]}
+    ]
+
+    # Format the chat
+    chat_text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
+
+    # Process the inputs (the processor expects waveforms, not file paths)
+    inputs = processor(
+        text=chat_text,
+        audios=[audio],
+        return_tensors="pt",
+        sampling_rate=sampling_rate,
+    ).to(model.device)
+
+    # Generate the output
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=768,
+            do_sample=False,
+        )
+
+    # Decode the output and keep only the assistant turn
+    generated_text = processor.tokenizer.decode(outputs[0], skip_special_tokens=False)
+    assistant_text = generated_text.split("<|im_start|>assistant\n")[-1].split("<|im_end|>")[0].strip()
+
+    # Extract components
+    thinking, semantic, answer = extract_components(assistant_text)
+
+    return thinking, semantic, answer
+
+# Create Gradio interface
+demo = gr.Interface(
+    fn=process_audio,
+    inputs=gr.Audio(type="filepath", label="Upload Audio"),
+    outputs=[
+        gr.Textbox(label="Thinking Process", lines=10),
+        gr.Textbox(label="Semantic Elements", lines=5),
+        gr.Textbox(label="Answer", lines=5)
+    ],
+    title="Qwen2Audio Audio Description Demo",
+    description="Upload an audio file and the model will provide detailed analysis and description.",
+    examples=[],  # Add example files here if available
+    cache_examples=False,
+)
+
+# Launch the app
+if __name__ == "__main__":
+    demo.launch()
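
For reference, the tagged format that extract_components assumes can be checked without loading the model. A minimal, self-contained sketch (not part of the commit; the sample string is invented):

    import re

    # Invented example of the model output format the app parses
    sample = (
        "<think>Low rumble, then a horn blast.</think>\n"
        "<semantic_elements>train, horn, rumble</semantic_elements>\n"
        "<answer>A train passes and sounds its horn.</answer>"
    )

    # re.DOTALL lets a section span multiple lines, matching the app's regexes
    print(re.search(r"<answer>(.*?)</answer>", sample, re.DOTALL).group(1).strip())
    # -> A train passes and sounds its horn.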
model/README.md ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a91b1820ee38f2fe4be96b8431300dc9296ec83df43d36f32551cb1bd496b6ac
+size 5102
model/adapter_config.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b69cf0e03533c61b50cedb46e542755522c3b648edcfa13797dccb3d09e597b
+size 738
model/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c55095f34cd9d4450569b93545bd40482a7a033173f2241d2a52a6b69a741d73
+size 22056664
model/added_tokens.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e3a16457638c3955f95f98446d42eab5096a074daba4dec5d569e2177568a2b
+size 77138
model/merges.txt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5
+size 1671853
model/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59927c3c5eaecef430745b66190edac4504648dac188a78b34a3e2aebfc37784
+size 44254970
model/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:000068d3df893f9adfe39892ef754472eebe0945e015804556ea73b1840be65c
+size 14244
model/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ae2a6ae5ac67c587ae44c5692203cf77c0ed6a71b7a4a293d9ec49164b5d659
+size 1064
model/special_tokens_map.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1ed3a229905e152acdb6943f501075b5957bd5774c5940edb81ec1b55e86389
+size 57715
model/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fecdb47d281073055efd605d080013e3114ed0f3c5d8af201e245b199864c9c7
+size 12030779
model/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ed216fb2f9e3f05ef5d667a2f644a2f91034b500e5224c003f1437247ad8e46
+size 638366
model/trainer_state.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:917b9ca9c249f65eea8e7970856b3e03f590ee032a26a503cf78d58ac21124e6
+size 125231
model/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eaeb2c00be2b954438fd006b4c6ee5c73d08a402950561ead283cc8968c44a55
+size 5944
model/vocab.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
+size 2776833
requirements.txt ADDED
@@ -0,0 +1,6 @@
+gradio
+torch
+transformers
+librosa
+qwen
+peft
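
With the dependencies installed, the pipeline can be smoke-tested without the Gradio UI. A minimal sketch (assumptions: a local file sample.wav exists — it is not shipped with this commit — and a GPU is available, since importing app loads the base model and LoRA adapter at import time):

    import app  # hypothetical usage; triggers load_model() on import

    # "sample.wav" is a placeholder path for any local audio file
    thinking, semantic, answer = app.process_audio("sample.wav")
    print(answer)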