Gijs Wijngaard
committed on
Commit
·
39d1328
1
Parent(s):
044a34e
Init
Browse files- .gitattributes +1 -0
- README.md +0 -12
- app.py +118 -0
- model/README.md +3 -0
- model/adapter_config.json +3 -0
- model/adapter_model.safetensors +3 -0
- model/added_tokens.json +3 -0
- model/merges.txt +3 -0
- model/optimizer.pt +3 -0
- model/rng_state.pth +3 -0
- model/scheduler.pt +3 -0
- model/special_tokens_map.json +3 -0
- model/tokenizer.json +3 -0
- model/tokenizer_config.json +3 -0
- model/trainer_state.json +3 -0
- model/training_args.bin +3 -0
- model/vocab.json +3 -0
- requirements.txt +5 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
README.md
DELETED
@@ -1,12 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: SemThink
|
3 |
-
emoji: 🐨
|
4 |
-
colorFrom: red
|
5 |
-
colorTo: gray
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 5.20.1
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
import gradio as gr
|
4 |
+
import torch
|
5 |
+
from transformers import AutoProcessor
|
6 |
+
from qwen import Qwen2AudioForConditionalGeneration
|
7 |
+
from peft import PeftModel, PeftConfig
|
8 |
+
|
9 |
+
# Model path and configuration
|
10 |
+
model_path = "./model"
|
11 |
+
base_model_id = "Qwen/Qwen2-Audio-7B-Instruct"
|
12 |
+
|
13 |
+
# Load the model and processor
|
14 |
+
def load_model():
    """Build the adapter-wrapped model and its processor.

    The processor and the base model are both fetched using
    ``base_model_id``; the adapter weights are read from ``model_path``
    and applied on top of the base model.

    Returns:
        A ``(model, processor)`` tuple.
    """
    # Processor comes from the base checkpoint.
    audio_processor = AutoProcessor.from_pretrained(
        base_model_id,
        trust_remote_code=True,
    )

    # Base model in bfloat16, with device placement left to device_map="auto".
    backbone = Qwen2AudioForConditionalGeneration.from_pretrained(
        base_model_id,
        device_map="auto",
        trust_remote_code=True,
        torch_dtype=torch.bfloat16,
    )

    # Wrap the base model with the adapter stored under model_path.
    return PeftModel.from_pretrained(backbone, model_path), audio_processor
|
33 |
+
|
34 |
+
# Initialize model and processor
|
35 |
+
model, processor = load_model()
|
36 |
+
|
37 |
+
# Function to extract components from model output
|
38 |
+
def extract_components(text):
    """Split a model response into its tagged sections.

    Looks for ``<think>``, ``<semantic_elements>`` and ``<answer>`` sections
    in ``text``. A section normally ends at its closing tag; if the closing
    tag is missing (for example because generation stopped at the token
    limit), everything up to the end of ``text`` is used instead of dropping
    the section.

    Args:
        text: Raw assistant response.

    Returns:
        A ``(thinking, semantic, answer)`` tuple of stripped strings. A
        section whose opening tag is absent yields ``""``.
    """

    def _section(tag):
        # Lazy match: stop at the first closing tag, or at end of text when
        # the response was truncated before the tag was closed.
        match = re.search(rf"<{tag}>(.*?)(?:</{tag}>|\Z)", text, re.DOTALL)
        return match.group(1).strip() if match else ""

    return _section("think"), _section("semantic_elements"), _section("answer")
|
59 |
+
|
60 |
+
# Function to process audio and return components
|
61 |
+
def process_audio(audio_file):
    """Describe an audio clip and split the response into its sections.

    Args:
        audio_file: Path to the audio file (the Gradio input is configured
            with ``type="filepath"``), or ``None`` when nothing was provided.

    Returns:
        The ``(thinking, semantic, answer)`` tuple produced by
        ``extract_components`` for the assistant's reply.

    Raises:
        gr.Error: If no audio file was supplied.
    """
    # Gradio passes None when the form is submitted without audio.
    if not audio_file:
        raise gr.Error("Please upload an audio file first.")

    # Local import: the helper ships with transformers, which this file
    # already depends on. NOTE(review): it shells out to the ffmpeg binary,
    # so ffmpeg must be available on PATH - confirm for the deployment image.
    from transformers.pipelines.audio_utils import ffmpeg_read

    # Decode the file into a waveform at the rate the feature extractor
    # expects; the processor needs samples, not a path string.
    sampling_rate = processor.feature_extractor.sampling_rate
    with open(audio_file, "rb") as audio_handle:
        waveform = ffmpeg_read(audio_handle.read(), sampling_rate)

    # Create conversation format
    conversation = [
        {"role": "user", "content": [
            {"type": "audio", "audio": audio_file},
            {"type": "text", "text": "Describe the audio in detail."}
        ]}
    ]

    # Render the conversation to prompt text (tokenization happens below).
    chat_text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)

    # Build model inputs and move them to the model's device.
    inputs = processor(
        text=chat_text,
        audios=[waveform],
        return_tensors="pt",
        sampling_rate=sampling_rate,
    ).to(model.device)

    # Greedy decoding, capped at 768 new tokens.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=768,
            do_sample=False,
        )

    # Special tokens are kept so the chat markers can be used to isolate the
    # assistant turn from the echoed prompt.
    generated_text = processor.tokenizer.decode(outputs[0], skip_special_tokens=False)
    assistant_text = generated_text.split("<|im_start|>assistant\n")[-1].split("<|im_end|>")[0].strip()

    return extract_components(assistant_text)
|
100 |
+
|
101 |
+
# Create Gradio interface
|
102 |
+
# Input widget: hands the handler a path to the uploaded file.
_audio_input = gr.Audio(type="filepath", label="Upload Audio")

# One textbox per value returned by process_audio, in the same order.
_output_boxes = [
    gr.Textbox(label="Thinking Process", lines=10),
    gr.Textbox(label="Semantic Elements", lines=5),
    gr.Textbox(label="Answer", lines=5),
]

demo = gr.Interface(
    fn=process_audio,
    inputs=_audio_input,
    outputs=_output_boxes,
    title="Qwen2Audio Audio Description Demo",
    description="Upload an audio file and the model will provide detailed analysis and description.",
    examples=[],  # Add example files here if available
    cache_examples=False,
)

# Start the UI only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|
model/README.md
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a91b1820ee38f2fe4be96b8431300dc9296ec83df43d36f32551cb1bd496b6ac
|
3 |
+
size 5102
|
model/adapter_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b69cf0e03533c61b50cedb46e542755522c3b648edcfa13797dccb3d09e597b
|
3 |
+
size 738
|
model/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c55095f34cd9d4450569b93545bd40482a7a033173f2241d2a52a6b69a741d73
|
3 |
+
size 22056664
|
model/added_tokens.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e3a16457638c3955f95f98446d42eab5096a074daba4dec5d569e2177568a2b
|
3 |
+
size 77138
|
model/merges.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5
|
3 |
+
size 1671853
|
model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59927c3c5eaecef430745b66190edac4504648dac188a78b34a3e2aebfc37784
|
3 |
+
size 44254970
|
model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:000068d3df893f9adfe39892ef754472eebe0945e015804556ea73b1840be65c
|
3 |
+
size 14244
|
model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ae2a6ae5ac67c587ae44c5692203cf77c0ed6a71b7a4a293d9ec49164b5d659
|
3 |
+
size 1064
|
model/special_tokens_map.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1ed3a229905e152acdb6943f501075b5957bd5774c5940edb81ec1b55e86389
|
3 |
+
size 57715
|
model/tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fecdb47d281073055efd605d080013e3114ed0f3c5d8af201e245b199864c9c7
|
3 |
+
size 12030779
|
model/tokenizer_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ed216fb2f9e3f05ef5d667a2f644a2f91034b500e5224c003f1437247ad8e46
|
3 |
+
size 638366
|
model/trainer_state.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:917b9ca9c249f65eea8e7970856b3e03f590ee032a26a503cf78d58ac21124e6
|
3 |
+
size 125231
|
model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eaeb2c00be2b954438fd006b4c6ee5c73d08a402950561ead283cc8968c44a55
|
3 |
+
size 5944
|
model/vocab.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
|
3 |
+
size 2776833
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
torch
|
3 |
+
transformers
|
4 |
+
qwen
|
5 |
+
peft
|