Update README.md
README.md
CHANGED
@@ -152,50 +152,6 @@ print(result)
+    punctuator=False,
 ```
 
-### Transcription with Prompt
-Kotoba-whisper can generate transcription with prompting as below:
-
-```python
-import re
-import torch
-from transformers import pipeline
-from datasets import load_dataset
-
-# config
-model_id = "kotoba-tech/kotoba-whisper-v2.1"
-torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-device = "cuda:0" if torch.cuda.is_available() else "cpu"
-model_kwargs = {"attn_implementation": "sdpa"} if torch.cuda.is_available() else {}
-generate_kwargs = {"language": "japanese", "task": "transcribe"}
-
-# load model
-pipe = pipeline(
-    model=model_id,
-    torch_dtype=torch_dtype,
-    device=device,
-    model_kwargs=model_kwargs,
-    chunk_length_s=15,
-    batch_size=16,
-    trust_remote_code=True
-)
-
-# load sample audio
-dataset = load_dataset("japanese-asr/ja_asr.reazonspeech_test", split="test")
-
-# --- Without prompt ---
-text = pipe(dataset[10]["audio"], generate_kwargs=generate_kwargs)['text']
-print(text)
-# 81歳、力強い走りに変わってきます。
-
-# --- With prompt ---: Let's change `81` to `91`.
-prompt = "91歳"
-generate_kwargs['prompt_ids'] = pipe.tokenizer.get_prompt_ids(prompt, return_tensors="pt").to(device)
-text = pipe(dataset[10]["audio"], generate_kwargs=generate_kwargs)['text']
-# currently the pipeline for ASR appends the prompt at the beginning of the transcription, so remove it
-text = re.sub(rf"\A\s*{prompt}\s*", "", text)
-print(text)
-# あっぶったでもスルガさん、91歳、力強い走りに変わってきます。
-```
 
 ### Flash Attention 2
 We recommend using [Flash-Attention 2](https://huggingface.co/docs/transformers/main/en/perf_infer_gpu_one#flashattention-2)
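The hunk above adds `punctuator=False` to the pipeline example that ends at line 152 and drops the prompting walkthrough. A minimal sketch of the resulting call, assuming the surrounding README code and that `punctuator` is a flag of the model's custom pipeline (it is loaded with `trust_remote_code=True`); the audio path is hypothetical:

```python
import torch
from transformers import pipeline

# config (mirrors the README snippet this commit edits)
model_id = "kotoba-tech/kotoba-whisper-v2.1"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# load model; punctuator=False (the line this commit adds) is assumed to
# disable the custom pipeline's punctuation-restoration step
pipe = pipeline(
    model=model_id,
    torch_dtype=torch_dtype,
    device=device,
    chunk_length_s=15,
    batch_size=16,
    trust_remote_code=True,
    punctuator=False,
)

# transcribe; "sample.wav" is a hypothetical local audio file
result = pipe("sample.wav", generate_kwargs={"language": "japanese", "task": "transcribe"})
print(result)
```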
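The trailing context lines point at the Flash-Attention 2 recommendation. As a sketch of how that recommendation is typically followed with Transformers, `attn_implementation="flash_attention_2"` replaces the `"sdpa"` setting used elsewhere in the model card; this assumes a supported GPU and the `flash-attn` package installed (e.g. `pip install flash-attn --no-build-isolation`):

```python
import torch
from transformers import pipeline

# enable Flash-Attention 2 instead of the default sdpa attention
pipe = pipeline(
    model="kotoba-tech/kotoba-whisper-v2.1",
    torch_dtype=torch.float16,
    device="cuda:0",
    model_kwargs={"attn_implementation": "flash_attention_2"},
    batch_size=16,
    trust_remote_code=True,
)
```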