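"""Gradio demo for Norwegian speech recognition with NbAiLab/nb-whisper-large.

Audio uploads or microphone recordings of up to 30 minutes are transcribed to
Norwegian Bokmål or Nynorsk, optionally with timestamps. The transcript is shown
as HTML and offered as a downloadable .txt file. Intended to run as a Hugging
Face Space on a GPU (the Space reports a T4).
"""
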
import time
import os
import re
import torch
import torchaudio
import gradio as gr
import spaces
from transformers import pipeline
# Use Flash Attention 2 when the flash-attn package is available; otherwise fall back to SDPA.
try:
    import flash_attn  # noqa: F401
    FLASH_ATTENTION = True
except ImportError:
    FLASH_ATTENTION = False

import yt_dlp  # used to download audio from YouTube
MODEL_NAME = "NbAiLab/nb-whisper-large"

max_audio_length = 30 * 60  # maximum audio length in seconds (30 minutes)
share = (os.environ.get("SHARE", "False")[0].lower() in "ty1") or None  # public Gradio link when SHARE is truthy
auth_token = os.environ.get("AUTH_TOKEN") or True  # HF token for the model; True falls back to the cached login

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Bruker enhet: {device}")
@spaces.GPU(duration=60 * 2)
def pipe(file, return_timestamps=False, lang="no"):
    # lang: "no" = Norwegian Bokmål (default), "nn" = Norwegian Nynorsk.
    asr = pipeline(
        task="automatic-speech-recognition",
        model=MODEL_NAME,
        chunk_length_s=28,
        device=device,
        token=auth_token,
        torch_dtype=torch.float16,
        model_kwargs={"attn_implementation": "flash_attention_2"} if FLASH_ATTENTION else {"attn_implementation": "sdpa"},
    )
    asr.model.config.forced_decoder_ids = asr.tokenizer.get_decoder_prompt_ids(
        language=lang,
        task="transcribe",
        no_timestamps=not return_timestamps,
    )
    # num_beams and language are generation-time settings, so they are passed via generate_kwargs.
    return asr(
        file,
        return_timestamps=return_timestamps,
        batch_size=24,
        generate_kwargs={"task": "transcribe", "language": lang, "num_beams": 5},
    )
def format_output(text):
    # Insert a line break after ".", "!", ":", "?" and ellipses ("...") so the HTML output is readable.
    text = re.sub(r'(\.{3,}|[.!:?])', lambda m: m.group() + '<br>', text)
    return text
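
# transcribe() is the Gradio callback: it loads the audio, truncates it to the first
# 30 minutes if necessary, runs the ASR pipeline, and returns the HTML transcript
# together with the path of a plain-text file offered for download.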
def transcribe(file, return_timestamps=False, lang_nn=False):
    lang = "nn" if lang_nn else "no"

    waveform, sample_rate = torchaudio.load(file)
    audio_duration = waveform.size(1) / sample_rate

    warning_message = ""
    if audio_duration > max_audio_length:
        warning_message = (
            "⚠️ Advarsel: Lydfilen din er lengre enn 30 minutter. Kun de første 30 minuttene vil bli transkribert.\n"
        )
        # Trim the waveform to the first 30 minutes and transcribe the truncated copy.
        waveform = waveform[:, :int(max_audio_length * sample_rate)]
        truncated_file = "truncated_audio.wav"
        torchaudio.save(truncated_file, waveform, sample_rate)
        file_to_transcribe = truncated_file
        truncated = True
    else:
        file_to_transcribe = file
        truncated = False

    if not return_timestamps:
        text = pipe(file_to_transcribe, lang=lang)["text"]
        formatted_text = format_output(text)
    else:
        # With timestamps, format each chunk as "[HH:MM:SS -> HH:MM:SS] text".
        chunks = pipe(file_to_transcribe, return_timestamps=True, lang=lang)["chunks"]
        text = []
        for chunk in chunks:
            start_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][0])) if chunk["timestamp"][0] is not None else "??:??:??"
            end_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][1])) if chunk["timestamp"][1] is not None else "??:??:??"
            line = f"[{start_time} -> {end_time}] {chunk['text']}"
            text.append(line)
        formatted_text = "<br>".join(text)

    # Write a plain-text copy of the transcription for the download link.
    output_file = "transcription.txt"
    with open(output_file, "w") as f:
        f.write(re.sub('<br>', '\n', formatted_text))

    if truncated:
        link = "https://github.com/NbAiLab/nostram/blob/main/leverandorer.md"
        disclaimer = (
            "\n\n Dette er en demo. Det er ikke tillatt å bruke denne teksten i profesjonell sammenheng. "
            "Vi anbefaler at hvis du trenger å transkribere lengre opptak, så kjører du enten modellen lokalt "
            "eller sjekker denne siden for å se hvem som leverer løsninger basert på NB-Whisper: "
            f"<a href='{link}' target='_blank'>denne siden</a>."
        )
        formatted_text = warning_message + "<br>" + formatted_text + f"<br><br><i>{disclaimer}</i>"

    formatted_text += "<br><br><i>Transkribert med NB-Whisper demo</i>"
    return formatted_text, output_file
def _return_yt_html_embed(yt_url):
    video_id = yt_url.split("?v=")[-1]
    HTML_str = (
        f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
        " </center>"
    )
    return HTML_str
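
# yt_transcribe() downloads the audio track of a YouTube video with yt-dlp (converted
# to mp3 via the ffmpeg postprocessor) and feeds it through transcribe(). It is only
# used by the YouTube interface, which is currently commented out below.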
def yt_transcribe(yt_url, return_timestamps=False):
    html_embed_str = _return_yt_html_embed(yt_url)

    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': 'audio.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'quiet': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([yt_url])

    # transcribe() returns (formatted_text, output_file); only the text is needed here.
    text, _ = transcribe("audio.mp3", return_timestamps=return_timestamps)
    return html_embed_str, text
# Build the Gradio app (no tabs)
demo = gr.Blocks(theme=gr.themes.Default(primary_hue=gr.themes.colors.red, secondary_hue=gr.themes.colors.red))

with demo:
    with gr.Column():
        gr.HTML("<img src='file/Logonew.png' style='width:200px;'>")
    with gr.Column(scale=8):
        # Use Markdown for title and description
        gr.Markdown(
            """
            <h1 style="font-size: 3em;">NB-Whisper Demo</h1>
            """
        )

    # The three inputs map onto transcribe(file, return_timestamps, lang_nn).
    mf_transcribe = gr.Interface(
        fn=transcribe,
        inputs=[
            gr.components.Audio(sources=['upload', 'microphone'], type="filepath"),
            gr.components.Checkbox(label="Inkluder tidsstempler"),
            gr.components.Checkbox(label="Nynorsk"),
        ],
        outputs=[
            gr.HTML(label="text"),
            gr.File(label="Last ned transkripsjon"),
        ],
        description=(
            "Transkriber lange lydopptak fra mikrofon eller lydfiler med et enkelt klikk! <br> Demoen bruker den fintunede"
            f" modellen [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) og 🤗 Transformers til å transkribere lydfiler opp til 30 minutter."
        ),
        allow_flagging="never",
    )
# Uncomment to add the YouTube transcription interface if needed
# yt_transcribe_interface = gr.Interface(
# fn=yt_transcribe,
# inputs=[
# gr.components.Textbox(lines=1, placeholder="Lim inn URL til en YouTube-video her", label="YouTube URL"),
# gr.components.Checkbox(label="Inkluder tidsstempler"),
# ],
# examples=[["https://www.youtube.com/watch?v=mukeSSa5GKo"]],
# outputs=["html", "text"],
# title="Whisper Demo: Transkriber YouTube",
# description=(
# "Transkriber lange YouTube-videoer med et enkelt klikk! Demoen bruker den fintunede modellen:"
# f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) og 🤗 Transformers til å transkribere lydfiler av"
# " vilkårlig lengde."
# ),
# allow_flagging="never",
# )
# Start the demo (no tabs); queue() must be chained before launch().
demo.queue().launch(share=share, show_api=False, allowed_paths=["Logonew.png"])