Spaces:
Running
on
T4
Running
on
T4
Update app.py
Browse files
app.py
CHANGED
@@ -18,9 +18,7 @@ except ImportError:
|
|
18 |
import yt_dlp # Added import for yt-dlp
|
19 |
|
20 |
MODEL_NAME = "NbAiLab/nb-whisper-large"
|
21 |
-
|
22 |
-
|
23 |
-
max_audio_length= 30 * 60
|
24 |
|
25 |
share = (os.environ.get("SHARE", "False")[0].lower() in "ty1") or None
|
26 |
auth_token = os.environ.get("AUTH_TOKEN") or True
|
@@ -28,7 +26,7 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
|
28 |
print(f"Bruker enhet: {device}")
|
29 |
|
30 |
@spaces.GPU(duration=60 * 2)
|
31 |
-
def pipe(file, return_timestamps=False,lang="no"):
|
32 |
asr = pipeline(
|
33 |
task="automatic-speech-recognition",
|
34 |
model=MODEL_NAME,
|
@@ -46,24 +44,20 @@ def pipe(file, return_timestamps=False,lang="no"):
|
|
46 |
return asr(file, return_timestamps=return_timestamps, batch_size=24, generate_kwargs={'task': 'transcribe', 'language': lang})
|
47 |
|
48 |
def format_output(text):
|
49 |
-
# Add a line break after ".", "!", ":", or "?" unless part of sequences like "..."
|
50 |
-
#text = re.sub(r'(?<!\.)[.!:?](?!\.)', lambda m: m.group() + '<br>', text)
|
51 |
-
# Ensure line break after sequences like "..." or other punctuation patterns
|
52 |
text = re.sub(r'(\.{3,}|[.!:?])', lambda m: m.group() + '<br>', text)
|
53 |
return text
|
54 |
|
55 |
-
def transcribe(file, return_timestamps=False,lang_nn=False):
|
56 |
|
57 |
waveform, sample_rate = torchaudio.load(file)
|
58 |
audio_duration = waveform.size(1) / sample_rate
|
59 |
-
warning_message=None
|
60 |
|
61 |
if audio_duration > max_audio_length:
|
62 |
warning_message = (
|
63 |
"<b style='color:red;'>⚠️ Advarsel:</b> "
|
64 |
"Lydfilen er lengre enn 30 minutter. Kun de første 30 minuttene vil bli transkribert."
|
65 |
)
|
66 |
-
# Trim the waveform to the first 30 minutes
|
67 |
waveform = waveform[:, :int(max_audio_length * sample_rate)]
|
68 |
truncated_file = "truncated_audio.wav"
|
69 |
torchaudio.save(truncated_file, waveform, sample_rate)
|
@@ -73,7 +67,6 @@ def transcribe(file, return_timestamps=False,lang_nn=False):
|
|
73 |
file_to_transcribe = file
|
74 |
truncated = False
|
75 |
|
76 |
-
|
77 |
if not lang_nn:
|
78 |
if not return_timestamps:
|
79 |
text = pipe(file_to_transcribe)["text"]
|
@@ -89,10 +82,10 @@ def transcribe(file, return_timestamps=False,lang_nn=False):
|
|
89 |
formatted_text = "<br>".join(text)
|
90 |
else:
|
91 |
if not return_timestamps:
|
92 |
-
text = pipe(file_to_transcribe,lang="nn")["text"]
|
93 |
formatted_text = format_output(text)
|
94 |
else:
|
95 |
-
chunks = pipe(file_to_transcribe, return_timestamps=True,lang="nn")["chunks"]
|
96 |
text = []
|
97 |
for chunk in chunks:
|
98 |
start_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][0])) if chunk["timestamp"][0] is not None else "??:??:??"
|
@@ -175,35 +168,15 @@ with demo:
|
|
175 |
outputs=[
|
176 |
gr.HTML(label="Varsel"),
|
177 |
gr.HTML(label="text"),
|
178 |
-
gr.File(label="Last ned transkripsjon")
|
179 |
],
|
180 |
-
#outputs="text",
|
181 |
|
182 |
description=(
|
183 |
"Demoen bruker"
|
184 |
f" modellen [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) til å transkribere lydfiler opp til 30 minutter."
|
185 |
),
|
186 |
allow_flagging="never",
|
187 |
-
#show_submit_button=False,
|
188 |
)
|
189 |
|
190 |
-
# Uncomment to add the YouTube transcription interface if needed
|
191 |
-
# yt_transcribe_interface = gr.Interface(
|
192 |
-
# fn=yt_transcribe,
|
193 |
-
# inputs=[
|
194 |
-
# gr.components.Textbox(lines=1, placeholder="Lim inn URL til en YouTube-video her", label="YouTube URL"),
|
195 |
-
# gr.components.Checkbox(label="Inkluder tidsstempler"),
|
196 |
-
# ],
|
197 |
-
# examples=[["https://www.youtube.com/watch?v=mukeSSa5GKo"]],
|
198 |
-
# outputs=["html", "text"],
|
199 |
-
# title="Whisper Demo: Transkriber YouTube",
|
200 |
-
# description=(
|
201 |
-
# "Transkriber lange YouTube-videoer med et enkelt klikk! Demoen bruker den fintunede modellen:"
|
202 |
-
# f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) og 🤗 Transformers til å transkribere lydfiler av"
|
203 |
-
# " vilkårlig lengde."
|
204 |
-
# ),
|
205 |
-
# allow_flagging="never",
|
206 |
-
# )
|
207 |
-
|
208 |
# Start demoen uten faner
|
209 |
-
demo.launch(share=share, show_api=False,allowed_paths=["Logonew.png"]).queue()
|
|
|
18 |
import yt_dlp # Added import for yt-dlp
|
19 |
|
20 |
MODEL_NAME = "NbAiLab/nb-whisper-large"
|
21 |
+
max_audio_length = 30 * 60
|
|
|
|
|
22 |
|
23 |
share = (os.environ.get("SHARE", "False")[0].lower() in "ty1") or None
|
24 |
auth_token = os.environ.get("AUTH_TOKEN") or True
|
|
|
26 |
print(f"Bruker enhet: {device}")
|
27 |
|
28 |
@spaces.GPU(duration=60 * 2)
|
29 |
+
def pipe(file, return_timestamps=False, lang="no"):
|
30 |
asr = pipeline(
|
31 |
task="automatic-speech-recognition",
|
32 |
model=MODEL_NAME,
|
|
|
44 |
return asr(file, return_timestamps=return_timestamps, batch_size=24, generate_kwargs={'task': 'transcribe', 'language': lang})
|
45 |
|
46 |
def format_output(text):
|
|
|
|
|
|
|
47 |
text = re.sub(r'(\.{3,}|[.!:?])', lambda m: m.group() + '<br>', text)
|
48 |
return text
|
49 |
|
50 |
+
def transcribe(file, return_timestamps=False, lang_nn=False):
|
51 |
|
52 |
waveform, sample_rate = torchaudio.load(file)
|
53 |
audio_duration = waveform.size(1) / sample_rate
|
54 |
+
warning_message = None
|
55 |
|
56 |
if audio_duration > max_audio_length:
|
57 |
warning_message = (
|
58 |
"<b style='color:red;'>⚠️ Advarsel:</b> "
|
59 |
"Lydfilen er lengre enn 30 minutter. Kun de første 30 minuttene vil bli transkribert."
|
60 |
)
|
|
|
61 |
waveform = waveform[:, :int(max_audio_length * sample_rate)]
|
62 |
truncated_file = "truncated_audio.wav"
|
63 |
torchaudio.save(truncated_file, waveform, sample_rate)
|
|
|
67 |
file_to_transcribe = file
|
68 |
truncated = False
|
69 |
|
|
|
70 |
if not lang_nn:
|
71 |
if not return_timestamps:
|
72 |
text = pipe(file_to_transcribe)["text"]
|
|
|
82 |
formatted_text = "<br>".join(text)
|
83 |
else:
|
84 |
if not return_timestamps:
|
85 |
+
text = pipe(file_to_transcribe, lang="nn")["text"]
|
86 |
formatted_text = format_output(text)
|
87 |
else:
|
88 |
+
chunks = pipe(file_to_transcribe, return_timestamps=True, lang="nn")["chunks"]
|
89 |
text = []
|
90 |
for chunk in chunks:
|
91 |
start_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][0])) if chunk["timestamp"][0] is not None else "??:??:??"
|
|
|
168 |
outputs=[
|
169 |
gr.HTML(label="Varsel"),
|
170 |
gr.HTML(label="text"),
|
171 |
+
gr.File(label="Last ned transkripsjon") # Removed right side space in the box
|
172 |
],
|
|
|
173 |
|
174 |
description=(
|
175 |
"Demoen bruker"
|
176 |
f" modellen [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) til å transkribere lydfiler opp til 30 minutter."
|
177 |
),
|
178 |
allow_flagging="never",
|
|
|
179 |
)
|
180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
# Start demoen uten faner
|
182 |
+
demo.launch(share=share, show_api=False, allowed_paths=["Logonew.png"]).queue()
|