pere committed on
Commit
d761860
·
verified ·
1 Parent(s): 143ef7b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -35
app.py CHANGED
@@ -18,9 +18,7 @@ except ImportError:
18
  import yt_dlp # Added import for yt-dlp
19
 
20
  MODEL_NAME = "NbAiLab/nb-whisper-large"
21
- #lang = "no"
22
-
23
- max_audio_length= 30 * 60
24
 
25
  share = (os.environ.get("SHARE", "False")[0].lower() in "ty1") or None
26
  auth_token = os.environ.get("AUTH_TOKEN") or True
@@ -28,7 +26,7 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
28
  print(f"Bruker enhet: {device}")
29
 
30
  @spaces.GPU(duration=60 * 2)
31
- def pipe(file, return_timestamps=False,lang="no"):
32
  asr = pipeline(
33
  task="automatic-speech-recognition",
34
  model=MODEL_NAME,
@@ -46,24 +44,20 @@ def pipe(file, return_timestamps=False,lang="no"):
46
  return asr(file, return_timestamps=return_timestamps, batch_size=24, generate_kwargs={'task': 'transcribe', 'language': lang})
47
 
48
  def format_output(text):
49
- # Add a line break after ".", "!", ":", or "?" unless part of sequences like "..."
50
- #text = re.sub(r'(?<!\.)[.!:?](?!\.)', lambda m: m.group() + '<br>', text)
51
- # Ensure line break after sequences like "..." or other punctuation patterns
52
  text = re.sub(r'(\.{3,}|[.!:?])', lambda m: m.group() + '<br>', text)
53
  return text
54
 
55
- def transcribe(file, return_timestamps=False,lang_nn=False):
56
 
57
  waveform, sample_rate = torchaudio.load(file)
58
  audio_duration = waveform.size(1) / sample_rate
59
- warning_message=None
60
 
61
  if audio_duration > max_audio_length:
62
  warning_message = (
63
  "<b style='color:red;'>⚠️ Advarsel:</b> "
64
  "Lydfilen er lengre enn 30 minutter. Kun de første 30 minuttene vil bli transkribert."
65
  )
66
- # Trim the waveform to the first 30 minutes
67
  waveform = waveform[:, :int(max_audio_length * sample_rate)]
68
  truncated_file = "truncated_audio.wav"
69
  torchaudio.save(truncated_file, waveform, sample_rate)
@@ -73,7 +67,6 @@ def transcribe(file, return_timestamps=False,lang_nn=False):
73
  file_to_transcribe = file
74
  truncated = False
75
 
76
-
77
  if not lang_nn:
78
  if not return_timestamps:
79
  text = pipe(file_to_transcribe)["text"]
@@ -89,10 +82,10 @@ def transcribe(file, return_timestamps=False,lang_nn=False):
89
  formatted_text = "<br>".join(text)
90
  else:
91
  if not return_timestamps:
92
- text = pipe(file_to_transcribe,lang="nn")["text"]
93
  formatted_text = format_output(text)
94
  else:
95
- chunks = pipe(file_to_transcribe, return_timestamps=True,lang="nn")["chunks"]
96
  text = []
97
  for chunk in chunks:
98
  start_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][0])) if chunk["timestamp"][0] is not None else "??:??:??"
@@ -175,35 +168,15 @@ with demo:
175
  outputs=[
176
  gr.HTML(label="Varsel"),
177
  gr.HTML(label="text"),
178
- gr.File(label="Last ned transkripsjon")
179
  ],
180
- #outputs="text",
181
 
182
  description=(
183
  "Demoen bruker"
184
  f" modellen [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) til å transkribere lydfiler opp til 30 minutter."
185
  ),
186
  allow_flagging="never",
187
- #show_submit_button=False,
188
  )
189
 
190
- # Uncomment to add the YouTube transcription interface if needed
191
- # yt_transcribe_interface = gr.Interface(
192
- # fn=yt_transcribe,
193
- # inputs=[
194
- # gr.components.Textbox(lines=1, placeholder="Lim inn URL til en YouTube-video her", label="YouTube URL"),
195
- # gr.components.Checkbox(label="Inkluder tidsstempler"),
196
- # ],
197
- # examples=[["https://www.youtube.com/watch?v=mukeSSa5GKo"]],
198
- # outputs=["html", "text"],
199
- # title="Whisper Demo: Transkriber YouTube",
200
- # description=(
201
- # "Transkriber lange YouTube-videoer med et enkelt klikk! Demoen bruker den fintunede modellen:"
202
- # f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) og 🤗 Transformers til å transkribere lydfiler av"
203
- # " vilkårlig lengde."
204
- # ),
205
- # allow_flagging="never",
206
- # )
207
-
208
  # Start demoen uten faner
209
- demo.launch(share=share, show_api=False,allowed_paths=["Logonew.png"]).queue()
 
18
  import yt_dlp # Added import for yt-dlp
19
 
20
  MODEL_NAME = "NbAiLab/nb-whisper-large"
21
+ max_audio_length = 30 * 60
 
 
22
 
23
  share = (os.environ.get("SHARE", "False")[0].lower() in "ty1") or None
24
  auth_token = os.environ.get("AUTH_TOKEN") or True
 
26
  print(f"Bruker enhet: {device}")
27
 
28
  @spaces.GPU(duration=60 * 2)
29
+ def pipe(file, return_timestamps=False, lang="no"):
30
  asr = pipeline(
31
  task="automatic-speech-recognition",
32
  model=MODEL_NAME,
 
44
  return asr(file, return_timestamps=return_timestamps, batch_size=24, generate_kwargs={'task': 'transcribe', 'language': lang})
45
 
46
  def format_output(text):
 
 
 
47
  text = re.sub(r'(\.{3,}|[.!:?])', lambda m: m.group() + '<br>', text)
48
  return text
49
 
50
+ def transcribe(file, return_timestamps=False, lang_nn=False):
51
 
52
  waveform, sample_rate = torchaudio.load(file)
53
  audio_duration = waveform.size(1) / sample_rate
54
+ warning_message = None
55
 
56
  if audio_duration > max_audio_length:
57
  warning_message = (
58
  "<b style='color:red;'>⚠️ Advarsel:</b> "
59
  "Lydfilen er lengre enn 30 minutter. Kun de første 30 minuttene vil bli transkribert."
60
  )
 
61
  waveform = waveform[:, :int(max_audio_length * sample_rate)]
62
  truncated_file = "truncated_audio.wav"
63
  torchaudio.save(truncated_file, waveform, sample_rate)
 
67
  file_to_transcribe = file
68
  truncated = False
69
 
 
70
  if not lang_nn:
71
  if not return_timestamps:
72
  text = pipe(file_to_transcribe)["text"]
 
82
  formatted_text = "<br>".join(text)
83
  else:
84
  if not return_timestamps:
85
+ text = pipe(file_to_transcribe, lang="nn")["text"]
86
  formatted_text = format_output(text)
87
  else:
88
+ chunks = pipe(file_to_transcribe, return_timestamps=True, lang="nn")["chunks"]
89
  text = []
90
  for chunk in chunks:
91
  start_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][0])) if chunk["timestamp"][0] is not None else "??:??:??"
 
168
  outputs=[
169
  gr.HTML(label="Varsel"),
170
  gr.HTML(label="text"),
171
+ gr.File(label="Last ned transkripsjon") # Removed right side space in the box
172
  ],
 
173
 
174
  description=(
175
  "Demoen bruker"
176
  f" modellen [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) til å transkribere lydfiler opp til 30 minutter."
177
  ),
178
  allow_flagging="never",
 
179
  )
180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  # Start demoen uten faner
182
+ demo.launch(share=share, show_api=False, allowed_paths=["Logonew.png"]).queue()