# File size: 2,666 Bytes
# Revision: 04c40a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import streamlit as st
from stable_whisper import load_model
from stable_whisper import load_hf_whisper
from pydub import AudioSegment
import webvtt
import pysrt
import requests
import os

# Variables
# NOTE(review): the reference token lookup below is disabled, so nothing in
# this section compares the entered token against a stored secret.
#valid_api_token = st.secrets["API_TOKEN"]

st.title("Speech-to-Text")

with st.expander("README"):
    st.write("This little tool accepts and audiofile. After choosing the model a WebVTT file will be generated. The content of the WebVTT will be shown and a user can choose to download it. This can be used as Subtitle file e.g. in Davinci Resolve Import Subtitles" )

# Upload audio file
uploaded_file = st.file_uploader("Upload Audio File", type=["mp3", "wav", "mov"])

# Free tier or API token option
use_free_tier = st.checkbox("Free Tier (Max 2 minutes)")
# The token is a credential: mask it so it is not echoed in clear text on
# screen (screen sharing, shoulder surfing, browser screenshots).
api_token = st.text_input("API Token (Unlimited)", type="password")

# Should the transcript be translated to English instead of kept in the
# spoken language?
translate = st.checkbox("Would you like a translation to english?")

# Model selection (forwarded unchanged to the model loader)
model_size = st.selectbox("Model Size", ("tiny", "base", "small", "medium"))

def transcribe_to_subtitle(audio_bytes, model_name):
    """Transcribe audio with Whisper and offer the subtitles as a download.

    Loads the model named ``model_name``, transcribes ``audio_bytes`` (or
    translates it when the module-level ``translate`` checkbox is ticked),
    writes the result to a temporary ``.srt`` file, renders the caption text
    in the page and adds a download button for the subtitle file.

    Args:
        audio_bytes: Content of the uploaded audio file, as read from the
            uploader.
        model_name: Model identifier passed to ``load_model``.

    Returns:
        ``None`` on success. If transcription or subtitle export fails, the
        error is shown in the page and ``{"error": <message>}`` is returned.
    """
    import tempfile

    # Load model based on selection
    model = load_model(model_name)

    # TODO: the "Free Tier (Max 2 minutes)" limit is not enforced here yet.

    # Use a unique temporary file per call: a fixed name in the working
    # directory would be shared (and deleted) by concurrent sessions.
    fd, srt_path = tempfile.mkstemp(suffix=".srt")
    os.close(fd)
    try:
        # Transcribe audio; the only difference between the two modes is the
        # extra ``task`` argument.
        extra_options = {"task": "translate"} if translate else {}
        try:
            result = model.transcribe(audio_bytes, verbose=True, **extra_options)
            result.to_srt_vtt(srt_path)
        except Exception as e:
            message = f"Error during transcription: {str(e)}"
            # The caller ignores the return value, so surface the failure in
            # the UI here instead of failing silently.
            st.error(message)
            return {"error": message}

        captions = pysrt.open(srt_path)
        # Server-side trace of every caption (start, text, end, blank line).
        for caption in captions:
            print(caption.start, caption.text, caption.end, "", sep="\n")

        # HTML is allowed because the exported captions may carry markup tags.
        st.markdown(captions.text, unsafe_allow_html=True)

        # Download option
        st.success("Transcription successful! Download subtitle file?")
        with open(srt_path, "rb") as f:
            subtitle_bytes = f.read()
        # The file is written with an .srt extension, so label it as SRT.
        st.download_button("Download Subtitle in SRT Format", subtitle_bytes, "audio.srt")
    finally:
        # Always remove the temporary file, including on error paths.
        os.remove(srt_path)

# Entry point of the page: runs once a file has been uploaded.
if uploaded_file is not None:
    audio_bytes = uploaded_file.read()
    # Access is granted either by ticking the free tier or by entering a
    # (non-empty) API token.
    access_granted = use_free_tier or api_token
    if access_granted:
        transcribe_to_subtitle(audio_bytes, model_size)
    else:
        st.error("API token required for non-free tier usage")