Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pytube as pt | |
import os | |
import subprocess | |
import re | |
import whisper | |
from langchain.document_loaders import YoutubeLoader | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
URL = 'URL' | |
TEXT = 'TEXT' | |
WHISPER = 'WHISPER' | |
PROCESSING = 'PROCESSING' | |
STATES = [URL, TEXT, WHISPER, PROCESSING] | |
AUDIO_FILE = "audio.mp3" | |
AUDIO_EXISTS = "AUDIO_EXISTS" | |
model = '' | |
st.title('Youtube Audio+Text') | |
def init_state(): | |
if URL not in st.session_state: | |
st.session_state[URL] = '' | |
if TEXT not in st.session_state: | |
st.session_state[TEXT] = '' | |
if WHISPER not in st.session_state: | |
st.session_state[WHISPER] = '' | |
if AUDIO_EXISTS not in st.session_state: | |
st.session_state[AUDIO_EXISTS] = False | |
# if not st.session_state[URL]: | |
# clear_old_files() | |
def clear_old_files(): | |
for file in os.listdir(): | |
if file.endswith(".mp3") or file == 'transcript.txt': | |
os.remove(file) | |
print(f"Removed old files::{file}") | |
def extract_youtube_video_id(url): | |
regex = r"v=([^&]+)" | |
match = re.search(regex, url) | |
if match: | |
return match.group(1) | |
else: | |
return None | |
def load_whisper(): | |
model = whisper.load_model("small") | |
print('Loaded Whisper Medium model') | |
if st.session_state[AUDIO_EXISTS]: | |
print('Transcribing with Whisper model') | |
result = model.transcribe("audio.mp3") | |
st.session_state[WHISPER] = result["text"] | |
write_file(result["text"], "transcript.txt") | |
def load_audio(): | |
if os.path.exists(AUDIO_FILE): | |
st.session_state[AUDIO_EXISTS] = True | |
audio_file = open(AUDIO_FILE, 'rb') | |
audio_bytes = audio_file.read() | |
print(f"Audio file exists...{len(audio_bytes)}") | |
st.audio(audio_bytes, format="audio/mp3") | |
elif st.session_state[AUDIO_EXISTS]: | |
st.session_state[AUDIO_EXISTS] = False | |
def display(): | |
container = st.container() | |
text_container = st.container() | |
load_audio() | |
#Download Button section | |
col1, col2 = st.columns(2) | |
with col1: | |
if st.session_state[AUDIO_EXISTS]: | |
st.download_button("Download Audio","file","audio.mp3","application/octet-stream") | |
with col2: | |
if os.path.exists("transcript.txt"): | |
st.download_button("Download Transcript",st.session_state[TEXT],"transcript.txt","text/plain") | |
with container: | |
with st.form(key='input_form'): | |
user_input = st.text_input("Youtube URL:", placeholder="http://www.youtube.com", key=URL) | |
input_submit_button = st.form_submit_button(label='Send') | |
if input_submit_button and user_input: | |
download() | |
load_whisper() | |
with text_container: | |
st.text_area(label="Youtube Transcript:", | |
height=200, | |
value=st.session_state[WHISPER]) | |
def download(): | |
id = extract_youtube_video_id(st.session_state[URL]) | |
command = [f"yt-dlp --no-config -v --extract-audio --audio-format mp3 {st.session_state[URL]} -o audio.mp3"] | |
print(command) | |
out = subprocess.run(command, shell=True) | |
def write_file(text, filename): | |
with open(filename, "w") as f: | |
f.write(text) | |
def main(): | |
init_state() | |
display() | |
if __name__ == "__main__": | |
main() |