Spaces:
Sleeping
Sleeping
Ibrahim Olanigan
committed on
Commit
·
d592885
1
Parent(s):
9b538d8
Code cleanup
Browse files
app.py
CHANGED
@@ -51,16 +51,14 @@ def extract_youtube_video_id(url):
|
|
51 |
|
52 |
|
53 |
def load_whisper():
|
54 |
-
|
55 |
model = whisper.load_model("small")
|
56 |
print('Loaded Whisper Medium model')
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
write_file(result["text"], "transcript.txt")
|
63 |
-
AUDIO_FILE = "audio.mp3"
|
64 |
|
65 |
def load_audio():
|
66 |
if os.path.exists(AUDIO_FILE):
|
@@ -75,7 +73,6 @@ def load_audio():
|
|
75 |
def display():
|
76 |
container = st.container()
|
77 |
text_container = st.container()
|
78 |
-
# whisper_container = st.container()
|
79 |
load_audio()
|
80 |
|
81 |
#Download Button section
|
@@ -93,64 +90,24 @@ def display():
|
|
93 |
input_submit_button = st.form_submit_button(label='Send')
|
94 |
|
95 |
if input_submit_button and user_input:
|
96 |
-
|
97 |
-
st.write("You entered... " + st.session_state[URL])
|
98 |
-
# transcribe()
|
99 |
-
# download()
|
100 |
-
# download_audio()
|
101 |
load_whisper()
|
102 |
|
103 |
|
104 |
with text_container:
|
105 |
st.text_area(label="Youtube Transcript:",
|
106 |
height=200,
|
107 |
-
value=st.session_state[
|
108 |
-
|
109 |
-
# st.text_area(label="Whisper Transcript:",
|
110 |
-
# height=200,
|
111 |
-
# value=st.session_state[WHISPER])
|
112 |
-
|
113 |
-
|
114 |
-
def download_audio():
|
115 |
-
if st.session_state[URL]:
|
116 |
-
print("Downloading....")
|
117 |
-
yt = pt.YouTube(st.session_state[URL])
|
118 |
-
stream = yt.streams.filter(only_audio=True)[0]
|
119 |
-
stream.download(filename="audio.mp3")
|
120 |
-
print("Downloaded Audio file....")
|
121 |
-
|
122 |
def download():
|
123 |
id = extract_youtube_video_id(st.session_state[URL])
|
124 |
command = [f"yt-dlp --no-config -v --extract-audio --audio-format mp3 {st.session_state[URL]} -o audio.mp3"]
|
125 |
print(command)
|
126 |
out = subprocess.run(command, shell=True)
|
127 |
-
print('Download with YT-DLP done!!')
|
128 |
-
|
129 |
-
|
130 |
-
def transcribe():
|
131 |
-
loader = YoutubeLoader.from_youtube_url(
|
132 |
-
st.session_state[URL], add_video_info=True)
|
133 |
-
splitter = RecursiveCharacterTextSplitter(chunk_size=2000,chunk_overlap=500)
|
134 |
-
docs = loader.load_and_split(splitter)
|
135 |
-
length = len(docs)
|
136 |
-
index = int(length/3+1)
|
137 |
-
print(f"Loaded {length} documents, Displaying {index}-th document")
|
138 |
-
# st.session_state[TEXT] = docs[index].page_content
|
139 |
-
st.session_state[TEXT] = write_chunks(docs,"transcript.txt")
|
140 |
-
|
141 |
-
|
142 |
-
def write_chunks(docs, filename):
|
143 |
-
full_doc = ''
|
144 |
-
for doc in docs:
|
145 |
-
full_doc = full_doc + doc.page_content + "\n"
|
146 |
-
with open(filename, "w") as f:
|
147 |
-
f.write(full_doc)
|
148 |
-
return full_doc
|
149 |
|
150 |
def write_file(text, filename):
|
151 |
with open(filename, "w") as f:
|
152 |
f.write(text)
|
153 |
-
# return full_doc
|
154 |
|
155 |
def main():
|
156 |
init_state()
|
|
|
51 |
|
52 |
|
53 |
def load_whisper():
|
54 |
+
|
55 |
model = whisper.load_model("small")
|
56 |
print('Loaded Whisper Medium model')
|
57 |
+
if st.session_state[AUDIO_EXISTS]:
|
58 |
+
print('Transcribing with Whisper model')
|
59 |
+
result = model.transcribe("audio.mp3")
|
60 |
+
st.session_state[WHISPER] = result["text"]
|
61 |
+
write_file(result["text"], "transcript.txt")
|
|
|
|
|
62 |
|
63 |
def load_audio():
|
64 |
if os.path.exists(AUDIO_FILE):
|
|
|
73 |
def display():
|
74 |
container = st.container()
|
75 |
text_container = st.container()
|
|
|
76 |
load_audio()
|
77 |
|
78 |
#Download Button section
|
|
|
90 |
input_submit_button = st.form_submit_button(label='Send')
|
91 |
|
92 |
if input_submit_button and user_input:
|
93 |
+
download()
|
|
|
|
|
|
|
|
|
94 |
load_whisper()
|
95 |
|
96 |
|
97 |
with text_container:
|
98 |
st.text_area(label="Youtube Transcript:",
|
99 |
height=200,
|
100 |
+
value=st.session_state[WHISPER])
|
101 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
def download():
|
103 |
id = extract_youtube_video_id(st.session_state[URL])
|
104 |
command = [f"yt-dlp --no-config -v --extract-audio --audio-format mp3 {st.session_state[URL]} -o audio.mp3"]
|
105 |
print(command)
|
106 |
out = subprocess.run(command, shell=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
|
108 |
def write_file(text, filename):
|
109 |
with open(filename, "w") as f:
|
110 |
f.write(text)
|
|
|
111 |
|
112 |
def main():
|
113 |
init_state()
|