MrSimple01 committed on
Commit
5f2eb21
·
verified ·
1 Parent(s): c185086

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +301 -0
  2. requirements.txt +15 -0
app.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Gradio front end for the document and media processing tool.

Builds a single page with four tabs (document, video, YouTube, audio) and
connects the "Generate Summary" / "Generate Quiz" buttons of every tab to the
processing functions imported from ``src``.
"""

import os
import tempfile

import gradio as gr

from src.mainFunctions import process_video_file, process_audio_document, process_youtube_video
from src.video_processing import extract_audio_from_video
from src.documentProcessing import process_document

# Optional API keys read from the environment; each is None when unset.
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")


def _action_buttons():
    """Create a row with the "Generate Summary" and "Generate Quiz" buttons.

    Must be called inside an active Gradio layout context.

    Returns:
        The summary button and the quiz button, in that order.
    """
    with gr.Row():
        summary_button = gr.Button("Generate Summary")
        quiz_button = gr.Button("Generate Quiz")
    return summary_button, quiz_button


def _result_section():
    """Create the "Generated Content" box followed by the two download slots.

    Must be called inside an active Gradio layout context.

    Returns:
        The content textbox, the text-file component and the JSON-file
        component, in that order.
    """
    with gr.Row():
        with gr.Column():
            content_box = gr.Textbox(label="Generated Content", lines=15)
    with gr.Row():
        text_file = gr.File(label="Download Text File")
        json_file = gr.File(label="Download JSON File")
    return content_box, text_file, json_file


def _wire_modes(handler, summary_button, quiz_button, inputs, outputs):
    """Register ``handler`` on both buttons of a tab.

    The handler receives ``inputs`` followed by a constant ``gr.State`` holding
    either "summary" or "quiz", depending on which button was clicked. The
    summary button is registered first, then the quiz button.
    """
    for button, mode in ((summary_button, "summary"), (quiz_button, "quiz")):
        button.click(
            fn=handler,
            inputs=[*inputs, gr.State(mode)],
            outputs=list(outputs),
        )


# NOTE(review): the container nesting below was reconstructed from a flattened
# diff view of this file; verify it against the committed app.py.
with gr.Blocks(title="Document & Media Processing Tool") as app:
    gr.Markdown("# Document & Media Processor")
    gr.Markdown("Upload a document, video, audio, or provide a YouTube link to generate summaries and quizzes.")

    # Credentials and transcription model shared by every tab.
    # NOTE(review): both key textboxes start out holding the values read from
    # the server's environment, so those values presumably reach every client
    # that loads the page - confirm this is intended.
    with gr.Row():
        with gr.Column():
            elevenlabs_api_key = gr.Textbox(
                label="ElevenLabs API Key (for transcription)",
                placeholder="Enter your ElevenLabs API key",
                type="password",
                value=ELEVENLABS_API_KEY,
            )
            model_id = gr.Dropdown(
                label="Transcription Model",
                choices=["scribe_v1"],
                value="scribe_v1",
            )
            gemini_api_key = gr.Textbox(
                label="Google Gemini API Key",
                placeholder="Enter your Google Gemini API key",
                type="password",
                value=GOOGLE_API_KEY,
            )

    with gr.Row():
        with gr.Column():
            language_selector = gr.Radio(
                label="Content Language",
                choices=["Uzbek", "English", "Russian"],
                value="English",
            )

    with gr.Tabs():
        # Document upload tab
        with gr.TabItem("Upload Document"):
            with gr.Row():
                with gr.Column():
                    document_input = gr.File(
                        label="Upload Document",
                        file_types=[".pdf", ".docx", ".txt"],
                    )
                    generate_summary_button, generate_quiz_button = _action_buttons()
                with gr.Column():
                    document_status_output = gr.Textbox(label="Document Processing Status")
                    document_text_file_output = gr.File(label="Extracted Text File")
            (
                document_output,
                document_file_output,
                document_json_file_output,
            ) = _result_section()

        # Video upload tab
        with gr.TabItem("Upload Video"):
            with gr.Row():
                with gr.Column():
                    video_input = gr.Video(label="Upload Video")
                    format_choice_file = gr.Radio(["mp3", "wav"], value="mp3", label="Audio Format")
                    video_summary_button, video_quiz_button = _action_buttons()
                with gr.Column():
                    audio_output_file = gr.Audio(label="Extracted Audio", type="filepath")
                    status_output_file = gr.Textbox(label="Audio Extraction Status")
                    transcript_file_output = gr.File(label="Transcription Text File")
                    transcript_status_output = gr.Textbox(label="Transcription Status")
            (
                video_output,
                video_text_file_output,
                video_json_file_output,
            ) = _result_section()

        # YouTube tab
        with gr.TabItem("YouTube Video"):
            with gr.Row():
                with gr.Column():
                    youtube_url = gr.Textbox(
                        label="YouTube URL",
                        placeholder="Enter YouTube URL",
                    )
                    yt_format_choice = gr.Radio(["mp3", "wav"], value="mp3", label="Audio Format")
                    youtube_summary_button, youtube_quiz_button = _action_buttons()
                with gr.Column():
                    yt_audio_output = gr.Audio(label="Extracted Audio", type="filepath")
                    yt_status_output = gr.Textbox(label="YouTube Processing Status")
                    yt_transcript_file_output = gr.File(label="Transcription Text File")
                    yt_transcript_status_output = gr.Textbox(label="Transcription Status")
            (
                youtube_output,
                youtube_text_file_output,
                youtube_json_file_output,
            ) = _result_section()

        # Audio upload tab
        with gr.TabItem("Upload Audio"):
            with gr.Row():
                with gr.Column():
                    audio_input = gr.Audio(label="Upload Audio", type="filepath")
                    audio_summary_button, audio_quiz_button = _action_buttons()
                with gr.Column():
                    audio_status_output = gr.Textbox(label="Audio Processing Status")
                    audio_transcript_file_output = gr.File(label="Transcription Text File")
            (
                audio_output,
                audio_text_file_output,
                audio_json_file_output,
            ) = _result_section()

    # Inputs every transcription-based handler takes after its own
    # source-specific inputs.
    _transcription_inputs = [
        elevenlabs_api_key,
        model_id,
        gemini_api_key,
        language_selector,
    ]

    # Document processing
    _wire_modes(
        process_document,
        generate_summary_button,
        generate_quiz_button,
        inputs=[document_input, gemini_api_key, language_selector],
        outputs=[
            document_status_output,
            document_text_file_output,
            document_output,
            document_file_output,
            document_json_file_output,
        ],
    )

    # Video processing
    _wire_modes(
        process_video_file,
        video_summary_button,
        video_quiz_button,
        inputs=[video_input, format_choice_file, *_transcription_inputs],
        outputs=[
            audio_output_file,
            status_output_file,
            transcript_file_output,
            transcript_status_output,
            video_output,
            video_text_file_output,
            video_json_file_output,
        ],
    )

    # YouTube processing
    _wire_modes(
        process_youtube_video,
        youtube_summary_button,
        youtube_quiz_button,
        inputs=[youtube_url, yt_format_choice, *_transcription_inputs],
        outputs=[
            yt_audio_output,
            yt_status_output,
            yt_transcript_file_output,
            yt_transcript_status_output,
            youtube_output,
            youtube_text_file_output,
            youtube_json_file_output,
        ],
    )

    # Audio processing
    _wire_modes(
        process_audio_document,
        audio_summary_button,
        audio_quiz_button,
        inputs=[audio_input, *_transcription_inputs],
        outputs=[
            audio_status_output,
            audio_transcript_file_output,
            audio_output,
            audio_text_file_output,
            audio_json_file_output,
        ],
    )

if __name__ == "__main__":
    # NOTE(review): share and debug are both hard-coded on; confirm that is
    # wanted for the deployment target.
    app.launch(share=True, debug=True)
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ uuid
3
+ yt-dlp
4
+ python-dotenv
5
+ requests
6
+ sentence-transformers
7
+ langchain-google-genai
8
+ anthropic
9
+ pydantic
10
+ transformers==4.50.3
11
+ langchain-openai
12
+ PyMuPDF
13
+ python-docx
14
+ pytube
15
+ PyPDF2