"""Gradio front end for the document and media processing tool.

Builds a four-tab interface (document, video, YouTube, audio). Every tab has
a "Generate Summary" and a "Generate Quiz" button; both call the same handler
imported from ``src`` and differ only in the final ``gr.State`` input, which
carries the string ``"summary"`` or ``"quiz"``.

NOTE(review): the container nesting below was reconstructed from a
whitespace-collapsed copy of this file -- confirm the Row/Column hierarchy
against the intended layout.
"""
import os
import gradio as gr
import tempfile

from src.video_processing import (
    extract_audio_from_video,
    process_video_file,
    process_audio_document,
    process_youtube_video,
)
from src.documentProcessing import process_document

# Optional defaults for the API-key fields; None when the variable is unset.
# NOTE(review): these values are placed into the key textboxes and the app is
# launched with share=True -- presumably that makes the server's keys visible
# to anyone holding the share link; confirm and consider leaving them blank.
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")


def _bind_modes(handler, summary_button, quiz_button, shared_inputs, outputs):
    """Register *handler* on both buttons of a tab.

    The handler receives ``shared_inputs`` followed by a ``gr.State`` holding
    ``"summary"`` or ``"quiz"`` depending on which button was clicked, and
    writes to ``outputs``. Must be called while a ``gr.Blocks`` context is
    active, because it creates ``gr.State`` components and event listeners.
    """
    for button, mode in ((summary_button, "summary"), (quiz_button, "quiz")):
        button.click(
            fn=handler,
            inputs=[*shared_inputs, gr.State(mode)],
            outputs=list(outputs),
        )


with gr.Blocks(title="Document & Media Processing Tool") as app:
    gr.Markdown("# Document & Media Processor")
    gr.Markdown(
        "Upload a document, video, audio, or provide a YouTube link to generate summaries and quizzes."
    )

    # ---- Settings shared by every tab: credentials and model ----
    with gr.Row():
        with gr.Column():
            elevenlabs_api_key = gr.Textbox(
                label="ElevenLabs API Key (for transcription)",
                placeholder="Enter your ElevenLabs API key",
                type="password",
                value=ELEVENLABS_API_KEY,
            )
            model_id = gr.Dropdown(
                label="Transcription Model",
                choices=["scribe_v1"],
                value="scribe_v1",
            )
            gemini_api_key = gr.Textbox(
                label="Google Gemini API Key",
                placeholder="Enter your Google Gemini API key",
                type="password",
                value=GOOGLE_API_KEY,
            )

    # ---- Settings shared by every tab: content language ----
    with gr.Row():
        with gr.Column():
            language_selector = gr.Radio(
                label="Content Language",
                choices=["Uzbek", "English", "Russian"],
                value="English",
            )

    with gr.Tabs():
        # ---- Tab 1: document upload ----
        with gr.TabItem("Upload Document"):
            with gr.Row():
                with gr.Column():
                    document_input = gr.File(
                        label="Upload Document",
                        file_types=[".pdf", ".docx", ".txt"],
                    )
                    with gr.Row():
                        generate_summary_button = gr.Button("Generate Summary")
                        generate_quiz_button = gr.Button("Generate Quiz")
                with gr.Column():
                    document_status_output = gr.Textbox(label="Document Processing Status")
                    document_text_file_output = gr.File(label="Extracted Text File")
            with gr.Row():
                with gr.Column():
                    document_output = gr.Textbox(label="Generated Content", lines=15)
                    with gr.Row():
                        document_file_output = gr.File(label="Download Text File")
                        document_json_file_output = gr.File(label="Download JSON File")

        # ---- Tab 2: video upload ----
        with gr.TabItem("Upload Video"):
            with gr.Row():
                with gr.Column():
                    video_input = gr.Video(label="Upload Video")
                    format_choice_file = gr.Radio(
                        choices=["mp3", "wav"],
                        value="mp3",
                        label="Audio Format",
                    )
                    with gr.Row():
                        video_summary_button = gr.Button("Generate Summary")
                        video_quiz_button = gr.Button("Generate Quiz")
                with gr.Column():
                    audio_output_file = gr.Audio(label="Extracted Audio", type="filepath")
                    status_output_file = gr.Textbox(label="Audio Extraction Status")
                    transcript_file_output = gr.File(label="Transcription Text File")
                    transcript_status_output = gr.Textbox(label="Transcription Status")
            with gr.Row():
                with gr.Column():
                    video_output = gr.Textbox(label="Generated Content", lines=15)
                    with gr.Row():
                        video_text_file_output = gr.File(label="Download Text File")
                        video_json_file_output = gr.File(label="Download JSON File")

        # ---- Tab 3: YouTube link ----
        with gr.TabItem("YouTube Video"):
            with gr.Row():
                with gr.Column():
                    youtube_url = gr.Textbox(
                        label="YouTube URL",
                        placeholder="Enter YouTube URL",
                    )
                    yt_format_choice = gr.Radio(
                        choices=["mp3", "wav"],
                        value="mp3",
                        label="Audio Format",
                    )
                    with gr.Row():
                        youtube_summary_button = gr.Button("Generate Summary")
                        youtube_quiz_button = gr.Button("Generate Quiz")
                with gr.Column():
                    yt_audio_output = gr.Audio(label="Extracted Audio", type="filepath")
                    yt_status_output = gr.Textbox(label="YouTube Processing Status")
                    yt_transcript_file_output = gr.File(label="Transcription Text File")
                    yt_transcript_status_output = gr.Textbox(label="Transcription Status")
            with gr.Row():
                with gr.Column():
                    youtube_output = gr.Textbox(label="Generated Content", lines=15)
                    with gr.Row():
                        youtube_text_file_output = gr.File(label="Download Text File")
                        youtube_json_file_output = gr.File(label="Download JSON File")

        # ---- Tab 4: audio upload ----
        with gr.TabItem("Upload Audio"):
            with gr.Row():
                with gr.Column():
                    audio_input = gr.Audio(label="Upload Audio", type="filepath")
                    with gr.Row():
                        audio_summary_button = gr.Button("Generate Summary")
                        audio_quiz_button = gr.Button("Generate Quiz")
                with gr.Column():
                    audio_status_output = gr.Textbox(label="Audio Processing Status")
                    audio_transcript_file_output = gr.File(label="Transcription Text File")
            with gr.Row():
                with gr.Column():
                    audio_output = gr.Textbox(label="Generated Content", lines=15)
                    with gr.Row():
                        audio_text_file_output = gr.File(label="Download Text File")
                        audio_json_file_output = gr.File(label="Download JSON File")

    # ---- Event wiring: one call per tab, summary button first, then quiz ----

    # Document tab -> process_document
    _bind_modes(
        process_document,
        generate_summary_button,
        generate_quiz_button,
        shared_inputs=[
            document_input,
            gemini_api_key,
            language_selector,
        ],
        outputs=[
            document_status_output,
            document_text_file_output,
            document_output,
            document_file_output,
            document_json_file_output,
        ],
    )

    # Video tab -> process_video_file
    _bind_modes(
        process_video_file,
        video_summary_button,
        video_quiz_button,
        shared_inputs=[
            video_input,
            format_choice_file,
            elevenlabs_api_key,
            model_id,
            gemini_api_key,
            language_selector,
        ],
        outputs=[
            audio_output_file,
            status_output_file,
            transcript_file_output,
            transcript_status_output,
            video_output,
            video_text_file_output,
            video_json_file_output,
        ],
    )

    # YouTube tab -> process_youtube_video
    _bind_modes(
        process_youtube_video,
        youtube_summary_button,
        youtube_quiz_button,
        shared_inputs=[
            youtube_url,
            yt_format_choice,
            elevenlabs_api_key,
            model_id,
            gemini_api_key,
            language_selector,
        ],
        outputs=[
            yt_audio_output,
            yt_status_output,
            yt_transcript_file_output,
            yt_transcript_status_output,
            youtube_output,
            youtube_text_file_output,
            youtube_json_file_output,
        ],
    )

    # Audio tab -> process_audio_document
    _bind_modes(
        process_audio_document,
        audio_summary_button,
        audio_quiz_button,
        shared_inputs=[
            audio_input,
            elevenlabs_api_key,
            model_id,
            gemini_api_key,
            language_selector,
        ],
        outputs=[
            audio_status_output,
            audio_transcript_file_output,
            audio_output,
            audio_text_file_output,
            audio_json_file_output,
        ],
    )

if __name__ == "__main__":
    # Start the server only when run as a script; requests a public share
    # link and enables debug mode.
    app.launch(share=True, debug=True)