import os import datetime import hashlib import requests import numpy as np import gradio as gr import whisper import srt import torch LANGUAGE_OPTIONS = { "Afrikaans": "af", "Arabic": "ar", "Azerbaijani": "az", "Belarusian": "be", "Bulgarian": "bg", "Bengali": "bn", "Catalan": "ca", "Czech": "cs", "Welsh": "cy", "Danish": "da", "German": "de", "Greek": "el", "English": "en", "Spanish": "es", "Estonian": "et", "Persian": "fa", "Finnish": "fi", "French": "fr", "Irish": "ga", "Galician": "gl", "Gujarati": "gu", "Hebrew": "he", "Hindi": "hi", "Croatian": "hr", "Hungarian": "hu", "Armenian": "hy", "Indonesian": "id", "Icelandic": "is", "Italian": "it", "Japanese": "ja", "Georgian": "ka", "Kazakh": "kk", "Khmer": "km", "Kannada": "kn", "Korean": "ko", "Lithuanian": "lt", "Latvian": "lv", "Macedonian": "mk", "Malayalam": "ml", "Mongolian": "mn", "Marathi": "mr", "Malay": "ms", "Maltese": "mt", "Nepali": "ne", "Dutch": "nl", "Norwegian": "no", "Odia": "or", "Punjabi": "pa", "Polish": "pl", "Portuguese": "pt", "Romanian": "ro", "Russian": "ru", "Sinhala": "si", "Slovak": "sk", "Slovenian": "sl", "Albanian": "sq", "Serbian": "sr", "Swedish": "sv", "Swahili": "sw", "Tamil": "ta", "Telugu": "te", "Thai": "th", "Turkish": "tr", "Ukrainian": "uk", "Urdu": "ur", "Vietnamese": "vi", "Chinese": "zh" } def transcribe_audio(audio_file_path, model_size='base', language="en"): model = whisper.load_model(model_size) model.to("cpu") result = model.transcribe(audio_file_path, language=language) transcription = result["text"] segments = result["segments"] try: from whisper.utils import format_srt srt_text = format_srt(segments) except Exception: srt_text = generate_srt(segments) return transcription, srt_text, segments def generate_srt(segments): import datetime import srt subtitles = [] for i, seg in enumerate(segments): start_td = datetime.timedelta(seconds=seg["start"]) end_td = datetime.timedelta(seconds=seg["end"]) subtitle = srt.Subtitle(index=i+1, start=start_td, end=end_td, content=seg["text"]) subtitles.append(subtitle) return srt.compose(subtitles) def prepare_chapter_prompt(srt_text): """ Prepare a complete prompt (system + user instructions) for ChatGPT models. Although the prompt is in English, it instructs the model to output chapter headers in the same language as the provided SRT transcript. The output format should be one chapter per line: "mm:ss Chapter Title". """ system_prompt = ( "You are a highly skilled video content segmentation and optimization expert. " "Your task is to analyze a transcript of a YouTube video provided in SRT format and produce engaging and concise chapter headers. " "Each chapter header must be on its own line in the exact format: 'mm:ss Chapter Title'.\n\n" "- 'mm:ss' represents the starting time of the chapter (minutes and seconds).\n" "- 'Chapter Title' must be a catchy, audience-friendly title that summarizes the key idea or transition at that point in the video.\n\n" "IMPORTANT: Although these instructions are in English, please ensure that your output is in the same language as the provided SRT transcript." ) user_prompt = ( "Below is the transcript of a YouTube video in SRT format:\n\n" "```\n" f"{srt_text}\n" "```\n\n" "Please generate only the chapter breakdown using the guidelines above. " "Each chapter header should be formatted as:\n" "mm:ss Chapter Title" ) final_prompt = system_prompt + "\n\n" + user_prompt return final_prompt def format_prompt_html(prompt): """ Displays the prompt in a read-only textarea using Gradio's color variables for background and text. Includes a 'Copy Prompt' button (blue) and a short 'Prompt Copied!' confirmation message. """ html_content = f"""