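"""Video Chapter Splitter from Audio (MP3).

A Gradio app that transcribes an uploaded audio file with Whisper, generates
subtitles in SRT format, and prepares a single copy-and-paste prompt asking
ChatGPT to produce a chapter breakdown in the form 'mm:ss Chapter Title'.
"""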
import os
import datetime
import hashlib
import requests
import numpy as np
import gradio as gr
import whisper
import srt
import torch
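# Dropdown label -> ISO 639-1 style code passed to Whisper's `language` argument.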
LANGUAGE_OPTIONS = {
    "Afrikaans": "af",
    "Arabic": "ar",
    "Azerbaijani": "az",
    "Belarusian": "be",
    "Bulgarian": "bg",
    "Bengali": "bn",
    "Catalan": "ca",
    "Czech": "cs",
    "Welsh": "cy",
    "Danish": "da",
    "German": "de",
    "Greek": "el",
    "English": "en",
    "Spanish": "es",
    "Estonian": "et",
    "Persian": "fa",
    "Finnish": "fi",
    "French": "fr",
    "Irish": "ga",
    "Galician": "gl",
    "Gujarati": "gu",
    "Hebrew": "he",
    "Hindi": "hi",
    "Croatian": "hr",
    "Hungarian": "hu",
    "Armenian": "hy",
    "Indonesian": "id",
    "Icelandic": "is",
    "Italian": "it",
    "Japanese": "ja",
    "Georgian": "ka",
    "Kazakh": "kk",
    "Khmer": "km",
    "Kannada": "kn",
    "Korean": "ko",
    "Lithuanian": "lt",
    "Latvian": "lv",
    "Macedonian": "mk",
    "Malayalam": "ml",
    "Mongolian": "mn",
    "Marathi": "mr",
    "Malay": "ms",
    "Maltese": "mt",
    "Nepali": "ne",
    "Dutch": "nl",
    "Norwegian": "no",
    "Odia": "or",
    "Punjabi": "pa",
    "Polish": "pl",
    "Portuguese": "pt",
    "Romanian": "ro",
    "Russian": "ru",
    "Sinhala": "si",
    "Slovak": "sk",
    "Slovenian": "sl",
    "Albanian": "sq",
    "Serbian": "sr",
    "Swedish": "sv",
    "Swahili": "sw",
    "Tamil": "ta",
    "Telugu": "te",
    "Thai": "th",
    "Turkish": "tr",
    "Ukrainian": "uk",
    "Urdu": "ur",
    "Vietnamese": "vi",
    "Chinese": "zh"
}
def transcribe_audio(audio_file_path, model_size='base', language="en"):
    """Transcribe an audio file with Whisper and return (text, SRT text, segments)."""
    model = whisper.load_model(model_size)
    model.to("cpu")
    result = model.transcribe(audio_file_path, language=language)
    transcription = result["text"]
    segments = result["segments"]
    try:
        # Some whisper builds expose an SRT formatter; fall back to the local
        # generate_srt() if it is unavailable.
        from whisper.utils import format_srt
        srt_text = format_srt(segments)
    except Exception:
        srt_text = generate_srt(segments)
    return transcription, srt_text, segments
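# Example of calling transcribe_audio directly (the file name "episode.mp3" is
# just an illustrative placeholder, not something the app provides):
#
#     text, srt_text, segments = transcribe_audio("episode.mp3", language="en")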
def generate_srt(segments):
    """Build SRT text from Whisper segments (each with 'start', 'end', 'text')."""
    subtitles = []
    for i, seg in enumerate(segments):
        start_td = datetime.timedelta(seconds=seg["start"])
        end_td = datetime.timedelta(seconds=seg["end"])
        subtitle = srt.Subtitle(index=i + 1, start=start_td, end=end_td, content=seg["text"])
        subtitles.append(subtitle)
    return srt.compose(subtitles)
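# For reference, srt.compose() emits standard SRT blocks along these lines
# (timestamps below are made-up examples):
#
#     1
#     00:00:00,000 --> 00:00:04,500
#     First segment text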
def prepare_chapter_prompt(srt_text):
    """
    Prepare a complete prompt (system + user instructions) for ChatGPT models.
    Although the prompt is in English, it instructs the model to output chapter
    headers in the same language as the provided SRT transcript.
    The output format is one chapter per line: "mm:ss Chapter Title".
    """
    system_prompt = (
        "You are a highly skilled video content segmentation and optimization expert. "
        "Your task is to analyze a transcript of a YouTube video provided in SRT format and produce engaging and concise chapter headers. "
        "Each chapter header must be on its own line in the exact format: 'mm:ss Chapter Title'.\n\n"
        "- 'mm:ss' represents the starting time of the chapter (minutes and seconds).\n"
        "- 'Chapter Title' must be a catchy, audience-friendly title that summarizes the key idea or transition at that point in the video.\n\n"
        "IMPORTANT: Although these instructions are in English, please ensure that your output is in the same language as the provided SRT transcript."
    )
    user_prompt = (
        "Below is the transcript of a YouTube video in SRT format:\n\n"
        "```\n"
        f"{srt_text}\n"
        "```\n\n"
        "Please generate only the chapter breakdown using the guidelines above. "
        "Each chapter header should be formatted as:\n"
        "mm:ss Chapter Title"
    )
    final_prompt = system_prompt + "\n\n" + user_prompt
    return final_prompt
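# With this prompt, the model is expected to reply with one chapter per line,
# e.g. (purely illustrative):
#
#     00:00 Intro
#     02:15 Main walkthrough
#     07:40 Wrap-up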
def format_prompt_html(prompt):
    """
    Displays the prompt in a read-only textarea using Gradio's color variables for background and text.
    Includes a 'Copy Prompt' button (blue) and a short 'Prompt Copied!' confirmation message.
    """
    html_content = f"""
    <div style="display: flex; flex-direction: column; gap: 10px; margin-top: 10px;">
      <textarea id="prompt_text" rows="10"
                style="width: 100%; resize: vertical;
                       background-color: var(--block-background-fill);
                       color: var(--block-text-color);
                       border: 1px solid var(--block-border-color);
                       border-radius: 4px;"
                readonly>{prompt}</textarea>
      <button
        style="width: 150px; padding: 8px;
               background-color: #007bff;
               color: white;
               border: none;
               border-radius: 4px;
               cursor: pointer;"
        onclick="
          navigator.clipboard.writeText(document.getElementById('prompt_text').value);
          const copiedMsg = document.getElementById('copied_msg');
          copiedMsg.style.display = 'inline';
          setTimeout(() => copiedMsg.style.display = 'none', 2000);
        ">
        Copy Prompt
      </button>
      <span id="copied_msg" style="display: none; color: var(--primary-text-color); font-weight: bold;">Prompt Copied!</span>
    </div>
    """
    return html_content
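# Note: the copy action is handled entirely in the browser by the inline onclick
# handler above; no Python callback is involved.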
def process_audio(audio, language_name):
    """Gradio callback: transcribe the upload, then build the SRT text and chapter prompt."""
    lang_code = LANGUAGE_OPTIONS.get(language_name, "en")
    try:
        transcription, srt_text, segments = transcribe_audio(audio, model_size='base', language=lang_code)
    except Exception as e:
        return f"Error during transcription: {str(e)}", "", ""
    chapter_prompt = prepare_chapter_prompt(srt_text)
    prompt_html = format_prompt_html(chapter_prompt)
    return transcription, srt_text, prompt_html
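# The three return values map, in order, onto the three output components of the
# interface below: transcription textbox, SRT textbox, and the HTML prompt block.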
iface = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio"),
        gr.Dropdown(choices=list(LANGUAGE_OPTIONS.keys()), label="Audio Language", value="English")
    ],
    outputs=[
        gr.Textbox(label="Full Transcription", lines=10),
        gr.Textbox(label="SRT File Content", lines=10),
        gr.HTML(label="Prepared Chapter Prompt (Copy & Paste into ChatGPT)")
    ],
    title="Video Chapter Splitter from Audio (MP3)",
    description=(
        "Upload an audio file (e.g., MP3) of your YouTube video and select the audio language. "
        "The app will transcribe the audio using Whisper, generate subtitles in SRT format, "
        "and prepare a single, complete prompt that instructs ChatGPT (for example, the o1 model) "
        "to generate a chapter breakdown in the format 'mm:ss Chapter Title'.\n\n"
        "Click the 'Copy Prompt' button to copy the entire prompt; a brief 'Prompt Copied!' message will confirm it."
    )
)
if __name__ == "__main__":
    iface.launch()
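# For local testing outside Spaces, iface.launch(share=True) can provide a temporary
# public URL; the default launch() above is all a Hugging Face Space needs.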