import datetime

import gradio as gr
import srt
import whisper

LANGUAGE_OPTIONS = {
    "Afrikaans": "af",
    "Arabic": "ar",
    "Azerbaijani": "az",
    "Belarusian": "be",
    "Bulgarian": "bg",
    "Bengali": "bn",
    "Catalan": "ca",
    "Czech": "cs",
    "Welsh": "cy",
    "Danish": "da",
    "German": "de",
    "Greek": "el",
    "English": "en",
    "Spanish": "es",
    "Estonian": "et",
    "Persian": "fa",
    "Finnish": "fi",
    "French": "fr",
    "Irish": "ga",
    "Galician": "gl",
    "Gujarati": "gu",
    "Hebrew": "he",
    "Hindi": "hi",
    "Croatian": "hr",
    "Hungarian": "hu",
    "Armenian": "hy",
    "Indonesian": "id",
    "Icelandic": "is",
    "Italian": "it",
    "Japanese": "ja",
    "Georgian": "ka",
    "Kazakh": "kk",
    "Khmer": "km",
    "Kannada": "kn",
    "Korean": "ko",
    "Lithuanian": "lt",
    "Latvian": "lv",
    "Macedonian": "mk",
    "Malayalam": "ml",
    "Mongolian": "mn",
    "Marathi": "mr",
    "Malay": "ms",
    "Maltese": "mt",
    "Nepali": "ne",
    "Dutch": "nl",
    "Norwegian": "no",
    "Odia": "or",
    "Punjabi": "pa",
    "Polish": "pl",
    "Portuguese": "pt",
    "Romanian": "ro",
    "Russian": "ru",
    "Sinhala": "si",
    "Slovak": "sk",
    "Slovenian": "sl",
    "Albanian": "sq",
    "Serbian": "sr",
    "Swedish": "sv",
    "Swahili": "sw",
    "Tamil": "ta",
    "Telugu": "te",
    "Thai": "th",
    "Turkish": "tr",
    "Ukrainian": "uk",
    "Urdu": "ur",
    "Vietnamese": "vi",
    "Chinese": "zh"
}

def transcribe_audio(audio_file_path, model_size="base", language="en"):
    """Transcribe an audio file with Whisper and return (text, SRT text, segments)."""
    # Load the model on the CPU; change the device here if a GPU is available.
    model = whisper.load_model(model_size, device="cpu")
    result = model.transcribe(audio_file_path, language=language)
    transcription = result["text"]
    segments = result["segments"]

    try:
        # Not all Whisper releases ship an SRT formatter in whisper.utils;
        # fall back to the local generate_srt helper when the import fails.
        from whisper.utils import format_srt
        srt_text = format_srt(segments)
    except Exception:
        srt_text = generate_srt(segments)

    return transcription, srt_text, segments

def generate_srt(segments):
    """Convert Whisper segments into SRT subtitle text via the srt library."""
    subtitles = []
    for i, seg in enumerate(segments):
        start_td = datetime.timedelta(seconds=seg["start"])
        end_td = datetime.timedelta(seconds=seg["end"])
        subtitle = srt.Subtitle(index=i + 1, start=start_td, end=end_td, content=seg["text"])
        subtitles.append(subtitle)
    return srt.compose(subtitles)
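
# Illustrative sketch only (not used by the app): given a single hypothetical
# segment, generate_srt produces a standard SRT block like this:
#
#   generate_srt([{"start": 0.0, "end": 3.5, "text": "Hello and welcome"}])
#
#   1
#   00:00:00,000 --> 00:00:03,500
#   Hello and welcome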

def prepare_chapter_prompt(srt_text):
    """
    Prepare a complete prompt (system + user instructions) for ChatGPT models.
    Although the prompt is in English, it instructs the model to output chapter headers in the same language as the provided SRT transcript.
    The output format should be one chapter per line: "mm:ss Chapter Title".
    """
    system_prompt = (
        "You are a highly skilled video content segmentation and optimization expert. "
        "Your task is to analyze a transcript of a YouTube video provided in SRT format and produce engaging and concise chapter headers. "
        "Each chapter header must be on its own line in the exact format: 'mm:ss Chapter Title'.\n\n"
        "- 'mm:ss' represents the starting time of the chapter (minutes and seconds).\n"
        "- 'Chapter Title' must be a catchy, audience-friendly title that summarizes the key idea or transition at that point in the video.\n\n"
        "IMPORTANT: Although these instructions are in English, please ensure that your output is in the same language as the provided SRT transcript."
    )
    
    user_prompt = (
        "Below is the transcript of a YouTube video in SRT format:\n\n"
        "```\n"
        f"{srt_text}\n"
        "```\n\n"
        "Please generate only the chapter breakdown using the guidelines above. "
        "Each chapter header should be formatted as:\n"
        "mm:ss Chapter Title"
    )
    
    final_prompt = system_prompt + "\n\n" + user_prompt
    return final_prompt
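
# For reference, the model is expected to reply with one chapter per line in the
# 'mm:ss Chapter Title' format requested above, e.g. (hypothetical titles):
#
#   00:00 Welcome and Overview
#   02:15 Setting Up the Project
#   07:40 Final Tips and Wrap-Up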

def format_prompt_html(prompt):
    """
    Return HTML that renders the prompt in a read-only textarea, styled with
    Gradio's CSS variables for background, text, and border colors.
    Includes a blue 'Copy Prompt' button and a brief 'Prompt Copied!' confirmation message.
    """
    html_content = f"""
    <div style="display: flex; flex-direction: column; gap: 10px; margin-top: 10px;">
      <textarea id="prompt_text" rows="10" 
        style="width: 100%; resize: vertical; 
               background-color: var(--block-background-fill); 
               color: var(--block-text-color); 
               border: 1px solid var(--block-border-color); 
               border-radius: 4px;"
        readonly>{prompt}</textarea>
      <button 
        style="width: 150px; padding: 8px; 
               background-color: #007bff; 
               color: white; 
               border: none; 
               border-radius: 4px; 
               cursor: pointer;"
        onclick="
          navigator.clipboard.writeText(document.getElementById('prompt_text').value);
          const copiedMsg = document.getElementById('copied_msg');
          copiedMsg.style.display = 'inline';
          setTimeout(() => copiedMsg.style.display = 'none', 2000);
        ">
        Copy Prompt
      </button>
      <span id="copied_msg" style="display: none; color: var(--primary-text-color); font-weight: bold;">Prompt Copied!</span>
    </div>
    """
    return html_content
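
# Note: the copy button relies on navigator.clipboard, which browsers expose only
# in secure contexts (HTTPS or localhost). If the app is served over plain HTTP
# from another host, clicking 'Copy Prompt' may silently do nothing.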

def process_audio(audio, language_name):
    """Gradio callback: transcribe the uploaded audio and build the chapter-prompt HTML."""
    lang_code = LANGUAGE_OPTIONS.get(language_name, "en")
    try:
        transcription, srt_text, segments = transcribe_audio(audio, model_size='base', language=lang_code)
    except Exception as e:
        return f"Error during transcription: {str(e)}", "", ""
    
    chapter_prompt = prepare_chapter_prompt(srt_text)
    prompt_html = format_prompt_html(chapter_prompt)
    return transcription, srt_text, prompt_html

iface = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio"),
        gr.Dropdown(choices=list(LANGUAGE_OPTIONS.keys()), label="Audio Language", value="English")
    ],
    outputs=[
        gr.Textbox(label="Full Transcription", lines=10),
        gr.Textbox(label="SRT File Content", lines=10),
        gr.HTML(label="Prepared Chapter Prompt (Copy & Paste into ChatGPT)")
    ],
    title="Video Chapter Splitter from Audio (MP3)",
    description=(
        "Upload an audio file (e.g., MP3) of your YouTube video and select the audio language. "
        "The app will transcribe the audio using Whisper, generate subtitles in SRT format, "
        "and prepare a single, complete prompt that instructs ChatGPT -> o1 model to generate a chapter breakdown in the format 'mm:ss Chapter Title'.\n\n"
        "Click the 'Copy Prompt' button to copy the entire prompt, and a brief 'Prompt Copied!' message will appear."
    )
)

if __name__ == "__main__":
    iface.launch()