Athspi committed on
Commit
818e336
·
verified ·
1 Parent(s): e6d59c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +136 -131
app.py CHANGED
@@ -1,9 +1,11 @@
1
  import os
 
2
  import google.generativeai as genai
3
  from moviepy.video.io.VideoFileClip import VideoFileClip
4
  import tempfile
5
  import logging
6
  import gradio as gr
 
7
 
8
  # Suppress moviepy logs
9
  logging.getLogger("moviepy").setLevel(logging.ERROR)
@@ -12,159 +14,162 @@ logging.getLogger("moviepy").setLevel(logging.ERROR)
12
  genai.configure(api_key=os.environ["GEMINI_API_KEY"])
13
 
14
  # Create the Gemini model
15
- generation_config = {
16
- "temperature": 1,
17
- "top_p": 0.95,
18
- "top_k": 40,
19
- "max_output_tokens": 8192,
20
- "response_mime_type": "text/plain",
21
- }
22
-
23
- model = genai.GenerativeModel(
24
- model_name="gemini-2.0-flash-exp",
25
- generation_config=generation_config,
26
- )
27
-
28
- # List of all supported languages
29
  SUPPORTED_LANGUAGES = [
30
- "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
31
- "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan", "Dutch",
32
- "Arabic", "Swedish", "Italian", "Indonesian", "Hindi", "Finnish", "Vietnamese",
33
- "Hebrew", "Ukrainian", "Greek", "Malay", "Czech", "Romanian", "Danish",
34
- "Hungarian", "Tamil", "Norwegian", "Thai", "Urdu", "Croatian", "Bulgarian",
35
- "Lithuanian", "Latin", "Maori", "Malayalam", "Welsh", "Slovak", "Telugu",
36
- "Persian", "Latvian", "Bengali", "Serbian", "Azerbaijani", "Slovenian",
37
- "Kannada", "Estonian", "Macedonian", "Breton", "Basque", "Icelandic",
38
- "Armenian", "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian",
39
- "Swahili", "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer", "Shona",
40
- "Yoruba", "Somali", "Afrikaans", "Occitan", "Georgian", "Belarusian",
41
- "Tajik", "Sindhi", "Gujarati", "Amharic", "Yiddish", "Lao", "Uzbek",
42
- "Faroese", "Haitian Creole", "Pashto", "Turkmen", "Nynorsk", "Maltese",
43
- "Sanskrit", "Luxembourgish", "Burmese", "Tibetan", "Tagalog", "Malagasy",
44
- "Assamese", "Tatar", "Hawaiian", "Lingala", "Hausa", "Bashkir", "Javanese",
45
- "Sundanese"
46
  ]
47
 
48
- def extract_audio_from_video(video_file):
49
- """Extract audio from a video file and save it as a WAV file."""
50
- video = VideoFileClip(video_file)
51
- audio_file = os.path.join(tempfile.gettempdir(), "extracted_audio.wav")
52
- video.audio.write_audiofile(audio_file, fps=16000, logger=None) # Suppress logs
53
- return audio_file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
- def transcribe_audio_with_gemini(audio_file):
56
- """Transcribe audio using Gemini."""
57
- with open(audio_file, "rb") as f:
 
 
 
 
 
 
 
 
 
 
 
 
58
  audio_data = f.read()
59
 
60
- # Create proper audio blob
61
- audio_blob = {
62
- 'mime_type': 'audio/wav',
63
- 'data': audio_data
64
- }
65
-
66
- # Transcribe audio
67
- convo = model.start_chat()
68
- convo.send_message("You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language. Respond only with the transcription.")
69
- response = convo.send_message(audio_blob)
70
- return response.text.strip()
71
-
72
- def generate_subtitles(transcription):
73
- """Generate SRT subtitles from transcription."""
74
- # Split transcription into lines (assuming each line is a sentence)
75
- lines = transcription.split("\n")
76
 
77
- # Generate SRT format subtitles
78
- srt_subtitles = ""
79
- for i, line in enumerate(lines, start=1):
80
- start_time = i * 5 # Placeholder: 5 seconds per line
81
- end_time = start_time + 5
82
- start_time_srt = format_timestamp(start_time)
83
- end_time_srt = format_timestamp(end_time)
84
- srt_subtitles += f"{i}\n{start_time_srt} --> {end_time_srt}\n{line}\n\n"
 
 
 
 
 
 
85
 
86
- return srt_subtitles
87
-
88
- def format_timestamp(seconds):
89
- """Convert seconds to SRT timestamp format (HH:MM:SS,mmm)."""
90
- hours = int(seconds // 3600)
91
- minutes = int((seconds % 3600) // 60)
92
- seconds = seconds % 60
93
- milliseconds = int((seconds - int(seconds)) * 1000)
94
- return f"{hours:02}:{minutes:02}:{int(seconds):02},{milliseconds:03}"
95
-
96
- def translate_srt(srt_text, target_language):
97
- """Translate an SRT file while preserving timestamps."""
98
- prompt = f"Translate the following SRT subtitles into {target_language}. Preserve the SRT format (timestamps and structure). Translate only the text after the timestamp. Do not add explanations or extra text.\n\n{srt_text}"
99
  response = model.generate_content(prompt)
100
  return response.text
101
 
102
- def process_video(video_file, language="Auto Detect", translate_to=None):
103
- """Process a video file to generate and translate subtitles."""
104
- # Extract audio from the video
105
- audio_file = extract_audio_from_video(video_file)
106
 
107
- # Transcribe audio using Gemini
108
- transcription = transcribe_audio_with_gemini(audio_file)
 
109
 
110
- # Generate subtitles
111
- subtitles = generate_subtitles(transcription)
 
 
112
 
113
- # Save original subtitles to an SRT file
114
- original_srt_file = os.path.join(tempfile.gettempdir(), "original_subtitles.srt")
115
- with open(original_srt_file, "w", encoding="utf-8") as f:
116
- f.write(subtitles)
 
 
 
117
 
118
- # Translate subtitles if a target language is provided
119
- translated_srt_file = None
120
- if translate_to and translate_to != "None":
121
- translated_subtitles = translate_srt(subtitles, translate_to)
122
- translated_srt_file = os.path.join(tempfile.gettempdir(), "translated_subtitles.srt")
123
- with open(translated_srt_file, "w", encoding="utf-8") as f:
124
- f.write(translated_subtitles)
125
 
126
- # Clean up extracted audio file
127
- os.remove(audio_file)
128
-
129
- return original_srt_file, translated_srt_file, "Detected Language: Auto"
130
-
131
- # Define the Gradio interface
132
- with gr.Blocks(title="AutoSubGen - AI Video Subtitle Generator") as demo:
133
- # Header
134
- with gr.Column():
135
- gr.Markdown("# 🎥 AutoSubGen")
136
- gr.Markdown("### AI-Powered Video Subtitle Generator")
137
- gr.Markdown("Automatically generate and translate subtitles for your videos in **SRT format**. Supports **100+ languages** and **auto-detection**.")
138
 
139
- # Main content
140
- with gr.Tab("Generate Subtitles"):
141
- gr.Markdown("### Upload a video file to generate subtitles.")
142
- with gr.Row():
143
- video_input = gr.Video(label="Upload Video File", scale=2)
144
- language_dropdown = gr.Dropdown(
145
  choices=SUPPORTED_LANGUAGES,
146
- label="Select Language",
147
- value="Auto Detect",
148
- scale=1
149
  )
150
- translate_to_dropdown = gr.Dropdown(
151
- choices=["None"] + SUPPORTED_LANGUAGES[1:], # Exclude "Auto Detect"
152
  label="Translate To",
153
- value="None",
154
- scale=1
155
  )
156
- generate_button = gr.Button("Generate Subtitles", variant="primary")
157
- with gr.Row():
158
- original_subtitle_output = gr.File(label="Download Original Subtitles (SRT)")
159
- translated_subtitle_output = gr.File(label="Download Translated Subtitles (SRT)")
160
- detected_language_output = gr.Textbox(label="Detected Language")
 
 
 
 
 
 
161
 
162
- # Link button to function
163
- generate_button.click(
164
  process_video,
165
- inputs=[video_input, language_dropdown, translate_to_dropdown],
166
- outputs=[original_subtitle_output, translated_subtitle_output, detected_language_output]
167
  )
168
 
169
- # Launch the Gradio interface with a public link
170
- demo.launch(share=True)
 
1
  import os
2
+ import re
3
  import google.generativeai as genai
4
  from moviepy.video.io.VideoFileClip import VideoFileClip
5
  import tempfile
6
  import logging
7
  import gradio as gr
8
+ from datetime import timedelta
9
 
10
  # Suppress moviepy logs
11
  logging.getLogger("moviepy").setLevel(logging.ERROR)
 
14
  genai.configure(api_key=os.environ["GEMINI_API_KEY"])
15
 
16
  # Create the Gemini model
17
+ model = genai.GenerativeModel("gemini-2.0-flash-exp")
18
+
19
+ # Enhanced language support
 
 
 
 
 
 
 
 
 
 
 
20
  SUPPORTED_LANGUAGES = [
21
+ "Auto Detect", "English", "Spanish", "French", "German", "Italian",
22
+ "Portuguese", "Russian", "Japanese", "Korean", "Arabic", "Hindi",
23
+ "Chinese", "Dutch", "Turkish", "Polish", "Vietnamese", "Thai"
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  ]
25
 
26
+ # Magic Prompts
27
+ TRANSCRIPTION_PROMPT = """You are a professional subtitling expert. Analyze this audio and generate precise subtitles with accurate timestamps following these rules:
28
+
29
+ 1. Identify natural speech segments (3-7 words)
30
+ 2. Include exact start/end times in [HH:MM:SS.ms] format
31
+ 3. Add speaker identification when multiple voices
32
+ 4. Preserve emotional tone and punctuation
33
+ 5. Format exactly like:
34
+
35
+ [00:00:05.250 -> 00:00:08.100]
36
+ Hello world! This is an example.
37
+
38
+ [00:00:08.500 -> 00:00:10.200]
39
+ Second subtitle line.
40
+
41
+ Return ONLY the subtitles with timestamps, no explanations."""
42
+
43
+ TRANSLATION_PROMPT = """You are a certified translator. Translate these subtitles to {target_language} following these rules:
44
+
45
+ 1. Keep timestamps EXACTLY as original
46
+ 2. Match subtitle length to original timing
47
+ 3. Preserve names/technical terms
48
+ 4. Use natural colloquial speech
49
+ 5. Maintain line breaks and formatting
50
+
51
+ ORIGINAL SUBTITLES:
52
+ {subtitles}
53
+
54
+ TRANSLATED {target_language} SUBTITLES:"""
55
 
56
def extract_audio(video_path):
    """Extract the audio track of a video into a temporary 16-bit PCM WAV file.

    Args:
        video_path: Path to the video file to read.

    Returns:
        Path to a newly created WAV file (44.1 kHz, 16-bit PCM). The caller
        owns the file and is responsible for deleting it.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = VideoFileClip(video_path)
    try:
        if video.audio is None:
            raise ValueError("The video does not contain an audio track.")

        # A unique name per call keeps concurrent requests from overwriting
        # each other's audio (a fixed name in the temp dir is shared).
        fd, audio_path = tempfile.mkstemp(prefix="audio_", suffix=".wav")
        os.close(fd)
        try:
            # NOTE(review): inline audio sent to Gemini is size-limited; confirm
            # that 44.1 kHz WAV output for long videos stays within that limit.
            video.audio.write_audiofile(
                audio_path,
                fps=44100,
                nbytes=2,
                codec='pcm_s16le',
                logger=None,  # keep the progress bar out of the server logs
            )
        except Exception:
            # Do not leave a partial file behind when encoding fails.
            os.remove(audio_path)
            raise
    finally:
        # Release the ffmpeg reader processes/file handles held by the clip.
        video.close()
    return audio_path
62
+
63
def parse_timestamp(timestamp_str):
    """Convert a clock-style timestamp string to seconds.

    Accepts ``HH:MM:SS.mmm`` as well as the shorter ``MM:SS.mmm`` and
    ``SS.mmm`` forms. Surrounding whitespace is ignored and a comma is
    accepted as the decimal separator (SRT style).

    Args:
        timestamp_str: Timestamp text such as ``"00:01:05.250"``.

    Returns:
        The timestamp as a float number of seconds.

    Raises:
        ValueError: If the string has more than three ``:``-separated
            components or a component is not numeric (including empty input).
    """
    parts = timestamp_str.strip().replace(',', '.').split(':')
    if len(parts) > 3:
        raise ValueError(f"Invalid timestamp: {timestamp_str!r}")

    # Fold left to right so 1, 2 or 3 components all work:
    # each step promotes the running total by one unit (x60).
    seconds = 0.0
    for part in parts:
        seconds = seconds * 60 + float(part)
    return seconds
67
+
68
def gemini_transcribe(audio_path):
    """Send the WAV file at *audio_path* to Gemini and return the reply text.

    The file is read fully into memory and passed inline, together with
    TRANSCRIPTION_PROMPT, in a single ``generate_content`` request.
    """
    with open(audio_path, "rb") as wav_file:
        wav_bytes = wav_file.read()

    audio_part = {'mime_type': 'audio/wav', 'data': wav_bytes}
    reply = model.generate_content(contents=[TRANSCRIPTION_PROMPT, audio_part])
    return reply.text
78
+
79
def _format_srt_timestamp(seconds):
    """Format a number of seconds as an SRT timestamp (``HH:MM:SS,mmm``)."""
    # Work in whole milliseconds to avoid float artefacts such as ",999".
    total_ms = max(0, round(seconds * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def create_srt(subtitles_text):
    """Convert Gemini's raw timestamped output to SRT format.

    The input is expected to be blank-line separated entries of the form::

        [00:00:05.250 -> 00:00:08.100]
        Subtitle text

    Entries without a leading ``[start -> end]`` header, with an unparsable
    timestamp, or with no text are skipped. Cues are numbered consecutively
    from 1 regardless of skipped entries, and are separated by a blank line
    as SRT requires.

    Args:
        subtitles_text: Raw model output.

    Returns:
        The SRT document as a string; empty if no valid entry was found.
    """
    if not subtitles_text:
        return ""

    # Tolerate whitespace-only "blank" lines and CRLF line endings.
    entries = re.split(r'\n\s*\n', subtitles_text.strip())
    cues = []

    for entry in entries:
        time_match = re.match(r'\s*\[\s*(.*?)\s*-+>\s*(.*?)\s*\]', entry)
        if not time_match:
            continue

        try:
            start_time = parse_timestamp(time_match.group(1))
            end_time = parse_timestamp(time_match.group(2))
        except ValueError:
            # One malformed timestamp must not abort the whole conversion.
            continue

        text = entry[time_match.end():].strip()
        if not text:
            continue

        # Number by emitted cues, not by raw entries, so skipped entries do
        # not leave gaps in the SRT sequence.
        cues.append(
            f"{len(cues) + 1}\n"
            f"{_format_srt_timestamp(start_time)} --> "
            f"{_format_srt_timestamp(end_time)}\n"
            f"{text}\n"
        )

    # Joining with a newline yields the blank line required between cues.
    return "\n".join(cues)
100
+
101
def translate_subtitles(subtitles, target_lang):
    """Ask Gemini to translate *subtitles* into *target_lang*.

    Fills TRANSLATION_PROMPT with the subtitles and the target language and
    returns the text of the model's reply unchanged.
    """
    filled_prompt = TRANSLATION_PROMPT.format(
        subtitles=subtitles,
        target_language=target_lang,
    )
    return model.generate_content(filled_prompt).text
109
 
110
def process_video(video_path, source_lang, target_lang):
    """Run the full pipeline: extract audio, transcribe, build SRT, translate.

    Args:
        video_path: Path of the uploaded video file.
        source_lang: Source language selected in the UI. Currently not used
            by the pipeline; accepted so the UI wiring stays unchanged.
        target_lang: Language to translate into, or ``"None"``/``None`` to
            skip translation.

    Returns:
        A tuple ``(original_srt_path, translated_srt_path)``; the second item
        is ``None`` when no translation was requested.

    Raises:
        gr.Error: If no video was provided.
    """
    if not video_path:
        raise gr.Error("Please upload a video first.")

    # Audio extraction
    audio_path = extract_audio(video_path)
    try:
        # Transcription
        raw_transcription = gemini_transcribe(audio_path)
    finally:
        # Cleanup happens even when the Gemini call fails, so failed requests
        # do not leave large WAV files in the temp directory.
        if os.path.exists(audio_path):
            os.remove(audio_path)

    srt_original = create_srt(raw_transcription)

    # A per-request directory keeps concurrent users from overwriting each
    # other's subtitle files.
    output_dir = tempfile.mkdtemp(prefix="subtitles_")

    # Save original (explicit UTF-8: subtitles are frequently non-ASCII and
    # the platform default encoding may not be able to represent them).
    original_srt = os.path.join(output_dir, "original.srt")
    with open(original_srt, "w", encoding="utf-8") as f:
        f.write(srt_original)

    # Translation
    translated_srt = None
    if target_lang and target_lang != "None":
        translated_text = translate_subtitles(srt_original, target_lang)
        translated_srt = os.path.join(output_dir, "translated.srt")
        with open(translated_srt, "w", encoding="utf-8") as f:
            f.write(translated_text)

    return original_srt, translated_srt
136
+
137
+ # Gradio Interface
138
# Two-column layout: inputs and the action button on the left, downloadable
# results on the right.
with gr.Blocks(theme=gr.themes.Default(spacing_size="sm")) as app:
    gr.Markdown("# 🎬 Professional Subtitle Studio")
    gr.Markdown("Generate broadcast-quality subtitles with perfect timing")

    with gr.Row():
        with gr.Column():
            video_input = gr.Video(label="Upload Video", sources=["upload"])
            # The row must be entered as a context manager; merely creating it
            # renders an empty row and leaves the dropdowns outside of it.
            with gr.Row() as lang_row:
                source_lang = gr.Dropdown(
                    label="Source Language",
                    choices=SUPPORTED_LANGUAGES,
                    value="Auto Detect"
                )
                target_lang = gr.Dropdown(
                    label="Translate To",
                    # "Auto Detect" is not a valid translation target.
                    choices=["None"] + SUPPORTED_LANGUAGES[1:],
                    value="None"
                )
            process_btn = gr.Button("Generate Subtitles", variant="primary")

        with gr.Column():
            original_sub = gr.File(label="Original Subtitles")
            translated_sub = gr.File(label="Translated Subtitles")
            # Static placeholder: no event handler below writes to it.
            preview_area = gr.HTML("""
            <div style='border: 2px dashed #666; padding: 20px; border-radius: 8px;'>
            <h3 style='margin-top: 0;'>Subtitle Preview</h3>
            <div id='preview-content' style='height: 300px; overflow-y: auto;'></div>
            </div>
            """)

    # Event listeners have to be registered inside the Blocks context.
    process_btn.click(
        process_video,
        inputs=[video_input, source_lang, target_lang],
        outputs=[original_sub, translated_sub]
    )

if __name__ == "__main__":
    app.launch(server_port=7860, share=True)