Athspi committed on
Commit
43fec16
·
verified ·
1 Parent(s): 818e336

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -95
app.py CHANGED
@@ -12,11 +12,9 @@ logging.getLogger("moviepy").setLevel(logging.ERROR)
12
 
13
  # Configure Gemini API
14
  genai.configure(api_key=os.environ["GEMINI_API_KEY"])
15
-
16
- # Create the Gemini model
17
  model = genai.GenerativeModel("gemini-2.0-flash-exp")
18
 
19
- # Enhanced language support
20
  SUPPORTED_LANGUAGES = [
21
  "Auto Detect", "English", "Spanish", "French", "German", "Italian",
22
  "Portuguese", "Russian", "Japanese", "Korean", "Arabic", "Hindi",
@@ -24,82 +22,100 @@ SUPPORTED_LANGUAGES = [
24
  ]
25
 
26
  # Magic Prompts
27
- TRANSCRIPTION_PROMPT = """You are a professional subtitling expert. Analyze this audio and generate precise subtitles with accurate timestamps following these rules:
28
 
29
- 1. Identify natural speech segments (3-7 words)
30
- 2. Include exact start/end times in [HH:MM:SS.ms] format
31
- 3. Add speaker identification when multiple voices
32
- 4. Preserve emotional tone and punctuation
33
- 5. Format exactly like:
34
 
35
  [00:00:05.250 -> 00:00:08.100]
36
- Hello world! This is an example.
37
-
38
- [00:00:08.500 -> 00:00:10.200]
39
- Second subtitle line.
40
 
41
- Return ONLY the subtitles with timestamps, no explanations."""
42
 
43
- TRANSLATION_PROMPT = """You are a certified translator. Translate these subtitles to {target_language} following these rules:
44
 
45
- 1. Keep timestamps EXACTLY as original
46
- 2. Match subtitle length to original timing
47
- 3. Preserve names/technical terms
48
- 4. Use natural colloquial speech
49
- 5. Maintain line breaks and formatting
50
 
51
- ORIGINAL SUBTITLES:
52
  {subtitles}
53
 
54
- TRANSLATED {target_language} SUBTITLES:"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  def extract_audio(video_path):
57
- """Extract high-quality audio from video"""
58
  video = VideoFileClip(video_path)
59
- audio_path = os.path.join(tempfile.gettempdir(), "high_quality_audio.wav")
60
  video.audio.write_audiofile(audio_path, fps=44100, nbytes=2, codec='pcm_s16le')
61
  return audio_path
62
 
63
- def parse_timestamp(timestamp_str):
64
- """Convert timestamp string to seconds"""
65
- h, m, s = map(float, timestamp_str.split(':'))
66
- return h * 3600 + m * 60 + s
67
-
68
  def gemini_transcribe(audio_path):
69
- """Get timestamped transcription from Gemini"""
70
  with open(audio_path, "rb") as f:
71
  audio_data = f.read()
72
 
73
  response = model.generate_content(
74
- contents=[TRANSCRIPTION_PROMPT,
75
- {'mime_type': 'audio/wav', 'data': audio_data}]
76
  )
77
  return response.text
78
 
79
- def create_srt(subtitles_text):
80
- """Convert Gemini's raw output to SRT format"""
81
- entries = re.split(r'\n{2,}', subtitles_text.strip())
82
- srt_output = []
83
-
84
- for idx, entry in enumerate(entries, 1):
85
- time_match = re.match(r'\[(.*?) -> (.*?)\]', entry)
86
- if not time_match:
87
- continue
88
-
89
- start_time = parse_timestamp(time_match.group(1))
90
- end_time = parse_timestamp(time_match.group(2))
91
- text = entry.split(']', 1)[1].strip()
92
-
93
- srt_output.append(
94
- f"{idx}\n"
95
- f"{timedelta(seconds=start_time)} --> {timedelta(seconds=end_time)}\n"
96
- f"{text}\n"
97
- )
98
-
99
- return "".join(srt_output)
100
-
101
  def translate_subtitles(subtitles, target_lang):
102
- """Translate subtitles while preserving timing"""
103
  prompt = TRANSLATION_PROMPT.format(
104
  target_language=target_lang,
105
  subtitles=subtitles
@@ -108,41 +124,37 @@ def translate_subtitles(subtitles, target_lang):
108
  return response.text
109
 
110
  def process_video(video_path, source_lang, target_lang):
111
- """Full processing pipeline"""
112
- # Audio extraction
113
- audio_path = extract_audio(video_path)
114
-
115
- # Transcription
116
- raw_transcription = gemini_transcribe(audio_path)
117
- srt_original = create_srt(raw_transcription)
118
-
119
- # Save original
120
- original_srt = os.path.join(tempfile.gettempdir(), "original.srt")
121
- with open(original_srt, "w") as f:
122
- f.write(srt_original)
123
-
124
- # Translation
125
- translated_srt = None
126
- if target_lang != "None":
127
- translated_text = translate_subtitles(srt_original, target_lang)
128
- translated_srt = os.path.join(tempfile.gettempdir(), "translated.srt")
129
- with open(translated_srt, "w") as f:
130
- f.write(translated_text)
131
-
132
- # Cleanup
133
- os.remove(audio_path)
134
 
135
- return original_srt, translated_srt
 
 
136
 
137
  # Gradio Interface
138
- with gr.Blocks(theme=gr.themes.Default(spacing_size="sm")) as app:
139
- gr.Markdown("# 🎬 Professional Subtitle Studio")
140
- gr.Markdown("Generate broadcast-quality subtitles with perfect timing")
141
 
142
  with gr.Row():
 
143
  with gr.Column():
144
- video_input = gr.Video(label="Upload Video", sources=["upload"])
145
- lang_row = gr.Row()
146
  source_lang = gr.Dropdown(
147
  label="Source Language",
148
  choices=SUPPORTED_LANGUAGES,
@@ -153,17 +165,11 @@ with gr.Blocks(theme=gr.themes.Default(spacing_size="sm")) as app:
153
  choices=["None"] + SUPPORTED_LANGUAGES[1:],
154
  value="None"
155
  )
156
- process_btn = gr.Button("Generate Subtitles", variant="primary")
157
-
158
- with gr.Column():
159
- original_sub = gr.File(label="Original Subtitles")
160
- translated_sub = gr.File(label="Translated Subtitles")
161
- preview_area = gr.HTML("""
162
- <div style='border: 2px dashed #666; padding: 20px; border-radius: 8px;'>
163
- <h3 style='margin-top: 0;'>Subtitle Preview</h3>
164
- <div id='preview-content' style='height: 300px; overflow-y: auto;'></div>
165
- </div>
166
- """)
167
 
168
  process_btn.click(
169
  process_video,
 
12
 
13
  # Configure Gemini API
14
  genai.configure(api_key=os.environ["GEMINI_API_KEY"])
 
 
15
  model = genai.GenerativeModel("gemini-2.0-flash-exp")
16
 
17
+ # Supported languages
18
  SUPPORTED_LANGUAGES = [
19
  "Auto Detect", "English", "Spanish", "French", "German", "Italian",
20
  "Portuguese", "Russian", "Japanese", "Korean", "Arabic", "Hindi",
 
22
  ]
23
 
24
  # Magic Prompts
25
+ TRANSCRIPTION_PROMPT = """You are a professional subtitling expert. Generate precise subtitles with accurate timestamps following these rules:
26
 
27
+ 1. Use [HH:MM:SS.ms -> HH:MM:SS.ms] format
28
+ 2. Each subtitle 3-7 words
29
+ 3. Include speaker changes
30
+ 4. Preserve emotional tone
31
+ 5. Format example:
32
 
33
  [00:00:05.250 -> 00:00:08.100]
34
+ Example subtitle text
 
 
 
35
 
36
+ Return ONLY subtitles with timestamps."""
37
 
38
+ TRANSLATION_PROMPT = """Translate these subtitles to {target_language} following:
39
 
40
+ 1. Keep timestamps identical
41
+ 2. Match text length to timing
42
+ 3. Preserve technical terms
43
+ 4. Use natural speech patterns
 
44
 
45
+ ORIGINAL:
46
  {subtitles}
47
 
48
+ TRANSLATED:"""
49
+
50
+ def parse_timestamp(timestamp_str):
51
+ """Flexible timestamp parser supporting multiple formats"""
52
+ clean_ts = timestamp_str.strip("[] ").replace(',', '.')
53
+ parts = clean_ts.split(':')
54
+
55
+ seconds = 0.0
56
+ if len(parts) == 3: # HH:MM:SS.ss
57
+ hours, minutes, seconds_part = parts
58
+ seconds += float(hours) * 3600
59
+ elif len(parts) == 2: # MM:SS.ss
60
+ minutes, seconds_part = parts
61
+ else:
62
+ raise ValueError(f"Invalid timestamp: {timestamp_str}")
63
+
64
+ seconds += float(minutes) * 60
65
+ seconds += float(seconds_part)
66
+ return seconds
67
+
68
+ def create_srt(subtitles_text):
69
+ """Robust SRT converter with error handling"""
70
+ entries = re.split(r'\n{2,}', subtitles_text.strip())
71
+ srt_output = []
72
+
73
+ for idx, entry in enumerate(entries, 1):
74
+ try:
75
+ # Match various timestamp formats
76
+ time_match = re.search(
77
+ r'\[?\s*((?:\d+:)?\d+:\d+[.,]\d{3})\s*->\s*((?:\d+:)?\d+:\d+[.,]\d{3})\s*\]?',
78
+ entry
79
+ )
80
+ if not time_match:
81
+ continue
82
+
83
+ start_time = parse_timestamp(time_match.group(1))
84
+ end_time = parse_timestamp(time_match.group(2))
85
+ text = entry.split(']', 1)[-1].strip()
86
+
87
+ srt_entry = (
88
+ f"{idx}\n"
89
+ f"{timedelta(seconds=start_time)} --> {timedelta(seconds=end_time)}\n"
90
+ f"{text}\n"
91
+ )
92
+ srt_output.append(srt_entry)
93
+
94
+ except Exception as e:
95
+ print(f"Skipping invalid entry {idx}: {str(e)}")
96
+ continue
97
+
98
+ return "\n".join(srt_output)
99
 
100
  def extract_audio(video_path):
101
+ """High-quality audio extraction"""
102
  video = VideoFileClip(video_path)
103
+ audio_path = os.path.join(tempfile.gettempdir(), "hq_audio.wav")
104
  video.audio.write_audiofile(audio_path, fps=44100, nbytes=2, codec='pcm_s16le')
105
  return audio_path
106
 
 
 
 
 
 
107
  def gemini_transcribe(audio_path):
108
+ """Audio transcription with Gemini"""
109
  with open(audio_path, "rb") as f:
110
  audio_data = f.read()
111
 
112
  response = model.generate_content(
113
+ [TRANSCRIPTION_PROMPT, {"mime_type": "audio/wav", "data": audio_data}]
 
114
  )
115
  return response.text
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  def translate_subtitles(subtitles, target_lang):
118
+ """Context-aware translation"""
119
  prompt = TRANSLATION_PROMPT.format(
120
  target_language=target_lang,
121
  subtitles=subtitles
 
124
  return response.text
125
 
126
  def process_video(video_path, source_lang, target_lang):
127
+ """Complete processing pipeline"""
128
+ try:
129
+ audio_path = extract_audio(video_path)
130
+ raw_transcription = gemini_transcribe(audio_path)
131
+ srt_original = create_srt(raw_transcription)
132
+
133
+ original_srt = os.path.join(tempfile.gettempdir(), "original.srt")
134
+ with open(original_srt, "w") as f:
135
+ f.write(srt_original)
136
+
137
+ translated_srt = None
138
+ if target_lang != "None":
139
+ translated_text = translate_subtitles(srt_original, target_lang)
140
+ translated_srt = os.path.join(tempfile.gettempdir(), "translated.srt")
141
+ with open(translated_srt, "w") as f:
142
+ f.write(create_srt(translated_text)) # Re-parse translated text
143
+
144
+ os.remove(audio_path)
145
+ return original_srt, translated_srt
 
 
 
 
146
 
147
+ except Exception as e:
148
+ print(f"Processing error: {str(e)}")
149
+ return None, None
150
 
151
  # Gradio Interface
152
+ with gr.Blocks(theme=gr.themes.Soft(), title="AI Subtitle Studio") as app:
153
+ gr.Markdown("# 🎬 Professional Subtitle Generator")
 
154
 
155
  with gr.Row():
156
+ video_input = gr.Video(label="Upload Video", sources=["upload"])
157
  with gr.Column():
 
 
158
  source_lang = gr.Dropdown(
159
  label="Source Language",
160
  choices=SUPPORTED_LANGUAGES,
 
165
  choices=["None"] + SUPPORTED_LANGUAGES[1:],
166
  value="None"
167
  )
168
+ process_btn = gr.Button("Generate", variant="primary")
169
+
170
+ with gr.Row():
171
+ original_sub = gr.File(label="Original Subtitles")
172
+ translated_sub = gr.File(label="Translated Subtitles")
 
 
 
 
 
 
173
 
174
  process_btn.click(
175
  process_video,