Athspi committed on
Commit
224f399
·
verified ·
1 Parent(s): e057eaf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -40
app.py CHANGED
@@ -6,13 +6,14 @@ import tempfile
6
  import logging
7
  import gradio as gr
8
  from datetime import timedelta
 
9
 
10
  # Suppress moviepy logs
11
  logging.getLogger("moviepy").setLevel(logging.ERROR)
12
 
13
  # Configure Gemini API
14
  genai.configure(api_key=os.environ["GEMINI_API_KEY"])
15
- model = genai.GenerativeModel("gemini-2.0-pro-exp-02-05")
16
 
17
  # Supported languages
18
  SUPPORTED_LANGUAGES = [
@@ -22,21 +23,19 @@ SUPPORTED_LANGUAGES = [
22
  ]
23
 
24
  # Magic Prompts
25
- TRANSCRIPTION_PROMPT = """You are a professional subtitling expert. Generate precise subtitles with accurate timestamps following these rules:
26
-
27
  1. Use [HH:MM:SS.ms -> HH:MM:SS.ms] format
28
  2. Each subtitle 3-7 words
29
  3. Include speaker changes
30
  4. Preserve emotional tone
31
- 5. Format example:
32
 
33
  [00:00:05.250 -> 00:00:08.100]
34
  Example subtitle text
35
 
36
  Return ONLY subtitles with timestamps."""
37
 
38
- TRANSLATION_PROMPT = """Translate these subtitles to {target_language} following:
39
-
40
  1. Keep timestamps identical
41
  2. Match text length to timing
42
  3. Preserve technical terms
@@ -47,8 +46,49 @@ ORIGINAL:
47
 
48
  TRANSLATED:"""
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  def parse_timestamp(timestamp_str):
51
- """Flexible timestamp parser supporting multiple formats"""
52
  clean_ts = timestamp_str.strip("[] ").replace(',', '.')
53
  parts = clean_ts.split(':')
54
 
@@ -65,14 +105,17 @@ def parse_timestamp(timestamp_str):
65
  seconds += float(seconds_part)
66
  return seconds
67
 
 
 
 
 
68
  def create_srt(subtitles_text):
69
- """Robust SRT converter with error handling"""
70
  entries = re.split(r'\n{2,}', subtitles_text.strip())
71
  srt_output = []
72
 
73
  for idx, entry in enumerate(entries, 1):
74
  try:
75
- # Match various timestamp formats
76
  time_match = re.search(
77
  r'\[?\s*((?:\d+:)?\d+:\d+[.,]\d{3})\s*->\s*((?:\d+:)?\d+:\d+[.,]\d{3})\s*\]?',
78
  entry
@@ -86,7 +129,7 @@ def create_srt(subtitles_text):
86
 
87
  srt_entry = (
88
  f"{idx}\n"
89
- f"{timedelta(seconds=start_time)} --> {timedelta(seconds=end_time)}\n"
90
  f"{text}\n"
91
  )
92
  srt_output.append(srt_entry)
@@ -97,56 +140,46 @@ def create_srt(subtitles_text):
97
 
98
  return "\n".join(srt_output)
99
 
100
- def extract_audio(video_path):
101
- """High-quality audio extraction"""
102
- video = VideoFileClip(video_path)
103
- audio_path = os.path.join(tempfile.gettempdir(), "hq_audio.wav")
104
- video.audio.write_audiofile(audio_path, fps=44100, nbytes=2, codec='pcm_s16le')
105
- return audio_path
106
-
107
- def gemini_transcribe(audio_path):
108
- """Audio transcription with Gemini"""
109
- with open(audio_path, "rb") as f:
110
- audio_data = f.read()
111
-
112
- response = model.generate_content(
113
- [TRANSCRIPTION_PROMPT, {"mime_type": "audio/wav", "data": audio_data}]
114
- )
115
- return response.text
116
-
117
- def translate_subtitles(subtitles, target_lang):
118
- """Context-aware translation"""
119
- prompt = TRANSLATION_PROMPT.format(
120
- target_language=target_lang,
121
- subtitles=subtitles
122
- )
123
- response = model.generate_content(prompt)
124
- return response.text
125
-
126
  def process_video(video_path, source_lang, target_lang):
127
  """Complete processing pipeline"""
128
  try:
 
129
  audio_path = extract_audio(video_path)
130
- raw_transcription = gemini_transcribe(audio_path)
131
- srt_original = create_srt(raw_transcription)
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  original_srt = os.path.join(tempfile.gettempdir(), "original.srt")
134
  with open(original_srt, "w") as f:
135
  f.write(srt_original)
136
 
 
137
  translated_srt = None
138
  if target_lang != "None":
139
  translated_text = translate_subtitles(srt_original, target_lang)
140
  translated_srt = os.path.join(tempfile.gettempdir(), "translated.srt")
141
  with open(translated_srt, "w") as f:
142
- f.write(create_srt(translated_text)) # Re-parse translated text
143
 
144
- os.remove(audio_path)
145
  return original_srt, translated_srt
146
 
147
  except Exception as e:
148
  print(f"Processing error: {str(e)}")
149
  return None, None
 
 
 
150
 
151
  # Gradio Interface
152
  with gr.Blocks(theme=gr.themes.Soft(), title="AI Subtitle Studio") as app:
 
6
  import logging
7
  import gradio as gr
8
  from datetime import timedelta
9
+ from pydub import AudioSegment
10
 
11
  # Suppress moviepy logs
12
  logging.getLogger("moviepy").setLevel(logging.ERROR)
13
 
14
  # Configure Gemini API
15
  genai.configure(api_key=os.environ["GEMINI_API_KEY"])
16
+ model = genai.GenerativeModel("gemini-2.0-flash-exp")
17
 
18
  # Supported languages
19
  SUPPORTED_LANGUAGES = [
 
23
  ]
24
 
25
  # Magic Prompts
26
+ TRANSCRIPTION_PROMPT = """Generate precise subtitles with accurate timestamps:
 
27
  1. Use [HH:MM:SS.ms -> HH:MM:SS.ms] format
28
  2. Each subtitle 3-7 words
29
  3. Include speaker changes
30
  4. Preserve emotional tone
31
+ 5. Example:
32
 
33
  [00:00:05.250 -> 00:00:08.100]
34
  Example subtitle text
35
 
36
  Return ONLY subtitles with timestamps."""
37
 
38
+ TRANSLATION_PROMPT = """Translate these subtitles to {target_language}:
 
39
  1. Keep timestamps identical
40
  2. Match text length to timing
41
  3. Preserve technical terms
 
46
 
47
  TRANSLATED:"""
48
 
49
def split_audio(audio_path, chunk_duration=60):
    """Split a WAV file into consecutive chunks of ``chunk_duration`` seconds.

    Every chunk is exported as a WAV file with a unique name in the system
    temp directory, so two calls running at the same time cannot overwrite
    each other's chunk files.

    Args:
        audio_path: Path to the source WAV file.
        chunk_duration: Length of each chunk in seconds (default 60). The
            final chunk is shorter when the audio length is not a multiple
            of this value.

    Returns:
        List of chunk file paths in playback order. This function does not
        delete them; whoever consumes a chunk is expected to remove it.

    Raises:
        ValueError: If ``chunk_duration`` is shorter than one millisecond
            or not positive.
    """
    # pydub indexes audio by milliseconds, so convert once up front.
    step_ms = int(chunk_duration * 1000)
    if step_ms <= 0:
        raise ValueError("chunk_duration must be a positive number of seconds")

    audio = AudioSegment.from_wav(audio_path)
    chunks = []

    for offset_ms in range(0, len(audio), step_ms):
        # mkstemp guarantees a fresh file name; the offset in the prefix is
        # kept purely to make leftover files easy to identify.
        fd, chunk_path = tempfile.mkstemp(
            prefix=f"chunk_{offset_ms // 1000}_", suffix=".wav"
        )
        os.close(fd)
        audio[offset_ms:offset_ms + step_ms].export(chunk_path, format="wav")
        chunks.append(chunk_path)

    return chunks
61
+
62
def process_audio_chunk(chunk_path, start_time):
    """Transcribe one audio chunk and shift its timestamps by ``start_time``.

    The chunk is uploaded through the Gemini File API and transcribed with
    ``TRANSCRIPTION_PROMPT``. Timestamps in the response are relative to the
    start of the chunk, so ``start_time`` is added to both ends of every
    timestamp line. All other lines are passed through unchanged.

    Args:
        chunk_path: Path to the chunk's audio file. The file is deleted
            before this function returns, whether or not it succeeds.
        start_time: Offset of this chunk from the start of the full audio,
            in seconds.

    Returns:
        The transcription text with adjusted ``[start -> end]`` lines,
        joined with newlines.
    """
    try:
        # Upload file using Gemini's File API
        uploaded_file = genai.upload_file(path=chunk_path)

        # Get transcription
        response = model.generate_content(
            [TRANSCRIPTION_PROMPT, uploaded_file]
        )

        # Adjust timestamps relative to chunk start
        adjusted_transcription = []
        for line in response.text.splitlines():
            start, arrow, end = line.partition('->')
            if not arrow:
                adjusted_transcription.append(line)
                continue

            try:
                adjusted_start = parse_timestamp(start.strip()) + start_time
                adjusted_end = parse_timestamp(end.strip()) + start_time
            except (ValueError, IndexError):
                # The line contains an arrow but is not a timestamp pair
                # (e.g. subtitle text such as "A -> B"); keep it verbatim
                # instead of discarding the whole chunk.
                # NOTE(review): assumes parse_timestamp signals bad input
                # with ValueError/IndexError - confirm against its body.
                adjusted_transcription.append(line)
                continue

            adjusted_transcription.append(
                f"[{format_timestamp(adjusted_start)} -> {format_timestamp(adjusted_end)}]"
            )

        return "\n".join(adjusted_transcription)

    finally:
        # Guard the cleanup so a missing file cannot mask the real error.
        if os.path.exists(chunk_path):
            os.remove(chunk_path)
89
+
90
  def parse_timestamp(timestamp_str):
91
+ """Flexible timestamp parser"""
92
  clean_ts = timestamp_str.strip("[] ").replace(',', '.')
93
  parts = clean_ts.split(':')
94
 
 
105
  seconds += float(seconds_part)
106
  return seconds
107
 
108
def format_timestamp(seconds):
    """Convert an offset in seconds to an SRT timestamp (``HH:MM:SS,mmm``).

    ``str(timedelta)`` is not used because it omits the fractional part for
    whole seconds, prints six microsecond digits otherwise, and does not
    zero-pad the hour field; none of those forms is a valid SRT timestamp.

    Args:
        seconds: Offset from the start of the media, in seconds (int or
            float). Negative values are clamped to zero.

    Returns:
        The offset as a zero-padded ``HH:MM:SS,mmm`` string.
    """
    # Work in whole milliseconds so rounding carries cleanly into the
    # seconds/minutes/hours fields (e.g. 59.9996 -> 00:01:00,000).
    total_ms = max(0, round(float(seconds) * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
111
+
112
  def create_srt(subtitles_text):
113
+ """Convert raw transcription to SRT format"""
114
  entries = re.split(r'\n{2,}', subtitles_text.strip())
115
  srt_output = []
116
 
117
  for idx, entry in enumerate(entries, 1):
118
  try:
 
119
  time_match = re.search(
120
  r'\[?\s*((?:\d+:)?\d+:\d+[.,]\d{3})\s*->\s*((?:\d+:)?\d+:\d+[.,]\d{3})\s*\]?',
121
  entry
 
129
 
130
  srt_entry = (
131
  f"{idx}\n"
132
+ f"{format_timestamp(start_time)} --> {format_timestamp(end_time)}\n"
133
  f"{text}\n"
134
  )
135
  srt_output.append(srt_entry)
 
140
 
141
  return "\n".join(srt_output)
142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  def process_video(video_path, source_lang, target_lang):
144
  """Complete processing pipeline"""
145
  try:
146
+ # Extract audio
147
  audio_path = extract_audio(video_path)
 
 
148
 
149
+ # Split into chunks
150
+ chunks = split_audio(audio_path)
151
+ full_transcription = []
152
+
153
+ # Process each chunk
154
+ for i, chunk_path in enumerate(chunks):
155
+ start_time = i * 60 # 60 seconds per chunk
156
+ chunk_transcription = process_audio_chunk(chunk_path, start_time)
157
+ full_transcription.append(chunk_transcription)
158
+
159
+ # Combine results
160
+ srt_original = create_srt("\n\n".join(full_transcription))
161
+
162
+ # Save original subtitles
163
  original_srt = os.path.join(tempfile.gettempdir(), "original.srt")
164
  with open(original_srt, "w") as f:
165
  f.write(srt_original)
166
 
167
+ # Translate if needed
168
  translated_srt = None
169
  if target_lang != "None":
170
  translated_text = translate_subtitles(srt_original, target_lang)
171
  translated_srt = os.path.join(tempfile.gettempdir(), "translated.srt")
172
  with open(translated_srt, "w") as f:
173
+ f.write(create_srt(translated_text))
174
 
 
175
  return original_srt, translated_srt
176
 
177
  except Exception as e:
178
  print(f"Processing error: {str(e)}")
179
  return None, None
180
+ finally:
181
+ if os.path.exists(audio_path):
182
+ os.remove(audio_path)
183
 
184
  # Gradio Interface
185
  with gr.Blocks(theme=gr.themes.Soft(), title="AI Subtitle Studio") as app: