Athspi committed on
Commit
63a0fca
·
verified ·
1 Parent(s): d060ce1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -77
app.py CHANGED
@@ -11,7 +11,7 @@ from flask_cors import CORS
11
 
12
  app = Flask(__name__, static_folder='static')
13
  CORS(app)
14
- app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024 # 50MB limit
15
 
16
  # Configure Gemini API
17
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
@@ -34,36 +34,52 @@ KOKORO_LANGUAGES = {
34
  GTTS_LANGUAGES = lang.tts_langs()
35
  GTTS_LANGUAGES['ja'] = 'Japanese'
36
 
37
- SUPPORTED_LANGUAGES = sorted(list(set(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values()))))
 
 
38
 
39
- def upload_large_file(file_path):
40
- """Handle large file uploads with chunking"""
 
 
 
41
  try:
42
- return genai.upload_file(path=file_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  except Exception as e:
44
- if "payload size exceeds" in str(e).lower():
45
- # Chunking strategy for large files
46
- chunk_size = 20 * 1024 * 1024 # 20MB chunks
47
- file_parts = []
48
-
49
- with open(file_path, 'rb') as f:
50
- i = 0
51
- while chunk := f.read(chunk_size):
52
- part_path = f"{file_path}_part{i}"
53
- with open(part_path, 'wb') as part_file:
54
- part_file.write(chunk)
55
- part = genai.upload_file(path=part_path)
56
- file_parts.append(part)
57
- os.remove(part_path)
58
- i += 1
59
-
60
- return file_parts
61
- raise
62
 
63
  @app.route('/translate', methods=['POST'])
64
  def translate_audio():
65
- temp_input_path = None
66
- uploaded_file = None
67
 
68
  try:
69
  if 'audio' not in request.files:
@@ -76,82 +92,66 @@ def translate_audio():
76
  return jsonify({'error': 'Invalid audio file'}), 400
77
 
78
  # Save to temp file
79
- temp_input_path = os.path.join(tempfile.gettempdir(), secure_filename(audio_file.filename))
80
- audio_file.save(temp_input_path)
81
 
82
- # Upload using File API
83
- uploaded_file = upload_large_file(temp_input_path)
84
-
85
- # Get transcription
86
- model = genai.GenerativeModel("gemini-2.0-flash-lite")
87
 
88
- if isinstance(uploaded_file, list):
89
- # Handle chunked files
90
- transcripts = []
91
- for chunk in uploaded_file:
92
- response = model.generate_content(["Transcribe this audio chunk:", chunk])
93
- transcripts.append(response.text)
94
- chunk.delete() # Clean up each chunk
95
- transcription = " ".join(transcripts)
96
- else:
97
- response = model.generate_content(["Transcribe this audio file:", uploaded_file])
98
- transcription = response.text
99
 
100
- # Clean up main file
101
- if uploaded_file and not isinstance(uploaded_file, list):
102
- uploaded_file.delete()
103
 
104
- # Translate text using Gemini
105
- prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
106
  response = model.generate_content(prompt)
107
  translated_text = response.text.strip()
108
-
109
- # Generate TTS
110
  if target_language in KOKORO_LANGUAGES:
 
111
  lang_code = KOKORO_LANGUAGES[target_language]
112
  pipeline = KPipeline(lang_code=lang_code)
113
  generator = pipeline(translated_text, voice="af_heart", speed=1)
114
 
115
- # Collect all audio segments
116
  audio_segments = []
117
  for _, _, audio in generator:
118
  if audio is not None:
119
  audio_segments.append(audio)
120
 
121
- if audio_segments:
122
- audio_data = np.concatenate(audio_segments)
123
- _, temp_output_path = tempfile.mkstemp(suffix=".wav")
124
- sf.write(temp_output_path, audio_data, 24000)
125
- else:
126
  raise ValueError("No audio generated by Kokoro")
 
 
 
 
127
  else:
128
- # Standard gTTS handling
129
  lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
130
  tts = gTTS(translated_text, lang=lang_code)
131
- _, temp_output_path = tempfile.mkstemp(suffix=".mp3")
132
- tts.save(temp_output_path)
133
-
134
  return jsonify({
135
  'transcription': transcription,
136
  'translation': translated_text,
137
- 'audio_url': f'/download/{os.path.basename(temp_output_path)}'
138
  })
139
-
140
  except Exception as e:
141
- # Cleanup resources on error
142
- if uploaded_file:
143
- if isinstance(uploaded_file, list):
144
- for f in uploaded_file:
145
- f.delete()
146
- else:
147
- uploaded_file.delete()
148
- if temp_input_path and os.path.exists(temp_input_path):
149
- os.remove(temp_input_path)
150
- app.logger.error(f"Error processing request: {str(e)}")
151
  return jsonify({'error': str(e)}), 500
152
  finally:
153
- if temp_input_path and os.path.exists(temp_input_path):
154
- os.remove(temp_input_path)
155
 
156
  @app.route('/download/<filename>')
157
  def download_file(filename):
@@ -162,8 +162,8 @@ def download_file(filename):
162
  as_attachment=True,
163
  download_name=f"translated_{filename}"
164
  )
165
- except FileNotFoundError:
166
- return jsonify({'error': 'File not found'}), 404
167
 
168
  if __name__ == '__main__':
169
- app.run(host="0.0.0.0", port=7820)
 
11
 
12
  app = Flask(__name__, static_folder='static')
13
  CORS(app)
14
+ app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024 # 100MB limit
15
 
16
  # Configure Gemini API
17
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 
34
  GTTS_LANGUAGES = lang.tts_langs()
35
  GTTS_LANGUAGES['ja'] = 'Japanese'
36
 
37
# Every target-language name the service accepts: the keys of the Kokoro table
# plus the display names gTTS reports, de-duplicated and sorted.
# (The previous multi-line form left the outer `sorted(` call unclosed, which
# made the whole module fail to import with a SyntaxError.)
SUPPORTED_LANGUAGES = sorted(
    set(KOKORO_LANGUAGES.keys()) | set(GTTS_LANGUAGES.values())
)

# Files at or below this size are uploaded in one piece.
MAX_FILE_SIZE = 50 * 1024 * 1024  # 50MB Gemini limit (per original author; TODO confirm)
# Size of each piece a larger file is split into before upload.
CHUNK_SIZE = 20 * 1024 * 1024  # 20MB chunks
43
+
44
def process_large_audio(file_path):
    """Upload an audio file to Gemini, splitting it into pieces when it is large.

    Files of at most ``MAX_FILE_SIZE`` bytes are uploaded as-is. Larger files
    are cut into consecutive ``CHUNK_SIZE``-byte pieces; each piece is written
    to ``<file_path>_chunk_<n>``, uploaded, and then removed from local disk.

    NOTE(review): the split is a raw byte split, so every piece after the
    first starts mid-stream without a container header. Confirm the model can
    actually decode such pieces for the audio formats this service accepts.

    Args:
        file_path: Path of the audio file on local disk.

    Returns:
        A list of the objects returned by ``genai.upload_file``, in file
        order (a one-element list when no splitting was needed).

    Raises:
        RuntimeError: If sizing, reading, splitting or uploading fails. The
            original exception is attached as ``__cause__``.
    """
    uploaded = []
    try:
        if os.path.getsize(file_path) <= MAX_FILE_SIZE:
            uploaded.append(genai.upload_file(file_path))
            return uploaded

        with open(file_path, 'rb') as source:
            chunk_num = 0
            while chunk_data := source.read(CHUNK_SIZE):
                chunk_path = f"{file_path}_chunk_{chunk_num}"
                try:
                    with open(chunk_path, 'wb') as chunk_file:
                        chunk_file.write(chunk_data)
                    uploaded.append(genai.upload_file(chunk_path))
                finally:
                    # The local piece is only needed for the upload call;
                    # remove it whether or not the upload succeeded so the
                    # temp directory does not fill up with chunk files.
                    if os.path.exists(chunk_path):
                        os.remove(chunk_path)
                chunk_num += 1
        return uploaded
    except Exception as e:
        # The caller never receives a partial list, so uploads made before
        # the failure must be released here or they would be orphaned.
        for remote in uploaded:
            try:
                remote.delete()
            except Exception as cleanup_error:
                app.logger.error(f"Cleanup error: {str(cleanup_error)}")
        raise RuntimeError(f"File processing failed: {str(e)}") from e
66
+
67
def cleanup_files(file_path, chunks):
    """Best-effort removal of the temporary input file and uploaded chunks.

    Each step is attempted independently: a failure is logged through
    ``app.logger`` and never raised, and it does not prevent the remaining
    items from being cleaned up.

    Args:
        file_path: Path of the local temporary file, or ``None``/empty when
            no file was created (e.g. the request was rejected early).
        chunks: Iterable of uploaded-file objects exposing ``delete()``;
            may be empty or ``None``.
    """
    if file_path:
        try:
            os.remove(file_path)
        except FileNotFoundError:
            # Already gone: nothing to clean up.
            pass
        except OSError as e:
            app.logger.error(f"Cleanup error: {str(e)}")

    # NOTE(review): `chunk.name` appears to be the remote resource identifier
    # of the upload rather than a local path (confirm against the genai File
    # API), so no local file removal is attempted based on it.
    for chunk in chunks or ():
        try:
            chunk.delete()
        except Exception as e:
            # A chunk may already have been deleted by the caller; keep going
            # so one failure does not leave the remaining uploads behind.
            app.logger.error(f"Cleanup error: {str(e)}")
 
 
 
 
 
78
 
79
  @app.route('/translate', methods=['POST'])
80
  def translate_audio():
81
+ temp_path = None
82
+ uploaded_chunks = []
83
 
84
  try:
85
  if 'audio' not in request.files:
 
92
  return jsonify({'error': 'Invalid audio file'}), 400
93
 
94
  # Save to temp file
95
+ temp_path = os.path.join(tempfile.gettempdir(), secure_filename(audio_file.filename))
96
+ audio_file.save(temp_path)
97
 
98
+ # Process file in chunks if needed
99
+ uploaded_chunks = process_large_audio(temp_path)
 
 
 
100
 
101
+ # Transcribe chunks
102
+ model = genai.GenerativeModel("gemini-2.0-flash")
103
+ transcripts = []
104
+
105
+ for chunk in uploaded_chunks:
106
+ response = model.generate_content(
107
+ ["Transcribe this audio chunk verbatim. Respond only with the transcription:", chunk]
108
+ )
109
+ transcripts.append(response.text.strip())
110
+ chunk.delete()
 
111
 
112
+ transcription = " ".join(transcripts)
 
 
113
 
114
+ # Translation
115
+ prompt = f"Translate to {target_language} preserving meaning:\n\n{transcription}"
116
  response = model.generate_content(prompt)
117
  translated_text = response.text.strip()
118
+
119
+ # TTS Generation
120
  if target_language in KOKORO_LANGUAGES:
121
+ # Kokoro processing
122
  lang_code = KOKORO_LANGUAGES[target_language]
123
  pipeline = KPipeline(lang_code=lang_code)
124
  generator = pipeline(translated_text, voice="af_heart", speed=1)
125
 
 
126
  audio_segments = []
127
  for _, _, audio in generator:
128
  if audio is not None:
129
  audio_segments.append(audio)
130
 
131
+ if not audio_segments:
 
 
 
 
132
  raise ValueError("No audio generated by Kokoro")
133
+
134
+ audio_data = np.concatenate(audio_segments)
135
+ _, output_path = tempfile.mkstemp(suffix=".wav")
136
+ sf.write(output_path, audio_data, 24000)
137
  else:
138
+ # gTTS processing
139
  lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
140
  tts = gTTS(translated_text, lang=lang_code)
141
+ _, output_path = tempfile.mkstemp(suffix=".mp3")
142
+ tts.save(output_path)
143
+
144
  return jsonify({
145
  'transcription': transcription,
146
  'translation': translated_text,
147
+ 'audio_url': f'/download/{os.path.basename(output_path)}'
148
  })
149
+
150
  except Exception as e:
151
+ app.logger.error(f"Processing error: {str(e)}")
 
 
 
 
 
 
 
 
 
152
  return jsonify({'error': str(e)}), 500
153
  finally:
154
+ cleanup_files(temp_path, uploaded_chunks)
 
155
 
156
  @app.route('/download/<filename>')
157
  def download_file(filename):
 
162
  as_attachment=True,
163
  download_name=f"translated_{filename}"
164
  )
165
+ except Exception as e:
166
+ return jsonify({'error': str(e)}), 404
167
 
168
  if __name__ == '__main__':
169
+ app.run(host='0.0.0.0', port=5000, debug=True)