Athspi committed on
Commit
dbed07a
·
verified ·
1 Parent(s): 4ee4d7f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -83
app.py CHANGED
@@ -11,7 +11,6 @@ from flask_cors import CORS
11
 
12
  app = Flask(__name__, static_folder='static')
13
  CORS(app)
14
- app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024 # 100MB limit
15
 
16
  # Configure Gemini API
17
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
@@ -32,124 +31,91 @@ KOKORO_LANGUAGES = {
32
  }
33
 
34
  GTTS_LANGUAGES = lang.tts_langs()
35
- GTTS_LANGUAGES['ja'] = 'Japanese'
36
 
37
- SUPPORTED_LANGUAGES = sorted(list(set(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values()))))
 
 
38
 
39
- MAX_FILE_SIZE = 50 * 1024 * 1024 # 50MB Gemini limit
40
- CHUNK_SIZE = 20 * 1024 * 1024 # 20MB chunks
 
41
 
42
- def process_large_audio(file_path):
43
- """Process large audio files in chunks"""
44
- try:
45
- file_size = os.path.getsize(file_path)
46
- if file_size <= MAX_FILE_SIZE:
47
- # Process small files normally
48
- uploaded_file = genai.upload_file(file_path)
49
- return [uploaded_file]
50
-
51
- # Split large files into chunks
52
- chunks = []
53
- with open(file_path, 'rb') as f:
54
- chunk_num = 0
55
- while chunk_data := f.read(CHUNK_SIZE):
56
- chunk_path = f"{file_path}_chunk_{chunk_num}"
57
- with open(chunk_path, 'wb') as chunk_file:
58
- chunk_file.write(chunk_data)
59
- chunks.append(genai.upload_file(chunk_path))
60
- chunk_num += 1
61
- return chunks
62
- except Exception as e:
63
- raise RuntimeError(f"File processing failed: {str(e)}")
64
-
65
- def cleanup_files(file_path, chunks):
66
- """Cleanup temporary files and uploaded chunks"""
67
- try:
68
- if os.path.exists(file_path):
69
- os.remove(file_path)
70
- for chunk in chunks:
71
- if os.path.exists(chunk.name):
72
- os.remove(chunk.name)
73
- chunk.delete()
74
- except Exception as e:
75
- app.logger.error(f"Cleanup error: {str(e)}")
76
 
77
  @app.route('/translate', methods=['POST'])
78
  def translate_audio():
79
- temp_path = None
80
- uploaded_chunks = []
81
-
82
  try:
83
  if 'audio' not in request.files:
84
  return jsonify({'error': 'No audio file uploaded'}), 400
85
-
86
  audio_file = request.files['audio']
87
  target_language = request.form.get('language', 'English')
88
-
89
  if not audio_file or audio_file.filename == '':
90
  return jsonify({'error': 'Invalid audio file'}), 400
91
 
92
- # Save to temp file
93
- temp_path = os.path.join(tempfile.gettempdir(), secure_filename(audio_file.filename))
94
- audio_file.save(temp_path)
 
95
 
96
- # Process file in chunks if needed
97
- uploaded_chunks = process_large_audio(temp_path)
98
-
99
- # Transcribe chunks
100
  model = genai.GenerativeModel("gemini-2.0-flash")
101
- transcripts = []
102
 
103
- for chunk in uploaded_chunks:
104
- response = model.generate_content(
105
- ["Transcribe this audio chunk verbatim. Respond only with the transcription:", chunk]
106
- )
107
- transcripts.append(response.text.strip())
108
- chunk.delete()
109
-
110
- transcription = " ".join(transcripts)
111
-
112
- # Translation
113
- prompt = f"Translate to {target_language} preserving meaning:\n\n{transcription}"
 
 
 
114
  response = model.generate_content(prompt)
115
  translated_text = response.text.strip()
116
-
117
- # TTS Generation
118
  if target_language in KOKORO_LANGUAGES:
119
- # Kokoro processing
120
  lang_code = KOKORO_LANGUAGES[target_language]
121
  pipeline = KPipeline(lang_code=lang_code)
122
  generator = pipeline(translated_text, voice="af_heart", speed=1)
123
 
 
124
  audio_segments = []
125
  for _, _, audio in generator:
126
  if audio is not None:
127
  audio_segments.append(audio)
128
 
129
- if not audio_segments:
 
 
 
 
130
  raise ValueError("No audio generated by Kokoro")
131
-
132
- audio_data = np.concatenate(audio_segments)
133
- _, output_path = tempfile.mkstemp(suffix=".wav")
134
- sf.write(output_path, audio_data, 24000)
135
  else:
136
- # gTTS processing
137
  lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
138
  tts = gTTS(translated_text, lang=lang_code)
139
- _, output_path = tempfile.mkstemp(suffix=".mp3")
140
- tts.save(output_path)
141
-
142
  return jsonify({
143
  'transcription': transcription,
144
  'translation': translated_text,
145
- 'audio_url': f'/download/{os.path.basename(output_path)}'
146
  })
147
-
148
  except Exception as e:
149
- app.logger.error(f"Processing error: {str(e)}")
150
  return jsonify({'error': str(e)}), 500
151
- finally:
152
- cleanup_files(temp_path, uploaded_chunks)
153
 
154
  @app.route('/download/<filename>')
155
  def download_file(filename):
@@ -160,8 +126,8 @@ def download_file(filename):
160
  as_attachment=True,
161
  download_name=f"translated_{filename}"
162
  )
163
- except Exception as e:
164
- return jsonify({'error': str(e)}), 404
165
 
166
  if __name__ == '__main__':
167
- app.run(host="0.0.0.0", port=7860)
 
11
 
12
  app = Flask(__name__, static_folder='static')
13
  CORS(app)
 
14
 
15
  # Configure Gemini API
16
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 
31
  }
32
 
33
  GTTS_LANGUAGES = lang.tts_langs()
34
+ GTTS_LANGUAGES['ja'] = 'Japanese' # Explicit Japanese support
35
 
36
+ SUPPORTED_LANGUAGES = sorted(
37
+ list(set(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values())))
38
+ )
39
 
40
+ @app.route('/')
41
+ def serve_index():
42
+ return send_from_directory(app.static_folder, 'index.html')
43
 
44
+ @app.route('/languages')
45
+ def get_languages():
46
+ return jsonify(SUPPORTED_LANGUAGES)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  @app.route('/translate', methods=['POST'])
49
  def translate_audio():
 
 
 
50
  try:
51
  if 'audio' not in request.files:
52
  return jsonify({'error': 'No audio file uploaded'}), 400
53
+
54
  audio_file = request.files['audio']
55
  target_language = request.form.get('language', 'English')
56
+
57
  if not audio_file or audio_file.filename == '':
58
  return jsonify({'error': 'Invalid audio file'}), 400
59
 
60
+ # Validate MIME type
61
+ allowed_mime_types = ['audio/wav', 'audio/mpeg', 'audio/mp4', 'audio/webm']
62
+ if audio_file.mimetype not in allowed_mime_types:
63
+ return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400
64
 
65
+ # Transcribe audio using Gemini
 
 
 
66
  model = genai.GenerativeModel("gemini-2.0-flash")
 
67
 
68
+ # Create proper audio blob
69
+ audio_blob = {
70
+ 'mime_type': audio_file.mimetype,
71
+ 'data': audio_file.read()
72
+ }
73
+
74
+ # Get transcription
75
+ convo = model.start_chat()
76
+ convo.send_message("You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language. Respond only with the transcription.")
77
+ response = convo.send_message(audio_blob)
78
+ transcription = response.text.strip()
79
+
80
+ # Translate text using Gemini
81
+ prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
82
  response = model.generate_content(prompt)
83
  translated_text = response.text.strip()
84
+
85
+ # Generate TTS
86
  if target_language in KOKORO_LANGUAGES:
 
87
  lang_code = KOKORO_LANGUAGES[target_language]
88
  pipeline = KPipeline(lang_code=lang_code)
89
  generator = pipeline(translated_text, voice="af_heart", speed=1)
90
 
91
+ # Collect all audio segments
92
  audio_segments = []
93
  for _, _, audio in generator:
94
  if audio is not None:
95
  audio_segments.append(audio)
96
 
97
+ if audio_segments:
98
+ audio_data = np.concatenate(audio_segments)
99
+ _, temp_output_path = tempfile.mkstemp(suffix=".wav")
100
+ sf.write(temp_output_path, audio_data, 24000)
101
+ else:
102
  raise ValueError("No audio generated by Kokoro")
 
 
 
 
103
  else:
104
+ # Standard gTTS handling
105
  lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
106
  tts = gTTS(translated_text, lang=lang_code)
107
+ _, temp_output_path = tempfile.mkstemp(suffix=".mp3")
108
+ tts.save(temp_output_path)
109
+
110
  return jsonify({
111
  'transcription': transcription,
112
  'translation': translated_text,
113
+ 'audio_url': f'/download/{os.path.basename(temp_output_path)}'
114
  })
115
+
116
  except Exception as e:
117
+ app.logger.error(f"Error processing request: {str(e)}")
118
  return jsonify({'error': str(e)}), 500
 
 
119
 
120
  @app.route('/download/<filename>')
121
  def download_file(filename):
 
126
  as_attachment=True,
127
  download_name=f"translated_{filename}"
128
  )
129
+ except FileNotFoundError:
130
+ return jsonify({'error': 'File not found'}), 404
131
 
132
  if __name__ == '__main__':
133
+ app.run(host='0.0.0.0', port=5000, debug=True)