Spaces:

Athspi-ai
/

Audio-translation

Running

App Files Files Community

Athspi committed on Mar 12

Commit

63a0fca

verified ·

1 Parent(s): d060ce1

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -77

app.py CHANGED Viewed

@@ -11,7 +11,7 @@ from flask_cors import CORS
 app = Flask(__name__, static_folder='static')
 CORS(app)
-app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024  # 50MB limit
 # Configure Gemini API
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
@@ -34,36 +34,52 @@ KOKORO_LANGUAGES = {
 GTTS_LANGUAGES = lang.tts_langs()
 GTTS_LANGUAGES['ja'] = 'Japanese'
-SUPPORTED_LANGUAGES = sorted(list(set(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values()))))
-def upload_large_file(file_path):
-    """Handle large file uploads with chunking"""
     try:
-        return genai.upload_file(path=file_path)
     except Exception as e:
-        if "payload size exceeds" in str(e).lower():
-            # Chunking strategy for large files
-            chunk_size = 20 * 1024 * 1024  # 20MB chunks
-            file_parts = []
-            with open(file_path, 'rb') as f:
-                i = 0
-                while chunk := f.read(chunk_size):
-                    part_path = f"{file_path}_part{i}"
-                    with open(part_path, 'wb') as part_file:
-                        part_file.write(chunk)
-                    part = genai.upload_file(path=part_path)
-                    file_parts.append(part)
-                    os.remove(part_path)
-                    i += 1
-            return file_parts
-        raise
 @app.route('/translate', methods=['POST'])
 def translate_audio():
-    temp_input_path = None
-    uploaded_file = None
     try:
         if 'audio' not in request.files:
@@ -76,82 +92,66 @@ def translate_audio():
             return jsonify({'error': 'Invalid audio file'}), 400
         # Save to temp file
-        temp_input_path = os.path.join(tempfile.gettempdir(), secure_filename(audio_file.filename))
-        audio_file.save(temp_input_path)
-        # Upload using File API
-        uploaded_file = upload_large_file(temp_input_path)
-        # Get transcription
-        model = genai.GenerativeModel("gemini-2.0-flash-lite")
-        if isinstance(uploaded_file, list):
-            # Handle chunked files
-            transcripts = []
-            for chunk in uploaded_file:
-                response = model.generate_content(["Transcribe this audio chunk:", chunk])
-                transcripts.append(response.text)
-                chunk.delete()  # Clean up each chunk
-            transcription = " ".join(transcripts)
-        else:
-            response = model.generate_content(["Transcribe this audio file:", uploaded_file])
-            transcription = response.text
-        # Clean up main file
-        if uploaded_file and not isinstance(uploaded_file, list):
-            uploaded_file.delete()
-        # Translate text using Gemini
-        prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
         response = model.generate_content(prompt)
         translated_text = response.text.strip()
-        # Generate TTS
         if target_language in KOKORO_LANGUAGES:
             lang_code = KOKORO_LANGUAGES[target_language]
             pipeline = KPipeline(lang_code=lang_code)
             generator = pipeline(translated_text, voice="af_heart", speed=1)
-            # Collect all audio segments
             audio_segments = []
             for _, _, audio in generator:
                 if audio is not None:
                     audio_segments.append(audio)
-            if audio_segments:
-                audio_data = np.concatenate(audio_segments)
-                _, temp_output_path = tempfile.mkstemp(suffix=".wav")
-                sf.write(temp_output_path, audio_data, 24000)
-            else:
                 raise ValueError("No audio generated by Kokoro")
         else:
-            # Standard gTTS handling
             lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
             tts = gTTS(translated_text, lang=lang_code)
-            _, temp_output_path = tempfile.mkstemp(suffix=".mp3")
-            tts.save(temp_output_path)
         return jsonify({
             'transcription': transcription,
             'translation': translated_text,
-            'audio_url': f'/download/{os.path.basename(temp_output_path)}'
         })
     except Exception as e:
-        # Cleanup resources on error
-        if uploaded_file:
-            if isinstance(uploaded_file, list):
-                for f in uploaded_file:
-                    f.delete()
-            else:
-                uploaded_file.delete()
-        if temp_input_path and os.path.exists(temp_input_path):
-            os.remove(temp_input_path)
-        app.logger.error(f"Error processing request: {str(e)}")
         return jsonify({'error': str(e)}), 500
     finally:
-        if temp_input_path and os.path.exists(temp_input_path):
-            os.remove(temp_input_path)
 @app.route('/download/<filename>')
 def download_file(filename):
@@ -162,8 +162,8 @@ def download_file(filename):
             as_attachment=True,
             download_name=f"translated_{filename}"
         )
-    except FileNotFoundError:
-        return jsonify({'error': 'File not found'}), 404
 if __name__ == '__main__':
-    app.run(host="0.0.0.0", port=7820)

 app = Flask(__name__, static_folder='static')
 CORS(app)
+app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100MB limit
 # Configure Gemini API
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 GTTS_LANGUAGES = lang.tts_langs()
 GTTS_LANGUAGES['ja'] = 'Japanese'
+# Every language name the service accepts: Kokoro is keyed by language name,
+# while gTTS maps language codes to names, hence keys() vs. values().
+SUPPORTED_LANGUAGES = sorted(
+    set(KOKORO_LANGUAGES.keys()) | set(GTTS_LANGUAGES.values())
+)
+MAX_FILE_SIZE = 50 * 1024 * 1024  # 50MB Gemini limit
+CHUNK_SIZE = 20 * 1024 * 1024  # 20MB chunks
+def process_large_audio(file_path):
+    """Upload an audio file to Gemini, splitting it into chunks when oversized.
+
+    A file of at most MAX_FILE_SIZE bytes is uploaded as-is. A larger file is
+    read in CHUNK_SIZE-byte pieces; each piece is written to a temporary
+    "{file_path}_chunk_{n}" file, uploaded, and then removed from disk.
+
+    NOTE(review): the split is on raw byte offsets, so pieces after the first
+    may not be independently decodable for some audio formats -- confirm
+    transcription quality on large inputs.
+
+    Args:
+        file_path: Path of the audio file saved on local disk.
+
+    Returns:
+        A list of the objects returned by genai.upload_file, in file order
+        (a single-element list for small files).
+
+    Raises:
+        RuntimeError: If reading, splitting, or uploading fails. Chunks that
+            were already uploaded are deleted before the error is raised.
+    """
+    chunks = []
     try:
+        file_size = os.path.getsize(file_path)
+        if file_size <= MAX_FILE_SIZE:
+            # Process small files normally
+            chunks.append(genai.upload_file(file_path))
+            return chunks
+        # Split large files into chunks
+        with open(file_path, 'rb') as f:
+            chunk_num = 0
+            while chunk_data := f.read(CHUNK_SIZE):
+                chunk_path = f"{file_path}_chunk_{chunk_num}"
+                try:
+                    with open(chunk_path, 'wb') as chunk_file:
+                        chunk_file.write(chunk_data)
+                    chunks.append(genai.upload_file(chunk_path))
+                finally:
+                    # The local piece is only needed for the upload itself;
+                    # remove it even when the upload fails.
+                    if os.path.exists(chunk_path):
+                        os.remove(chunk_path)
+                chunk_num += 1
+        return chunks
     except Exception as e:
+        # The caller never receives partially uploaded chunks, so release
+        # them here instead of leaking them on the remote side.
+        for uploaded in chunks:
+            try:
+                uploaded.delete()
+            except Exception as cleanup_error:
+                app.logger.error(f"Cleanup error: {str(cleanup_error)}")
+        raise RuntimeError(f"File processing failed: {str(e)}") from e
+def cleanup_files(file_path, chunks):
+    """Best-effort cleanup of the temporary upload and the uploaded chunks.
+
+    Every step is attempted independently, so one failure (for example a
+    chunk the caller has already deleted) does not skip the remaining
+    cleanup. Failures are logged and never raised.
+
+    Args:
+        file_path: Local path of the saved upload, or None when the request
+            failed before anything was written to disk.
+        chunks: Uploaded file objects to delete; may be empty.
+    """
+    try:
+        # file_path is None when the request failed before the upload was saved.
+        if file_path and os.path.exists(file_path):
+            os.remove(file_path)
+    except Exception as e:
+        app.logger.error(f"Cleanup error: {str(e)}")
+    for index, chunk in enumerate(chunks):
+        try:
+            if file_path:
+                # Local pieces of a split upload are named
+                # "{file_path}_chunk_{n}"; remove any that are still on disk.
+                chunk_path = f"{file_path}_chunk_{index}"
+                if os.path.exists(chunk_path):
+                    os.remove(chunk_path)
+            # NOTE(review): chunk.name looks like a remote resource name
+            # rather than a local path -- kept for compatibility, confirm.
+            if os.path.exists(chunk.name):
+                os.remove(chunk.name)
+            chunk.delete()
+        except Exception as e:
+            app.logger.error(f"Cleanup error: {str(e)}")
 @app.route('/translate', methods=['POST'])
 def translate_audio():
+    temp_path = None
+    uploaded_chunks = []
     try:
         if 'audio' not in request.files:
             return jsonify({'error': 'Invalid audio file'}), 400
         # Save to temp file
+        temp_path = os.path.join(tempfile.gettempdir(), secure_filename(audio_file.filename))
+        audio_file.save(temp_path)
+        # Process file in chunks if needed
+        uploaded_chunks = process_large_audio(temp_path)
+        # Transcribe chunks
+        model = genai.GenerativeModel("gemini-2.0-flash")
+        transcripts = []
+        for chunk in uploaded_chunks:
+            response = model.generate_content(
+                ["Transcribe this audio chunk verbatim. Respond only with the transcription:", chunk]
+            )
+            transcripts.append(response.text.strip())
+            chunk.delete()
+        transcription = " ".join(transcripts)
+        # Translation
+        prompt = f"Translate to {target_language} preserving meaning:\n\n{transcription}"
         response = model.generate_content(prompt)
         translated_text = response.text.strip()
+        # TTS Generation
         if target_language in KOKORO_LANGUAGES:
+            # Kokoro processing
             lang_code = KOKORO_LANGUAGES[target_language]
             pipeline = KPipeline(lang_code=lang_code)
             generator = pipeline(translated_text, voice="af_heart", speed=1)
             audio_segments = []
             for _, _, audio in generator:
                 if audio is not None:
                     audio_segments.append(audio)
+            if not audio_segments:
                 raise ValueError("No audio generated by Kokoro")
+            audio_data = np.concatenate(audio_segments)
+            _, output_path = tempfile.mkstemp(suffix=".wav")
+            sf.write(output_path, audio_data, 24000)
         else:
+            # gTTS processing
             lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
             tts = gTTS(translated_text, lang=lang_code)
+            _, output_path = tempfile.mkstemp(suffix=".mp3")
+            tts.save(output_path)
         return jsonify({
             'transcription': transcription,
             'translation': translated_text,
+            'audio_url': f'/download/{os.path.basename(output_path)}'
         })
     except Exception as e:
+        app.logger.error(f"Processing error: {str(e)}")
         return jsonify({'error': str(e)}), 500
     finally:
+        cleanup_files(temp_path, uploaded_chunks)
 @app.route('/download/<filename>')
 def download_file(filename):
             as_attachment=True,
             download_name=f"translated_{filename}"
         )
+    except Exception as e:
+        return jsonify({'error': str(e)}), 404
 if __name__ == '__main__':
+    # The interactive debugger lets a client execute arbitrary code, so it is
+    # off by default while listening on all interfaces; opt in for local
+    # development with FLASK_DEBUG=1.
+    debug_enabled = os.getenv("FLASK_DEBUG", "").lower() in ("1", "true")
+    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)