Spaces:

Athspi-ai
/

Audio-translation

Running

App Files Files Community

Athspi committed on Mar 11

Commit

5ddb059

verified ·

1 Parent(s): a14dcb8

Update app.py

Browse files

Files changed (1) hide show

app.py +89 -78

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
-import os
-import hashlib
 import numpy as np
 from flask import Flask, request, jsonify, send_file, send_from_directory
 import google.generativeai as genai
@@ -9,19 +8,16 @@ import soundfile as sf
 from kokoro import KPipeline
 from werkzeug.utils import secure_filename
 from flask_cors import CORS
-from werkzeug.middleware.proxy_fix import ProxyFix
 app = Flask(__name__, static_folder='static')
-CORS(app, supports_credentials=True)
-app.config.update(
-    MAX_CONTENT_LENGTH=100 * 1024 * 1024,  # 100MB
-    SECRET_KEY=os.urandom(24),
-    SESSION_COOKIE_SAMESITE='Lax'
-)
-app.wsgi_app = ProxyFix(app.wsgi_app)
 # Configure Gemini API
-genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 # Language configurations
 KOKORO_LANGUAGES = {
@@ -38,75 +34,77 @@ KOKORO_LANGUAGES = {
 GTTS_LANGUAGES = lang.tts_langs()
 GTTS_LANGUAGES['ja'] = 'Japanese'
-SUPPORTED_LANGUAGES = sorted(list(set(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values()))))
-@app.route('/')
-def serve_index():
-    return send_from_directory(app.static_folder, 'index.html')
-@app.route('/languages')
-def get_languages():
-    return jsonify(SUPPORTED_LANGUAGES)
-@app.route('/upload-chunk', methods=['POST'])
-def upload_chunk():
-    try:
-        file = request.files['file']
-        chunk_index = int(request.form['chunkIndex'])
-        total_chunks = int(request.form['totalChunks'])
-        file_hash = request.form['fileHash']
-        # Save chunk to temp directory
-        chunk_dir = os.path.join(tempfile.gettempdir(), file_hash)
-        os.makedirs(chunk_dir, exist_ok=True)
-        chunk_path = os.path.join(chunk_dir, f"{chunk_index:04d}")
-        file.save(chunk_path)
-        return jsonify({'status': 'success', 'received': chunk_index})
-    except Exception as e:
-        return jsonify({'error': str(e)}), 500
-@app.route('/process-file', methods=['POST'])
-def process_file():
     try:
-        file_hash = request.json['fileHash']
-        target_language = request.json['language']
-        chunk_dir = os.path.join(tempfile.gettempdir(), file_hash)
-        # Reassemble file
-        final_path = os.path.join(tempfile.gettempdir(), file_hash + ".wav")
-        with open(final_path, 'wb') as output_file:
-            for chunk_name in sorted(os.listdir(chunk_dir)):
-                with open(os.path.join(chunk_dir, chunk_name), 'rb') as chunk_file:
-                    output_file.write(chunk_file.read())
-        # Process file
-        result = process_audio(final_path, target_language)
-        # Cleanup
-        os.remove(final_path)
-        for f in os.listdir(chunk_dir):
-            os.remove(os.path.join(chunk_dir, f))
-        os.rmdir(chunk_dir)
-        return jsonify(result)
     except Exception as e:
-        return jsonify({'error': str(e)}), 500
-def process_audio(file_path, target_language):
-    # Transcribe using Gemini
-    model = genai.GenerativeModel("gemini-2.0-flash")
-    uploaded_file = genai.upload_file(path=file_path)
     try:
-        response = model.generate_content(["Transcribe this audio file:", uploaded_file])
-        transcription = response.text.strip()
-        # Translate
-        prompt = f"Translate to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
         response = model.generate_content(prompt)
         translated_text = response.text.strip()
@@ -129,20 +127,33 @@ def process_audio(file_path, target_language):
             else:
                 raise ValueError("No audio generated by Kokoro")
         else:
-            # Fallback to gTTS
             lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
             tts = gTTS(translated_text, lang=lang_code)
             _, temp_output_path = tempfile.mkstemp(suffix=".mp3")
             tts.save(temp_output_path)
-        return {
             'transcription': transcription,
             'translation': translated_text,
             'audio_url': f'/download/{os.path.basename(temp_output_path)}'
-        }
     finally:
-        uploaded_file.delete()
 @app.route('/download/<filename>')
 def download_file(filename):

+ import os
 import numpy as np
 from flask import Flask, request, jsonify, send_file, send_from_directory
 import google.generativeai as genai
 from kokoro import KPipeline
 from werkzeug.utils import secure_filename
 from flask_cors import CORS
 app = Flask(__name__, static_folder='static')
+CORS(app)
+app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024  # 50MB limit
 # Configure Gemini API
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+if not GEMINI_API_KEY:
+    raise ValueError("GEMINI_API_KEY environment variable not set")
+genai.configure(api_key=GEMINI_API_KEY)
 # Language configurations
 KOKORO_LANGUAGES = {
 GTTS_LANGUAGES = lang.tts_langs()
 GTTS_LANGUAGES['ja'] = 'Japanese'
+# Union of the KOKORO_LANGUAGES keys and the GTTS_LANGUAGES values,
+# de-duplicated and sorted.
+SUPPORTED_LANGUAGES = sorted(
+    set(KOKORO_LANGUAGES) | set(GTTS_LANGUAGES.values())
+)
+def upload_large_file(file_path, chunk_size=20 * 1024 * 1024):
+    """Upload a file to the Gemini File API, falling back to chunked upload.
+
+    The whole file is uploaded first. Only if that fails with an error whose
+    message contains "payload size exceeds" is the file split into
+    ``chunk_size``-byte parts that are uploaded one by one.
+
+    Args:
+        file_path: Path of the local file to upload.
+        chunk_size: Maximum size in bytes of each part (default 20MB).
+
+    Returns:
+        The single uploaded file object, or a list of uploaded part objects
+        (in file order) when the chunked fallback was used.
+
+    Raises:
+        ValueError: If ``chunk_size`` is not positive.
+        Exception: Any other upload error is propagated unchanged.
+
+    NOTE(review): parts are raw byte slices of the file; whether every slice
+    is still decodable as audio depends on the container format -- confirm.
+    """
+    if chunk_size <= 0:
+        raise ValueError("chunk_size must be a positive number of bytes")
     try:
+        return genai.upload_file(path=file_path)
     except Exception as e:
+        # Only the size-limit failure is recoverable by chunking.
+        if "payload size exceeds" not in str(e).lower():
+            raise
+    file_parts = []
+    try:
+        with open(file_path, 'rb') as f:
+            i = 0
+            while chunk := f.read(chunk_size):
+                part_path = f"{file_path}_part{i}"
+                try:
+                    with open(part_path, 'wb') as part_file:
+                        part_file.write(chunk)
+                    file_parts.append(genai.upload_file(path=part_path))
+                finally:
+                    # Remove the local part even when its upload fails.
+                    if os.path.exists(part_path):
+                        os.remove(part_path)
+                i += 1
+    except Exception:
+        # The caller never receives the partial list, so delete the parts
+        # that were already uploaded before propagating the error.
+        for part in file_parts:
+            try:
+                part.delete()
+            except Exception as cleanup_error:
+                app.logger.warning("Failed to delete uploaded part: %s", cleanup_error)
+        raise
+    return file_parts
+@app.route('/translate', methods=['POST'])
+def translate_audio():
+    """Handle POST /translate: transcribe, translate and voice an audio upload.
+
+    Reads the uploaded file from the 'audio' field of request.files and the
+    target language from the 'language' form field (default 'English').
+    The audio is uploaded via upload_large_file, transcribed with the
+    "gemini-2.0-flash" model, and the transcript is translated with a second
+    prompt to the same model before speech is synthesized.
+
+    Returns:
+        A JSON response with 'transcription', 'translation' and 'audio_url'
+        on success; a JSON 'error' with status 400 when no usable file was
+        sent; a JSON 'error' with status 500 when any exception is raised.
+    """
+    temp_input_path = None
+    uploaded_file = None
     try:
+        if 'audio' not in request.files:
+            return jsonify({'error': 'No audio file uploaded'}), 400
+        audio_file = request.files['audio']
+        target_language = request.form.get('language', 'English')
+        if not audio_file or audio_file.filename == '':
+            return jsonify({'error': 'Invalid audio file'}), 400
+        # Save to temp file
+        # NOTE(review): the path is built only from the client-supplied
+        # filename inside the shared temp directory, so two requests using
+        # the same filename would write to the same path -- consider a
+        # unique temp name.
+        temp_input_path = os.path.join(tempfile.gettempdir(), secure_filename(audio_file.filename))
+        audio_file.save(temp_input_path)
+        # Upload using File API
+        # The result is either one uploaded file object or a list of parts.
+        uploaded_file = upload_large_file(temp_input_path)
+        # Get transcription
+        model = genai.GenerativeModel("gemini-2.0-flash")
+        if isinstance(uploaded_file, list):
+            # Handle chunked files
+            # Each part is transcribed on its own and the texts are joined
+            # with single spaces.
+            transcripts = []
+            for chunk in uploaded_file:
+                response = model.generate_content(["Transcribe this audio chunk:", chunk])
+                transcripts.append(response.text)
+                chunk.delete()  # Clean up each chunk
+            transcription = " ".join(transcripts)
+        else:
+            response = model.generate_content(["Transcribe this audio file:", uploaded_file])
+            transcription = response.text
+        # Clean up main file
+        if uploaded_file and not isinstance(uploaded_file, list):
+            uploaded_file.delete()
+        # Translate text using Gemini
+        prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
         response = model.generate_content(prompt)
         translated_text = response.text.strip()
             else:
                 raise ValueError("No audio generated by Kokoro")
         else:
+            # Standard gTTS handling
+            # Map the language name back to its gTTS code; falls back to 'en'
+            # when the name is not one of the GTTS_LANGUAGES values.
             lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
             tts = gTTS(translated_text, lang=lang_code)
+            # NOTE(review): mkstemp returns an open file descriptor, which is
+            # discarded here without being closed.
             _, temp_output_path = tempfile.mkstemp(suffix=".mp3")
             tts.save(temp_output_path)
+        return jsonify({
             'transcription': transcription,
             'translation': translated_text,
             'audio_url': f'/download/{os.path.basename(temp_output_path)}'
+        })
+    except Exception as e:
+        # Cleanup resources on error
+        # NOTE(review): uploaded_file is not reset after the deletes in the
+        # transcription step, so an error raised later calls delete() on the
+        # same objects a second time -- confirm delete() tolerates that.
+        if uploaded_file:
+            if isinstance(uploaded_file, list):
+                for f in uploaded_file:
+                    f.delete()
+            else:
+                uploaded_file.delete()
+        # The finally clause below performs this same removal on every path.
+        if temp_input_path and os.path.exists(temp_input_path):
+            os.remove(temp_input_path)
+        app.logger.error(f"Error processing request: {str(e)}")
+        return jsonify({'error': str(e)}), 500
     finally:
+        # Always remove the saved upload, on success and on error.
+        if temp_input_path and os.path.exists(temp_input_path):
+            os.remove(temp_input_path)
 @app.route('/download/<filename>')
 def download_file(filename):