Spaces:

Athspi-ai
/

Audio-translation

Running

App Files Files Community

Athspi committed on Mar 11

Commit

6c131f6

verified ·

1 Parent(s): a5c5569

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -42

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
 import numpy as np
 from flask import Flask, request, jsonify, send_file, send_from_directory
 import google.generativeai as genai
@@ -8,15 +9,19 @@ import soundfile as sf
 from kokoro import KPipeline
 from werkzeug.utils import secure_filename
 from flask_cors import CORS
 app = Flask(__name__, static_folder='static')
-CORS(app)
 # Configure Gemini API
-GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
-if not GEMINI_API_KEY:
-    raise ValueError("GEMINI_API_KEY environment variable not set")
-genai.configure(api_key=GEMINI_API_KEY)
 # Language configurations
 KOKORO_LANGUAGES = {
@@ -31,10 +36,10 @@ KOKORO_LANGUAGES = {
 }
 GTTS_LANGUAGES = lang.tts_langs()
-GTTS_LANGUAGES['ja'] = 'Japanese'  # Explicit Japanese support
 SUPPORTED_LANGUAGES = sorted(
-    list(set(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values())))
 )
 @app.route('/')
@@ -45,40 +50,64 @@ def serve_index():
 def get_languages():
     return jsonify(SUPPORTED_LANGUAGES)
-@app.route('/translate', methods=['POST'])
-def translate_audio():
     try:
-        if 'audio' not in request.files:
-            return jsonify({'error': 'No audio file uploaded'}), 400
-        audio_file = request.files['audio']
-        target_language = request.form.get('language', 'English')
-        if not audio_file or audio_file.filename == '':
-            return jsonify({'error': 'Invalid audio file'}), 400
-        # Validate MIME type
-        allowed_mime_types = ['audio/wav', 'audio/mpeg', 'audio/mp4', 'audio/webm']
-        if audio_file.mimetype not in allowed_mime_types:
-            return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400
-        # Transcribe audio using Gemini
-        model = genai.GenerativeModel("gemini-2.0-flash-lite")
-        # Create proper audio blob
-        audio_blob = {
-            'mime_type': audio_file.mimetype,
-            'data': audio_file.read()
-        }
-        # Get transcription
-        convo = model.start_chat()
-        convo.send_message("You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language. Respond only with the transcription.")
-        response = convo.send_message(audio_blob)
         transcription = response.text.strip()
-        # Translate text using Gemini
-        prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
         response = model.generate_content(prompt)
         translated_text = response.text.strip()
@@ -101,21 +130,20 @@ def translate_audio():
             else:
                 raise ValueError("No audio generated by Kokoro")
         else:
-            # Standard gTTS handling
             lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
             tts = gTTS(translated_text, lang=lang_code)
             _, temp_output_path = tempfile.mkstemp(suffix=".mp3")
             tts.save(temp_output_path)
-        return jsonify({
             'transcription': transcription,
             'translation': translated_text,
             'audio_url': f'/download/{os.path.basename(temp_output_path)}'
-        })
-    except Exception as e:
-        app.logger.error(f"Error processing request: {str(e)}")
-        return jsonify({'error': str(e)}), 500
 @app.route('/download/<filename>')
 def download_file(filename):

 import os
+import hashlib
 import numpy as np
 from flask import Flask, request, jsonify, send_file, send_from_directory
 import google.generativeai as genai
 from kokoro import KPipeline
 from werkzeug.utils import secure_filename
 from flask_cors import CORS
+from werkzeug.middleware.proxy_fix import ProxyFix
 app = Flask(__name__, static_folder='static')
+# Enable CORS on the whole app with supports_credentials=True.
+# NOTE(review): no origin allow-list is passed here; confirm that combining
+# credentials with the extension's default origins is intended.
+CORS(app, supports_credentials=True)
+app.config.update(
+    MAX_CONTENT_LENGTH=100 * 1024 * 1024,  # 100MB
+    # NOTE(review): os.urandom runs at import time, so every process start
+    # (and every worker process) gets a different key. Anything signed with
+    # it will presumably stop validating after a restart; confirm that is
+    # acceptable.
+    SECRET_KEY=os.urandom(24),
+    SESSION_COOKIE_SAMESITE='Lax'
+)
+# Wrap the WSGI callable in ProxyFix using its default arguments.
+# NOTE(review): presumably the app is deployed behind a reverse proxy; verify.
+app.wsgi_app = ProxyFix(app.wsgi_app)
 # Configure Gemini API
+# NOTE(review): os.getenv returns None when GEMINI_API_KEY is unset and no
+# explicit check is made here, so a missing key is not reported at startup.
+genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 # Language configurations
 KOKORO_LANGUAGES = {
 }
 GTTS_LANGUAGES = lang.tts_langs()
+GTTS_LANGUAGES['ja'] = 'Japanese'
 SUPPORTED_LANGUAGES = sorted(
+    # De-duplicated union of the Kokoro keys and the gTTS language names.
+    # The trailing parentheses close values(), list(), set() and list() in
+    # that order; the final `)` on the next line closes sorted().
+    list(set(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values())))
 )
 @app.route('/')
 def get_languages():
     return jsonify(SUPPORTED_LANGUAGES)
+@app.route('/upload-chunk', methods=['POST'])
+def upload_chunk():
+    """Store one chunk of a chunked upload in a per-upload temp directory.
+
+    Expects multipart form data with a ``file`` part and the form fields
+    ``chunkIndex``, ``totalChunks`` and ``fileHash``. The chunk is written to
+    ``<tempdir>/<fileHash>/<chunkIndex zero-padded to 4 digits>``.
+
+    Returns:
+        200 with ``{'status': 'success', 'received': <chunkIndex>}`` on
+        success, 400 with ``{'error': ...}`` when a field is missing or
+        malformed, and 500 with ``{'error': ...}`` when saving fails.
+    """
     try:
+        file = request.files['file']
+        chunk_index = int(request.form['chunkIndex'])
+        # totalChunks is not needed to store a chunk; it is still parsed so a
+        # malformed value is rejected up front.
+        int(request.form['totalChunks'])
+        file_hash = request.form['fileHash']
+        if chunk_index < 0:
+            raise ValueError('chunkIndex must not be negative')
+        # fileHash is client-controlled and becomes a directory name. Require
+        # it to already be a plain, file-name-safe token so values such as
+        # '../x' or '/etc' can never escape the temp directory.
+        if not file_hash or secure_filename(file_hash) != file_hash:
+            raise ValueError('fileHash must be a plain file-name-safe token')
+    except (KeyError, ValueError) as e:
+        # Missing form fields raise a KeyError subclass; bad integers and the
+        # checks above raise ValueError. Both are client errors.
+        return jsonify({'error': str(e)}), 400
+    try:
+        # Save chunk to temp directory
+        chunk_dir = os.path.join(tempfile.gettempdir(), file_hash)
+        os.makedirs(chunk_dir, exist_ok=True)
+        chunk_path = os.path.join(chunk_dir, f"{chunk_index:04d}")
+        file.save(chunk_path)
+        return jsonify({'status': 'success', 'received': chunk_index})
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
+@app.route('/process-file', methods=['POST'])
+def process_file():
+    """Reassemble a chunked upload and run it through ``process_audio``.
+
+    Expects a JSON object with ``fileHash`` (the token used when the chunks
+    were uploaded) and ``language`` (the target language). The chunks found
+    in ``<tempdir>/<fileHash>/`` are concatenated in index order into
+    ``<tempdir>/<fileHash>.wav`` and that file is passed to ``process_audio``.
+
+    Returns:
+        200 with the ``process_audio`` result as JSON on success, 400 for a
+        malformed request, 404 when no chunks exist for ``fileHash``, and 500
+        with ``{'error': ...}`` when reassembly or processing fails.
+    """
+    import shutil
+
+    # silent=True yields None instead of raising on a non-JSON body, so every
+    # error from this endpoint stays a JSON response.
+    payload = request.get_json(silent=True)
+    if not isinstance(payload, dict) or 'fileHash' not in payload or 'language' not in payload:
+        return jsonify({'error': 'Expected a JSON object with fileHash and language'}), 400
+    file_hash = payload['fileHash']
+    target_language = payload['language']
+    # fileHash is client-controlled and is used to build paths that are
+    # written to and deleted below; only accept a plain, file-name-safe token.
+    if not isinstance(file_hash, str) or not file_hash or secure_filename(file_hash) != file_hash:
+        return jsonify({'error': 'fileHash must be a plain file-name-safe token'}), 400
+
+    chunk_dir = os.path.join(tempfile.gettempdir(), file_hash)
+    final_path = os.path.join(tempfile.gettempdir(), file_hash + ".wav")
+    if not os.path.isdir(chunk_dir):
+        return jsonify({'error': 'No uploaded chunks found for this fileHash'}), 404
+    # Chunk names are zero-padded indices; ordering by (length, text) keeps
+    # them in numeric order even once an index needs more than four digits.
+    chunk_names = sorted(os.listdir(chunk_dir), key=lambda name: (len(name), name))
+    if not chunk_names:
+        return jsonify({'error': 'No uploaded chunks found for this fileHash'}), 404
+
+    try:
+        # Reassemble file
+        with open(final_path, 'wb') as output_file:
+            for chunk_name in chunk_names:
+                with open(os.path.join(chunk_dir, chunk_name), 'rb') as chunk_file:
+                    # Stream the chunk instead of loading it fully into memory.
+                    shutil.copyfileobj(chunk_file, output_file)
+        # Process file
+        result = process_audio(final_path, target_language)
+    except Exception as e:
+        # The chunks are deliberately left in place on failure so the same
+        # upload can be processed again without re-sending it.
+        return jsonify({'error': str(e)}), 500
+    finally:
+        # The reassembled copy is never needed after this request.
+        try:
+            os.remove(final_path)
+        except OSError:
+            pass  # best-effort: the file may not have been created
+
+    # Cleanup; a failure to delete must not turn a successful result into an error.
+    shutil.rmtree(chunk_dir, ignore_errors=True)
+    return jsonify(result)
+def process_audio(file_path, target_language):
+    # Transcribe using Gemini
+    model = genai.GenerativeModel("gemini-2.0-flash-lite")
+    uploaded_file = genai.upload_file(path=file_path)
+    try:
+        response = model.generate_content(["Transcribe this audio file:", uploaded_file])
         transcription = response.text.strip()
+        # Translate
+        prompt = f"Translate to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
         response = model.generate_content(prompt)
         translated_text = response.text.strip()
             else:
                 raise ValueError("No audio generated by Kokoro")
         else:
+            # Fallback to gTTS
             lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
             tts = gTTS(translated_text, lang=lang_code)
             _, temp_output_path = tempfile.mkstemp(suffix=".mp3")
             tts.save(temp_output_path)
+        return {
             'transcription': transcription,
             'translation': translated_text,
             'audio_url': f'/download/{os.path.basename(temp_output_path)}'
+        }
+    finally:
+        uploaded_file.delete()
 @app.route('/download/<filename>')
 def download_file(filename):