Spaces:

Athspi-ai
/

Audio-translation

Running

App Files Files Community

Athspi committed on Mar 9

Commit

6ebed08

verified ·

1 Parent(s): 75b45e0

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -29

app.py CHANGED Viewed

@@ -1,13 +1,15 @@
 import os
-import base64
 from flask import Flask, request, jsonify, send_file, send_from_directory
 import google.generativeai as genai
 from gtts import gTTS, lang
 import tempfile
 from werkzeug.utils import secure_filename
 from flask_cors import CORS
-app = Flask(__name__, static_folder='static', static_url_path='')
 CORS(app)
 # Configure Gemini API
@@ -17,8 +19,20 @@ if not GEMINI_API_KEY:
 genai.configure(api_key=GEMINI_API_KEY)
 # Language configurations
 GTTS_LANGUAGES = lang.tts_langs()
-SUPPORTED_LANGUAGES = sorted(GTTS_LANGUAGES.values())
 @app.route('/')
 def serve_index():
@@ -44,36 +58,36 @@ def translate_audio():
         filename = secure_filename(audio_file.filename)
         temp_input_path = os.path.join(tempfile.gettempdir(), filename)
         audio_file.save(temp_input_path)
-        # Read audio file as base64
         with open(temp_input_path, "rb") as f:
-            audio_data = base64.b64encode(f.read()).decode("utf-8")
-        # Transcribe with Gemini
-        model = genai.GenerativeModel("gemini-1.5-pro-latest")
-        prompt = """Accurately transcribe this audio file. Return only the raw text without formatting."""
-        response = model.generate_content(
-            [
-                prompt,
-                {
-                    "mime_type": "audio/" + filename.split('.')[-1],
-                    "data": audio_data
-                }
-            ]
-        )
         transcription = response.text.strip()
-        # Translate with Gemini
-        translate_prompt = f"Translate to {target_language} preserving meaning: {transcription}"
-        translated_response = model.generate_content(translate_prompt)
-        translated_text = translated_response.text.strip()
         # Generate TTS
-        lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
-        tts = gTTS(translated_text, lang=lang_code)
-        _, temp_output_path = tempfile.mkstemp(suffix=".mp3")
-        tts.save(temp_output_path)
         return jsonify({
             'transcription': transcription,
@@ -82,6 +96,7 @@ def translate_audio():
         })
     except Exception as e:
         return jsonify({'error': str(e)}), 500
 @app.route('/download/<filename>')
@@ -97,4 +112,4 @@ def download_file(filename):
         return jsonify({'error': 'File not found'}), 404
 if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 5000)))

 import os
 from flask import Flask, request, jsonify, send_file, send_from_directory
 import google.generativeai as genai
 from gtts import gTTS, lang
 import tempfile
+import soundfile as sf
+from kokoro import KPipeline
 from werkzeug.utils import secure_filename
 from flask_cors import CORS
+import requests
+app = Flask(__name__, static_folder='static')
 CORS(app)
 # Configure Gemini API
 genai.configure(api_key=GEMINI_API_KEY)
 # Language configurations
+KOKORO_LANGUAGES = {
+    "American English": "a",
+    "British English": "b",
+    "Japanese": "j",
+    "Mandarin Chinese": "z",
+    "Spanish": "e",
+    "French": "f",
+    "Hindi": "h",
+    "Italian": "i",
+    "Brazilian Portuguese": "p"
+}
 GTTS_LANGUAGES = lang.tts_langs()
+SUPPORTED_LANGUAGES = sorted(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values()))
 @app.route('/')
 def serve_index():
         filename = secure_filename(audio_file.filename)
         temp_input_path = os.path.join(tempfile.gettempdir(), filename)
         audio_file.save(temp_input_path)
+        # Transcribe audio using Gemini
+        model = genai.GenerativeModel("gemini-2.0-flash")
         with open(temp_input_path, "rb") as f:
+            audio_data = f.read()
+        # Use Gemini to transcribe the audio
+        prompt = "Transcribe the following audio file into text. Return only the transcribed text with no additional commentary or explanations."
+        response = model.generate_content([prompt, audio_data])
         transcription = response.text.strip()
+        # Translate text using Gemini
+        prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances:\n\n{transcription}"
+        response = model.generate_content(prompt)
+        translated_text = response.text.strip()
         # Generate TTS
+        if target_language in KOKORO_LANGUAGES:
+            lang_code = KOKORO_LANGUAGES[target_language]
+            pipeline = KPipeline(lang_code=lang_code)
+            generator = pipeline(translated_text, voice="af_heart", speed=1)
+            audio_data = next((audio for _, _, audio in generator), None)
+            if audio_data:
+                _, temp_output_path = tempfile.mkstemp(suffix=".wav")
+                sf.write(temp_output_path, audio_data, 24000)
+        else:
+            lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
+            tts = gTTS(translated_text, lang=lang_code)
+            _, temp_output_path = tempfile.mkstemp(suffix=".mp3")
+            tts.save(temp_output_path)
         return jsonify({
             'transcription': transcription,
         })
     except Exception as e:
+        app.logger.error(f"Error processing request: {str(e)}")
         return jsonify({'error': str(e)}), 500
 @app.route('/download/<filename>')
         return jsonify({'error': 'File not found'}), 404
 if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=5000, debug=True)