Spaces:

Athspi-ai
/

Audio-translation

Running

App Files Community

Athspi committed on Mar 9

Commit

75b45e0

verified ·

1 Parent(s): d0dd39c

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -30

app.py CHANGED Viewed

@@ -4,11 +4,10 @@ from flask import Flask, request, jsonify, send_file, send_from_directory
 import google.generativeai as genai
 from gtts import gTTS, lang
 import tempfile
-import soundfile as sf
 from werkzeug.utils import secure_filename
 from flask_cors import CORS
-app = Flask(__name__, static_folder='static')
 CORS(app)
 # Configure Gemini API
@@ -18,20 +17,8 @@ if not GEMINI_API_KEY:
 genai.configure(api_key=GEMINI_API_KEY)
 # Language configurations
-KOKORO_LANGUAGES = {
-    "American English": "a",
-    "British English": "b",
-    "Japanese": "j",
-    "Mandarin Chinese": "z",
-    "Spanish": "e",
-    "French": "f",
-    "Hindi": "h",
-    "Italian": "i",
-    "Brazilian Portuguese": "p"
-}
 GTTS_LANGUAGES = lang.tts_langs()
-SUPPORTED_LANGUAGES = sorted(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values()))
 @app.route('/')
 def serve_index():
@@ -64,8 +51,7 @@ def translate_audio():
         # Transcribe with Gemini
         model = genai.GenerativeModel("gemini-1.5-pro-latest")
-        prompt = """Accurately transcribe this audio file. Return only the raw text without any formatting,
-                   punctuation, or additional commentary. Preserve the original language and meaning."""
         response = model.generate_content(
             [
@@ -79,23 +65,15 @@ def translate_audio():
         transcription = response.text.strip()
         # Translate with Gemini
-        translate_prompt = f"""Translate this text to {target_language} preserving exact meaning and cultural nuances.
-                            Return only the translated text without any explanations or formatting: {transcription}"""
         translated_response = model.generate_content(translate_prompt)
         translated_text = translated_response.text.strip()
         # Generate TTS
-        if target_language in KOKORO_LANGUAGES:
-            lang_code = KOKORO_LANGUAGES[target_language]
-            # Kokoro TTS implementation
-            _, temp_output_path = tempfile.mkstemp(suffix=".wav")
-            # Add actual Kokoro synthesis here
-        else:
-            lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
-            tts = gTTS(translated_text, lang=lang_code)
-            _, temp_output_path = tempfile.mkstemp(suffix=".mp3")
-            tts.save(temp_output_path)
         return jsonify({
             'transcription': transcription,

 import google.generativeai as genai
 from gtts import gTTS, lang
 import tempfile
 from werkzeug.utils import secure_filename
 from flask_cors import CORS
+app = Flask(__name__, static_folder='static', static_url_path='')
 CORS(app)
 # Configure Gemini API
 genai.configure(api_key=GEMINI_API_KEY)
 # Language configurations
 GTTS_LANGUAGES = lang.tts_langs()
+SUPPORTED_LANGUAGES = sorted(GTTS_LANGUAGES.values())
 @app.route('/')
 def serve_index():
         # Transcribe with Gemini
         model = genai.GenerativeModel("gemini-1.5-pro-latest")
+        prompt = """Accurately transcribe this audio file. Return only the raw text without formatting."""
         response = model.generate_content(
             [
         transcription = response.text.strip()
         # Translate with Gemini
+        translate_prompt = f"Translate to {target_language} preserving meaning: {transcription}"
         translated_response = model.generate_content(translate_prompt)
         translated_text = translated_response.text.strip()
         # Generate TTS
+        lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
+        tts = gTTS(translated_text, lang=lang_code)
+        _, temp_output_path = tempfile.mkstemp(suffix=".mp3")
+        tts.save(temp_output_path)
         return jsonify({
             'transcription': transcription,