File size: 4,110 Bytes
96e4b05
d0dd39c
ab0df5d
dbe8a71
c07d698
6ebed08
 
7cc4829
 
dbe8a71
6ebed08
7cc4829
 
 
413a70d
 
7cc4829
ab0df5d
dbe8a71
7cc4829
6ebed08
 
 
 
 
 
 
 
 
 
 
 
7cc4829
038c430
7cc4829
 
 
 
 
 
 
 
dbe8a71
7cc4829
 
dbe8a71
7cc4829
 
 
 
 
 
 
 
 
 
 
 
 
6ebed08
 
 
d0dd39c
6ebed08
11a3089
6ebed08
 
 
c07d698
6ebed08
 
 
 
 
7cc4829
 
6ebed08
 
 
 
 
 
 
 
 
 
 
 
 
7cc4829
 
 
 
 
 
 
dbe8a71
6ebed08
7cc4829
dbe8a71
7cc4829
 
 
 
 
 
 
 
ef2c8e0
7cc4829
 
dbe8a71
7cc4829
6ebed08
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import mimetypes
import os
import tempfile

import google.generativeai as genai
import soundfile as sf
from flask import Flask, request, jsonify, send_file, send_from_directory
from flask_cors import CORS
from gtts import gTTS, lang
from kokoro import KPipeline
from werkzeug.utils import secure_filename

# Flask application object; its static folder is named "static".
app = Flask(__name__, static_folder="static")
# Enable cross-origin requests on every route of the app.
CORS(app)

# Configure Gemini API. The key is read once at import time and the module
# refuses to load without it.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if GEMINI_API_KEY is None or GEMINI_API_KEY == "":
    raise ValueError("GEMINI_API_KEY environment variable not set")
genai.configure(api_key=GEMINI_API_KEY)

# Language configurations
# Display name -> language code passed to KPipeline(lang_code=...).
KOKORO_LANGUAGES = {
    "American English": "a",
    "British English": "b",
    "Japanese": "j",
    "Mandarin Chinese": "z",
    "Spanish": "e",
    "French": "f",
    "Hindi": "h",
    "Italian": "i",
    "Brazilian Portuguese": "p",
}

# gTTS language code -> display name, as reported by gTTS itself.
GTTS_LANGUAGES = lang.tts_langs()

# Every display name offered by either engine, sorted alphabetically.
# A set union is used so that a name both engines provide (such as
# "Spanish") appears only once in the list returned to clients.
SUPPORTED_LANGUAGES = sorted(set(KOKORO_LANGUAGES) | set(GTTS_LANGUAGES.values()))

@app.route('/')
def serve_index():
    """Return ``index.html`` from the application's static folder for the root URL."""
    static_root = app.static_folder
    return send_from_directory(static_root, 'index.html')

@app.route('/languages')
def get_languages():
    """Return the supported target-language names as a JSON array."""
    payload = jsonify(SUPPORTED_LANGUAGES)
    return payload

def _audio_mime_type(upload):
    """Pick the MIME type to declare to Gemini for an uploaded audio file."""
    declared = upload.mimetype
    if declared and declared.startswith('audio/'):
        return declared
    # Generic or missing content type: fall back to the file extension.
    guessed, _ = mimetypes.guess_type(upload.filename or '')
    return guessed or declared or 'application/octet-stream'


def _new_temp_path(suffix):
    """Create an empty temp file and return its path, closing the OS handle."""
    # mkstemp returns an open descriptor; close it so it is not leaked on
    # every request. The file itself is written later by path.
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


def _synthesize_speech(text, target_language):
    """Render *text* as speech and return the path of the generated file.

    Languages listed in KOKORO_LANGUAGES are synthesized with Kokoro and
    written as a 24 kHz WAV file; any other language goes through gTTS and
    is saved as MP3.

    Raises:
        RuntimeError: if Kokoro yields no audio for the text.
    """
    if target_language in KOKORO_LANGUAGES:
        pipeline = KPipeline(lang_code=KOKORO_LANGUAGES[target_language])
        # NOTE(review): the same voice is used for every Kokoro language;
        # confirm whether a per-language voice is wanted.
        generator = pipeline(text, voice="af_heart", speed=1)
        # The pipeline yields 3-tuples whose last item is the audio for one
        # piece of the text. Keep every piece so that long translations are
        # not cut off after the first one.
        segments = [audio for _, _, audio in generator if audio is not None]
        if not segments:
            raise RuntimeError("Speech synthesis produced no audio")

        output_path = _new_temp_path(".wav")
        try:
            with sf.SoundFile(output_path, mode="w", samplerate=24000, channels=1) as wav_file:
                for segment in segments:
                    wav_file.write(segment)
        except Exception:
            # Do not leave a half-written file behind.
            os.remove(output_path)
            raise
        return output_path

    # gTTS path: map the display name back to its language code.
    lang_code = next(
        (code for code, name in GTTS_LANGUAGES.items() if name == target_language),
        'en',
    )
    tts = gTTS(text, lang=lang_code)
    output_path = _new_temp_path(".mp3")
    try:
        tts.save(output_path)
    except Exception:
        os.remove(output_path)
        raise
    return output_path


@app.route('/translate', methods=['POST'])
def translate_audio():
    """Transcribe an uploaded audio clip, translate it and synthesize speech.

    Expects a multipart POST with:
        audio:    the audio file to process (required).
        language: target language display name (optional, default 'English').

    Returns:
        200 with JSON ``{'transcription', 'translation', 'audio_url'}`` where
        ``audio_url`` points at the ``/download/<filename>`` route;
        400 with ``{'error': ...}`` for a missing/empty upload or an
        unsupported language;
        500 with ``{'error': ...}`` for any failure during processing.
    """
    try:
        if 'audio' not in request.files:
            return jsonify({'error': 'No audio file uploaded'}), 400

        audio_file = request.files['audio']
        target_language = request.form.get('language', 'English')

        if not audio_file or audio_file.filename == '':
            return jsonify({'error': 'Invalid audio file'}), 400

        # The language is interpolated into the prompt and selects the TTS
        # engine, so only accept names one of the engines actually offers.
        if (target_language not in KOKORO_LANGUAGES
                and target_language not in GTTS_LANGUAGES.values()):
            return jsonify({'error': f'Unsupported language: {target_language}'}), 400

        # Read the upload straight from the request. Nothing is written to
        # disk under a client-chosen name, so there is no leftover file and
        # no clash between concurrent uploads with the same filename.
        audio_bytes = audio_file.read()
        if not audio_bytes:
            return jsonify({'error': 'Invalid audio file'}), 400

        # Transcribe audio using Gemini. Binary input has to be passed as an
        # inline blob (mime type + data), not as bare bytes.
        model = genai.GenerativeModel("gemini-2.0-flash")
        prompt = "Transcribe the following audio file into text. Return only the transcribed text with no additional commentary or explanations."
        audio_part = {'mime_type': _audio_mime_type(audio_file), 'data': audio_bytes}
        response = model.generate_content([prompt, audio_part])
        transcription = response.text.strip()

        # Translate text using Gemini
        prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances:\n\n{transcription}"
        response = model.generate_content(prompt)
        translated_text = response.text.strip()

        # Generate TTS
        temp_output_path = _synthesize_speech(translated_text, target_language)

        return jsonify({
            'transcription': transcription,
            'translation': translated_text,
            'audio_url': f'/download/{os.path.basename(temp_output_path)}'
        })

    except Exception as e:
        # Top-level boundary of the request: log with traceback and report
        # the failure to the client as JSON.
        app.logger.exception("Error processing request")
        return jsonify({'error': str(e)}), 500

@app.route('/download/<filename>')
def download_file(filename):
    """Send a generated audio file from the system temp directory.

    Only plain file names ending in ``.wav`` or ``.mp3`` are served, which
    are the two suffixes the /translate route uses for its output files.
    The response MIME type follows the extension.

    Args:
        filename: base name of the file, as handed out in ``audio_url``.

    Returns:
        The file as an attachment named ``translated_<filename>``, or a JSON
        ``{'error': 'File not found'}`` with status 404.
    """
    audio_mimetypes = {'.wav': 'audio/wav', '.mp3': 'audio/mpeg'}
    _, extension = os.path.splitext(filename)
    mimetype = audio_mimetypes.get(extension.lower())

    # The temp directory is shared with unrelated files, so refuse anything
    # that is not a plain audio file name (e.g. "..", other extensions).
    if mimetype is None or secure_filename(filename) != filename:
        return jsonify({'error': 'File not found'}), 404

    try:
        return send_file(
            os.path.join(tempfile.gettempdir(), filename),
            mimetype=mimetype,
            as_attachment=True,
            download_name=f"translated_{filename}"
        )
    except FileNotFoundError:
        return jsonify({'error': 'File not found'}), 404

if __name__ == '__main__':
    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: it allows arbitrary code execution for anyone who
    # can reach the port. Set FLASK_DEBUG=1 to enable it for local work.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)