File size: 5,705 Bytes
5f33e0e
7582b7f
d0dd39c
ab0df5d
dbe8a71
c07d698
6ebed08
 
7cc4829
 
dbe8a71
6ebed08
5ddb059
63a0fca
7cc4829
 
5ddb059
 
 
 
dbe8a71
7cc4829
6ebed08
 
 
 
 
 
 
 
 
 
 
7cc4829
6c131f6
317b2f2
63a0fca
 
 
6ebed08
63a0fca
 
 
 
 
6c131f6
63a0fca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c131f6
63a0fca
 
 
 
 
 
 
 
 
 
 
 
 
965bd2d
5ddb059
 
63a0fca
 
6c131f6
 
5ddb059
 
 
 
 
 
 
 
 
 
63a0fca
 
5ddb059
63a0fca
 
6c131f6
63a0fca
 
 
 
 
 
 
 
 
 
5ddb059
63a0fca
5ddb059
63a0fca
 
6ebed08
 
63a0fca
 
6ebed08
63a0fca
6ebed08
 
 
7582b7f
 
 
70e979d
7582b7f
 
63a0fca
7582b7f
63a0fca
 
 
 
6ebed08
63a0fca
6ebed08
 
63a0fca
 
 
5ddb059
7cc4829
 
63a0fca
5ddb059
63a0fca
5ddb059
63a0fca
5ddb059
6c131f6
63a0fca
dbe8a71
7cc4829
 
 
 
 
 
 
 
ef2c8e0
63a0fca
 
dbe8a71
7cc4829
63a0fca
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import os
import numpy as np
from flask import Flask, request, jsonify, send_file, send_from_directory
import google.generativeai as genai
from gtts import gTTS, lang
import tempfile
import soundfile as sf
from kokoro import KPipeline
from werkzeug.utils import secure_filename
from flask_cors import CORS

# Flask application object; static assets are served from the "static" folder.
app = Flask(__name__, static_folder='static')
# Enable cross-origin requests for every route of the app.
CORS(app)
# Reject request bodies larger than 100MB.
app.config.update(MAX_CONTENT_LENGTH=100 * 1024 ** 2)

# Configure Gemini API
# The key is read from the environment; a missing or empty value aborts
# module import with a ValueError instead of failing on the first request.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if GEMINI_API_KEY is None or GEMINI_API_KEY == "":
    raise ValueError("GEMINI_API_KEY environment variable not set")
genai.configure(api_key=GEMINI_API_KEY)

# Language configurations
# Display name -> Kokoro language code (passed to KPipeline as lang_code).
KOKORO_LANGUAGES = {
    "American English": "a",
    "British English": "b",
    "Mandarin Chinese": "z",
    "Spanish": "e",
    "French": "f",
    "Hindi": "h",
    "Italian": "i",
    "Brazilian Portuguese": "p",
}

# gTTS language code -> display name, as reported by gTTS itself.
GTTS_LANGUAGES = lang.tts_langs()
GTTS_LANGUAGES['ja'] = 'Japanese'

# Alphabetically sorted, de-duplicated display names across both engines.
# (The original expression was missing a closing parenthesis and did not parse.)
SUPPORTED_LANGUAGES = sorted(
    set(KOKORO_LANGUAGES) | set(GTTS_LANGUAGES.values())
)

# Files up to this many bytes (50MB) are uploaded to Gemini in one piece.
MAX_FILE_SIZE = 50 * 1024 ** 2
# Larger files are split into slices of this many bytes (20MB).
CHUNK_SIZE = 20 * 1024 ** 2

def process_large_audio(file_path):
    """Upload an audio file to Gemini, splitting oversized files into chunks.

    Files of at most MAX_FILE_SIZE bytes are uploaded as-is. Larger files are
    read in CHUNK_SIZE-byte slices; each slice is written to a temporary file
    next to the source, uploaded, and the local slice is removed again.

    Args:
        file_path: Path of the audio file on local disk.

    Returns:
        A list with the objects returned by ``genai.upload_file``, in the
        order the data appears in the file (a single element for small files).

    Raises:
        RuntimeError: If reading, splitting or uploading fails. The original
            exception is chained as ``__cause__``.
    """
    try:
        if os.path.getsize(file_path) <= MAX_FILE_SIZE:
            # Process small files normally
            return [genai.upload_file(file_path)]

        # NOTE(review): slicing at raw byte offsets ignores the audio
        # container's structure, so slices after the first may lack headers
        # and may not be decodable on their own -- confirm with real inputs.
        # Keep the original extension on every slice; presumably the upload
        # API infers the MIME type from it when none is given -- confirm.
        base, ext = os.path.splitext(file_path)
        uploaded = []
        try:
            with open(file_path, 'rb') as source:
                chunk_num = 0
                while chunk_data := source.read(CHUNK_SIZE):
                    chunk_path = f"{base}_chunk_{chunk_num}{ext}"
                    try:
                        with open(chunk_path, 'wb') as chunk_file:
                            chunk_file.write(chunk_data)
                        uploaded.append(genai.upload_file(chunk_path))
                    finally:
                        # The local slice is only needed for the upload call.
                        if os.path.exists(chunk_path):
                            os.remove(chunk_path)
                    chunk_num += 1
        except Exception:
            # The caller never receives a partial list, so release whatever
            # was already uploaded. Best effort: the original error matters
            # more than a failed remote delete.
            for item in uploaded:
                try:
                    item.delete()
                except Exception:
                    pass
            raise
        return uploaded
    except Exception as e:
        raise RuntimeError(f"File processing failed: {str(e)}") from e

def cleanup_files(file_path, chunks):
    """Remove the temporary upload and release uploaded chunks (best effort).

    Every item is cleaned up independently: a failure is logged through
    ``app.logger`` and the remaining items are still processed. Nothing is
    raised to the caller.

    Args:
        file_path: Local path of the temporary upload, or None when no file
            was saved (e.g. the request was rejected during validation).
        chunks: Iterable of uploaded-file objects exposing ``name`` and
            ``delete()``; may be empty or None.
    """
    if file_path:
        try:
            os.remove(file_path)
        except FileNotFoundError:
            # Already gone -- nothing to clean up.
            pass
        except OSError as e:
            app.logger.error(f"Cleanup error: {str(e)}")

    for chunk in chunks or ():
        try:
            # NOTE(review): chunk.name is presumably the remote resource name
            # rather than a local path, in which case this check never
            # matches -- kept for compatibility, confirm and drop if so.
            if os.path.exists(chunk.name):
                os.remove(chunk.name)
            chunk.delete()
        except Exception as e:
            # Includes the case where the chunk was already deleted remotely.
            app.logger.error(f"Cleanup error: {str(e)}")

def _synthesize_speech(text, target_language):
    """Render text to an audio file in the temp directory and return its path.

    Languages listed in KOKORO_LANGUAGES are synthesized with Kokoro and
    written as WAV; every other language goes through gTTS and is saved as
    MP3, falling back to the gTTS code 'en' when the language name is not
    found in GTTS_LANGUAGES.

    Raises:
        ValueError: If Kokoro yields no audio segments.
    """
    use_kokoro = target_language in KOKORO_LANGUAGES
    fd, output_path = tempfile.mkstemp(suffix=".wav" if use_kokoro else ".mp3")
    # mkstemp returns an open descriptor; close it so it is not leaked. The
    # audio libraries below reopen the file by path.
    os.close(fd)

    try:
        if use_kokoro:
            # Kokoro processing
            pipeline = KPipeline(lang_code=KOKORO_LANGUAGES[target_language])
            generator = pipeline(text, voice="af_heart", speed=1)
            audio_segments = [
                audio for _, _, audio in generator if audio is not None
            ]
            if not audio_segments:
                raise ValueError("No audio generated by Kokoro")
            sf.write(output_path, np.concatenate(audio_segments), 24000)
        else:
            # gTTS processing
            lang_code = next(
                (k for k, v in GTTS_LANGUAGES.items() if v == target_language),
                'en',
            )
            gTTS(text, lang=lang_code).save(output_path)
    except Exception:
        # Do not leave an empty or partial output file behind on failure.
        if os.path.exists(output_path):
            os.remove(output_path)
        raise

    return output_path


@app.route('/translate', methods=['POST'])
def translate_audio():
    """Transcribe an uploaded audio file, translate it and synthesize speech.

    Expects a multipart POST with an ``audio`` file and an optional
    ``language`` form field (default ``'English'``).

    Returns:
        On success, JSON with ``transcription``, ``translation`` and
        ``audio_url`` (a ``/download/<name>`` path for the generated audio).
        400 with an ``error`` message when the upload is missing or unnamed,
        500 with an ``error`` message when any processing step fails.
    """
    temp_path = None
    uploaded_chunks = []

    try:
        if 'audio' not in request.files:
            return jsonify({'error': 'No audio file uploaded'}), 400

        audio_file = request.files['audio']
        target_language = request.form.get('language', 'English')

        if not audio_file or audio_file.filename == '':
            return jsonify({'error': 'Invalid audio file'}), 400

        # Save to a uniquely named temp file. Only the sanitised extension of
        # the client filename is kept, so concurrent uploads with the same
        # name cannot overwrite each other and an empty sanitised name
        # cannot resolve to the temp directory itself.
        suffix = os.path.splitext(secure_filename(audio_file.filename))[1]
        fd, temp_path = tempfile.mkstemp(suffix=suffix)
        os.close(fd)
        audio_file.save(temp_path)

        # Process file in chunks if needed
        uploaded_chunks = process_large_audio(temp_path)

        # Transcribe chunks. Uploaded chunks are released once, by
        # cleanup_files in the finally clause, rather than here and there.
        model = genai.GenerativeModel("gemini-2.0-flash")
        transcripts = []
        for chunk in uploaded_chunks:
            response = model.generate_content(
                ["Transcribe this audio chunk verbatim. Respond only with the transcription:", chunk]
            )
            transcripts.append(response.text.strip())

        transcription = " ".join(transcripts)

        # Translation
        prompt = f"Translate to {target_language} preserving meaning:\n\n{transcription}"
        response = model.generate_content(prompt)
        translated_text = response.text.strip()

        # TTS Generation
        output_path = _synthesize_speech(translated_text, target_language)

        return jsonify({
            'transcription': transcription,
            'translation': translated_text,
            'audio_url': f'/download/{os.path.basename(output_path)}'
        })

    except Exception as e:
        # logger.exception records the traceback along with the message.
        app.logger.exception(f"Processing error: {str(e)}")
        return jsonify({'error': str(e)}), 500
    finally:
        # Nothing was saved or uploaded when validation rejected the request.
        if temp_path is not None:
            cleanup_files(temp_path, uploaded_chunks)

@app.route('/download/<filename>')
def download_file(filename):
    """Send a generated audio file from the temp directory as an attachment.

    Only ``.wav`` and ``.mp3`` names are served, since those are the suffixes
    the translation route gives its output files; the MIME type follows the
    extension.

    Args:
        filename: Base name of the file inside the system temp directory.

    Returns:
        The file as a download named ``translated_<filename>``, or a JSON
        ``error`` body with status 404 when the file cannot be served.
    """
    try:
        extension = os.path.splitext(filename)[1].lower()
        mimetype = {".wav": "audio/wav", ".mp3": "audio/mpeg"}.get(extension)
        if mimetype is None:
            # Refuse to hand out unrelated files that live in the temp dir.
            return jsonify({'error': 'File not found'}), 404

        # send_from_directory joins the path safely and rejects names that
        # would resolve outside the directory.
        return send_from_directory(
            tempfile.gettempdir(),
            filename,
            mimetype=mimetype,
            as_attachment=True,
            download_name=f"translated_{filename}"
        )
    except Exception as e:
        return jsonify({'error': str(e)}), 404

if __name__ == '__main__':
    # The server listens on all interfaces, and debug mode enables the
    # Werkzeug interactive debugger, which can execute arbitrary code.
    # Debug is therefore opt-in via FLASK_DEBUG instead of always on.
    debug_enabled = os.getenv("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)