Spaces:
Running
Running
File size: 5,411 Bytes
6f05665 6c131f6 7582b7f d0dd39c ab0df5d dbe8a71 c07d698 6ebed08 7cc4829 6c131f6 dbe8a71 6ebed08 6c131f6 7cc4829 6c131f6 dbe8a71 7cc4829 6ebed08 7cc4829 6c131f6 317b2f2 70e979d 6c131f6 70e979d 7cc4829 dbe8a71 6c131f6 dbe8a71 6c131f6 7cc4829 6c131f6 6ebed08 6c131f6 11a3089 6c131f6 965bd2d 6c131f6 33b1fce 6c131f6 c07d698 6c131f6 6ebed08 7cc4829 70e979d 6ebed08 7582b7f 70e979d 7582b7f 6ebed08 7582b7f 6ebed08 6c131f6 6ebed08 7cc4829 6c131f6 7cc4829 6c131f6 dbe8a71 7cc4829 ef2c8e0 7cc4829 dbe8a71 7cc4829 33b1fce |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
import hashlib
import os
import shutil
import tempfile

import google.generativeai as genai
import numpy as np
import soundfile as sf
from flask import Flask, request, jsonify, send_file, send_from_directory
from flask_cors import CORS
from gtts import gTTS, lang
from kokoro import KPipeline
from werkzeug.middleware.proxy_fix import ProxyFix
from werkzeug.utils import secure_filename
# Flask application; static assets are served from the local "static" folder.
app = Flask(__name__, static_folder='static')

# Enable cross-origin requests, including requests that carry credentials.
CORS(app, supports_credentials=True)

_app_settings = {
    'MAX_CONTENT_LENGTH': 100 * 1024 * 1024,  # 100MB
    'SECRET_KEY': os.urandom(24),  # fresh random key every time the module loads
    'SESSION_COOKIE_SAMESITE': 'Lax',
}
app.config.update(_app_settings)

# Wrap the WSGI callable in werkzeug's ProxyFix middleware (default settings).
app.wsgi_app = ProxyFix(app.wsgi_app)

# Configure Gemini API
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
# Language configurations
# Display name -> single-letter language code handed to Kokoro's KPipeline.
KOKORO_LANGUAGES = {
    "American English": "a",
    "British English": "b",
    "Mandarin Chinese": "z",
    "Spanish": "e",
    "French": "f",
    "Hindi": "h",
    "Italian": "i",
    "Brazilian Portuguese": "p",
}

# gTTS language code -> display name, as reported by gTTS itself.
GTTS_LANGUAGES = lang.tts_langs()
GTTS_LANGUAGES['ja'] = 'Japanese'  # make sure Japanese is always offered

# Sorted, de-duplicated display names across both TTS engines.  The original
# expression was missing a closing parenthesis, which made the module
# unparseable from this point on.
SUPPORTED_LANGUAGES = sorted(set(KOKORO_LANGUAGES) | set(GTTS_LANGUAGES.values()))
@app.route('/')
def serve_index():
    """Serve the front-end entry page, index.html, from the static folder."""
    static_root = app.static_folder
    return send_from_directory(static_root, 'index.html')
@app.route('/languages')
def get_languages():
    """Return the list of supported target-language names as JSON."""
    language_names = SUPPORTED_LANGUAGES
    return jsonify(language_names)
@app.route('/upload-chunk', methods=['POST'])
def upload_chunk():
    """Store one chunk of a chunked upload in a per-file temp directory.

    Expects a multipart form with ``file`` (the chunk payload) and the fields
    ``chunkIndex``, ``totalChunks`` and ``fileHash``.  The chunk is written to
    ``<tempdir>/<fileHash>/<chunkIndex zero-padded to 4 digits>``.

    Returns:
        JSON ``{'status': 'success', 'received': <chunkIndex>}`` on success;
        ``{'error': ...}`` with status 400 when the request is malformed, or
        with status 500 when saving the chunk fails.
    """
    try:
        file = request.files['file']
        chunk_index = int(request.form['chunkIndex'])
        total_chunks = int(request.form['totalChunks'])
        file_hash = request.form['fileHash']
    except (KeyError, ValueError) as e:
        # Missing field or non-integer index: the client's fault, not ours.
        return jsonify({'error': f'Invalid upload request: {e}'}), 400

    if not 0 <= chunk_index < total_chunks:
        return jsonify({'error': 'chunkIndex must be in [0, totalChunks)'}), 400

    # fileHash is client-controlled and becomes a directory name.  Accept it
    # only if it is already a plain, safe file name, so values such as
    # "../x" or "/etc" cannot escape the temp directory.
    if not file_hash or secure_filename(file_hash) != file_hash:
        return jsonify({'error': 'Invalid fileHash'}), 400

    try:
        # Save chunk to temp directory
        chunk_dir = os.path.join(tempfile.gettempdir(), file_hash)
        os.makedirs(chunk_dir, exist_ok=True)
        chunk_path = os.path.join(chunk_dir, f"{chunk_index:04d}")
        file.save(chunk_path)
    except Exception as e:
        return jsonify({'error': str(e)}), 500

    return jsonify({'status': 'success', 'received': chunk_index})
@app.route('/process-file', methods=['POST'])
def process_file():
    """Reassemble an uploaded file from its chunks and run the audio pipeline.

    Expects a JSON body with ``fileHash`` (the identifier the chunks were
    uploaded under) and ``language`` (the target language name).  The chunks
    in ``<tempdir>/<fileHash>/`` are concatenated in numeric order into
    ``<tempdir>/<fileHash>.wav``, which is passed to ``process_audio``.

    Returns:
        The JSON-encoded result of ``process_audio`` on success;
        ``{'error': ...}`` with status 400 for a malformed request, 404 when
        no chunks exist for ``fileHash``, or 500 when processing fails.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    file_hash = payload.get('fileHash')
    target_language = payload.get('language')
    if not isinstance(file_hash, str) or not isinstance(target_language, str):
        return jsonify({'error': 'fileHash and language are required'}), 400

    # fileHash is client-controlled and is used to build filesystem paths.
    # Accept it only if it is already a plain, safe file name so it cannot
    # point outside the temp directory.
    if not file_hash or secure_filename(file_hash) != file_hash:
        return jsonify({'error': 'Invalid fileHash'}), 400

    temp_root = tempfile.gettempdir()
    chunk_dir = os.path.join(temp_root, file_hash)
    if not os.path.isdir(chunk_dir):
        return jsonify({'error': 'No uploaded chunks found for this file'}), 404

    final_path = os.path.join(temp_root, file_hash + ".wav")
    try:
        # Reassemble file.  Chunk files are named by their index, so sort
        # numerically: a plain string sort would misplace indices >= 10000.
        with open(final_path, 'wb') as output_file:
            for chunk_name in sorted(os.listdir(chunk_dir), key=int):
                with open(os.path.join(chunk_dir, chunk_name), 'rb') as chunk_file:
                    # Stream the chunk instead of loading it fully into memory.
                    shutil.copyfileobj(chunk_file, output_file)
        # Process file
        result = process_audio(final_path, target_language)
    except Exception as e:
        return jsonify({'error': str(e)}), 500
    finally:
        # The reassembled copy can always be rebuilt from the chunks, so it
        # is removed whether or not processing succeeded.
        if os.path.exists(final_path):
            os.remove(final_path)

    # Cleanup: chunks are only discarded after a successful run, so a failed
    # request can be retried without uploading again.
    shutil.rmtree(chunk_dir, ignore_errors=True)
    return jsonify(result)
def _reserve_temp_audio_path(suffix):
    """Create an empty temp file ending in *suffix* and return its path.

    ``tempfile.mkstemp`` returns an already-open OS-level descriptor along
    with the path.  It is closed here because the audio writers reopen the
    file by name; discarding it would leak one descriptor per request.
    """
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


def process_audio(file_path, target_language):
    """Transcribe an audio file, translate the text and synthesize speech.

    Args:
        file_path: Path of the audio file to upload to Gemini.
        target_language: Display name of the language to translate into.
            Names in ``KOKORO_LANGUAGES`` are voiced with Kokoro; anything
            else goes through gTTS, falling back to ``'en'`` when the name
            is not a known gTTS language.

    Returns:
        A dict with ``transcription``, ``translation`` and ``audio_url``
        (a ``/download/<name>`` path for the generated temp file).

    Raises:
        ValueError: If Kokoro yields no audio segments.
        Exceptions raised by the Gemini, Kokoro, gTTS or soundfile calls
        propagate unchanged.
    """
    # Transcribe using Gemini
    model = genai.GenerativeModel("gemini-2.0-flash")
    uploaded_file = genai.upload_file(path=file_path)
    try:
        response = model.generate_content(["Transcribe this audio file:", uploaded_file])
        transcription = response.text.strip()

        # Translate
        prompt = f"Translate to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
        response = model.generate_content(prompt)
        translated_text = response.text.strip()

        # Generate TTS
        if target_language in KOKORO_LANGUAGES:
            lang_code = KOKORO_LANGUAGES[target_language]
            pipeline = KPipeline(lang_code=lang_code)
            # NOTE(review): the same voice is used for every Kokoro language;
            # confirm this is intended.
            generator = pipeline(translated_text, voice="af_heart", speed=1)

            # Collect all audio segments, skipping results without audio.
            audio_segments = [audio for _, _, audio in generator if audio is not None]
            if not audio_segments:
                raise ValueError("No audio generated by Kokoro")

            temp_output_path = _reserve_temp_audio_path(".wav")
            # 24000 is the sample rate written to the WAV file; presumably
            # Kokoro's output rate (verify against the Kokoro docs).
            sf.write(temp_output_path, np.concatenate(audio_segments), 24000)
        else:
            # Fallback to gTTS: map the display name back to its language code.
            lang_code = next(
                (code for code, name in GTTS_LANGUAGES.items() if name == target_language),
                'en',
            )
            tts = gTTS(translated_text, lang=lang_code)
            temp_output_path = _reserve_temp_audio_path(".mp3")
            tts.save(temp_output_path)

        return {
            'transcription': transcription,
            'translation': translated_text,
            'audio_url': f'/download/{os.path.basename(temp_output_path)}',
        }
    finally:
        # Always remove the file uploaded to Gemini, even when a step fails.
        uploaded_file.delete()
# File extension -> MIME type for the two audio formats process_audio writes.
_DOWNLOAD_MIMETYPES = {'.wav': 'audio/wav', '.mp3': 'audio/mpeg'}


@app.route('/download/<filename>')
def download_file(filename):
    """Send a generated audio file from the temp directory as an attachment.

    Args:
        filename: Name of a file in the temp directory, as handed out in the
            ``audio_url`` returned by ``process_audio``.

    Returns:
        The file as a download named ``translated_<filename>``, with a MIME
        type matching its extension, or JSON ``{'error': 'File not found'}``
        with status 404 when the name is not acceptable or the file is
        missing.
    """
    extension = os.path.splitext(filename)[1].lower()
    mimetype = _DOWNLOAD_MIMETYPES.get(extension)

    # The name comes straight from the URL.  Only plain, safe file names with
    # a known audio extension are served, so this route cannot be used to
    # fetch arbitrary entries of the shared temp directory.
    if mimetype is None or secure_filename(filename) != filename:
        return jsonify({'error': 'File not found'}), 404

    try:
        return send_file(
            os.path.join(tempfile.gettempdir(), filename),
            mimetype=mimetype,
            as_attachment=True,
            download_name=f"translated_{filename}",
        )
    except FileNotFoundError:
        return jsonify({'error': 'File not found'}), 404
if __name__ == '__main__':
    # The Werkzeug debugger lets anyone who can reach it execute code, so it
    # must not be on by default while listening on all interfaces.  Opt in
    # explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.getenv('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes', 'on')
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)