Athspi committed on
Commit
5ddb059
·
verified ·
1 Parent(s): a14dcb8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -78
app.py CHANGED
@@ -1,5 +1,4 @@
1
- import os
2
- import hashlib
3
  import numpy as np
4
  from flask import Flask, request, jsonify, send_file, send_from_directory
5
  import google.generativeai as genai
@@ -9,19 +8,16 @@ import soundfile as sf
9
  from kokoro import KPipeline
10
  from werkzeug.utils import secure_filename
11
  from flask_cors import CORS
12
- from werkzeug.middleware.proxy_fix import ProxyFix
13
 
14
  app = Flask(__name__, static_folder='static')
15
- CORS(app, supports_credentials=True)
16
- app.config.update(
17
- MAX_CONTENT_LENGTH=100 * 1024 * 1024, # 100MB
18
- SECRET_KEY=os.urandom(24),
19
- SESSION_COOKIE_SAMESITE='Lax'
20
- )
21
- app.wsgi_app = ProxyFix(app.wsgi_app)
22
 
23
  # Configure Gemini API
24
- genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 
 
 
25
 
26
  # Language configurations
27
  KOKORO_LANGUAGES = {
@@ -38,75 +34,77 @@ KOKORO_LANGUAGES = {
38
  GTTS_LANGUAGES = lang.tts_langs()
39
  GTTS_LANGUAGES['ja'] = 'Japanese'
40
 
41
- SUPPORTED_LANGUAGES = sorted(list(set(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values()))))
42
-
43
-
44
- @app.route('/')
45
- def serve_index():
46
- return send_from_directory(app.static_folder, 'index.html')
47
-
48
- @app.route('/languages')
49
- def get_languages():
50
- return jsonify(SUPPORTED_LANGUAGES)
51
-
52
- @app.route('/upload-chunk', methods=['POST'])
53
- def upload_chunk():
54
- try:
55
- file = request.files['file']
56
- chunk_index = int(request.form['chunkIndex'])
57
- total_chunks = int(request.form['totalChunks'])
58
- file_hash = request.form['fileHash']
59
-
60
- # Save chunk to temp directory
61
- chunk_dir = os.path.join(tempfile.gettempdir(), file_hash)
62
- os.makedirs(chunk_dir, exist_ok=True)
63
- chunk_path = os.path.join(chunk_dir, f"{chunk_index:04d}")
64
- file.save(chunk_path)
65
-
66
- return jsonify({'status': 'success', 'received': chunk_index})
67
-
68
- except Exception as e:
69
- return jsonify({'error': str(e)}), 500
70
 
71
- @app.route('/process-file', methods=['POST'])
72
- def process_file():
73
  try:
74
- file_hash = request.json['fileHash']
75
- target_language = request.json['language']
76
- chunk_dir = os.path.join(tempfile.gettempdir(), file_hash)
77
-
78
- # Reassemble file
79
- final_path = os.path.join(tempfile.gettempdir(), file_hash + ".wav")
80
- with open(final_path, 'wb') as output_file:
81
- for chunk_name in sorted(os.listdir(chunk_dir)):
82
- with open(os.path.join(chunk_dir, chunk_name), 'rb') as chunk_file:
83
- output_file.write(chunk_file.read())
84
-
85
- # Process file
86
- result = process_audio(final_path, target_language)
87
-
88
- # Cleanup
89
- os.remove(final_path)
90
- for f in os.listdir(chunk_dir):
91
- os.remove(os.path.join(chunk_dir, f))
92
- os.rmdir(chunk_dir)
93
-
94
- return jsonify(result)
95
-
96
  except Exception as e:
97
- return jsonify({'error': str(e)}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
- def process_audio(file_path, target_language):
100
- # Transcribe using Gemini
101
- model = genai.GenerativeModel("gemini-2.0-flash")
102
- uploaded_file = genai.upload_file(path=file_path)
103
 
104
  try:
105
- response = model.generate_content(["Transcribe this audio file:", uploaded_file])
106
- transcription = response.text.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
- # Translate
109
- prompt = f"Translate to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  response = model.generate_content(prompt)
111
  translated_text = response.text.strip()
112
 
@@ -129,20 +127,33 @@ def process_audio(file_path, target_language):
129
  else:
130
  raise ValueError("No audio generated by Kokoro")
131
  else:
132
- # Fallback to gTTS
133
  lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
134
  tts = gTTS(translated_text, lang=lang_code)
135
  _, temp_output_path = tempfile.mkstemp(suffix=".mp3")
136
  tts.save(temp_output_path)
137
 
138
- return {
139
  'transcription': transcription,
140
  'translation': translated_text,
141
  'audio_url': f'/download/{os.path.basename(temp_output_path)}'
142
- }
143
-
 
 
 
 
 
 
 
 
 
 
 
 
144
  finally:
145
- uploaded_file.delete()
 
146
 
147
  @app.route('/download/<filename>')
148
  def download_file(filename):
 
1
+ import os
 
2
  import numpy as np
3
  from flask import Flask, request, jsonify, send_file, send_from_directory
4
  import google.generativeai as genai
 
8
  from kokoro import KPipeline
9
  from werkzeug.utils import secure_filename
10
  from flask_cors import CORS
 
11
 
12
  app = Flask(__name__, static_folder='static')
13
+ CORS(app)
14
+ app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024 # 50MB limit
 
 
 
 
 
15
 
16
  # Configure Gemini API
17
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
18
+ if not GEMINI_API_KEY:
19
+ raise ValueError("GEMINI_API_KEY environment variable not set")
20
+ genai.configure(api_key=GEMINI_API_KEY)
21
 
22
  # Language configurations
23
  KOKORO_LANGUAGES = {
 
34
  GTTS_LANGUAGES = lang.tts_langs()
35
  GTTS_LANGUAGES['ja'] = 'Japanese'
36
 
37
+ SUPPORTED_LANGUAGES = sorted(
38
+ list(set(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values()))
39
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
+ def upload_large_file(file_path):
42
+ """Handle large file uploads with chunking"""
43
  try:
44
+ return genai.upload_file(path=file_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  except Exception as e:
46
+ if "payload size exceeds" in str(e).lower():
47
+ # Chunking strategy for large files
48
+ chunk_size = 20 * 1024 * 1024 # 20MB chunks
49
+ file_parts = []
50
+
51
+ with open(file_path, 'rb') as f:
52
+ i = 0
53
+ while chunk := f.read(chunk_size):
54
+ part_path = f"{file_path}_part{i}"
55
+ with open(part_path, 'wb') as part_file:
56
+ part_file.write(chunk)
57
+ part = genai.upload_file(path=part_path)
58
+ file_parts.append(part)
59
+ os.remove(part_path)
60
+ i += 1
61
+
62
+ return file_parts
63
+ raise
64
 
65
+ @app.route('/translate', methods=['POST'])
66
+ def translate_audio():
67
+ temp_input_path = None
68
+ uploaded_file = None
69
 
70
  try:
71
+ if 'audio' not in request.files:
72
+ return jsonify({'error': 'No audio file uploaded'}), 400
73
+
74
+ audio_file = request.files['audio']
75
+ target_language = request.form.get('language', 'English')
76
+
77
+ if not audio_file or audio_file.filename == '':
78
+ return jsonify({'error': 'Invalid audio file'}), 400
79
+
80
+ # Save to temp file
81
+ temp_input_path = os.path.join(tempfile.gettempdir(), secure_filename(audio_file.filename))
82
+ audio_file.save(temp_input_path)
83
+
84
+ # Upload using File API
85
+ uploaded_file = upload_large_file(temp_input_path)
86
+
87
+ # Get transcription
88
+ model = genai.GenerativeModel("gemini-2.0-flash")
89
 
90
+ if isinstance(uploaded_file, list):
91
+ # Handle chunked files
92
+ transcripts = []
93
+ for chunk in uploaded_file:
94
+ response = model.generate_content(["Transcribe this audio chunk:", chunk])
95
+ transcripts.append(response.text)
96
+ chunk.delete() # Clean up each chunk
97
+ transcription = " ".join(transcripts)
98
+ else:
99
+ response = model.generate_content(["Transcribe this audio file:", uploaded_file])
100
+ transcription = response.text
101
+
102
+ # Clean up main file
103
+ if uploaded_file and not isinstance(uploaded_file, list):
104
+ uploaded_file.delete()
105
+
106
+ # Translate text using Gemini
107
+ prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
108
  response = model.generate_content(prompt)
109
  translated_text = response.text.strip()
110
 
 
127
  else:
128
  raise ValueError("No audio generated by Kokoro")
129
  else:
130
+ # Standard gTTS handling
131
  lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
132
  tts = gTTS(translated_text, lang=lang_code)
133
  _, temp_output_path = tempfile.mkstemp(suffix=".mp3")
134
  tts.save(temp_output_path)
135
 
136
+ return jsonify({
137
  'transcription': transcription,
138
  'translation': translated_text,
139
  'audio_url': f'/download/{os.path.basename(temp_output_path)}'
140
+ })
141
+
142
+ except Exception as e:
143
+ # Cleanup resources on error
144
+ if uploaded_file:
145
+ if isinstance(uploaded_file, list):
146
+ for f in uploaded_file:
147
+ f.delete()
148
+ else:
149
+ uploaded_file.delete()
150
+ if temp_input_path and os.path.exists(temp_input_path):
151
+ os.remove(temp_input_path)
152
+ app.logger.error(f"Error processing request: {str(e)}")
153
+ return jsonify({'error': str(e)}), 500
154
  finally:
155
+ if temp_input_path and os.path.exists(temp_input_path):
156
+ os.remove(temp_input_path)
157
 
158
  @app.route('/download/<filename>')
159
  def download_file(filename):