Athspi committed on
Commit
6c131f6
·
verified ·
1 Parent(s): a5c5569

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -42
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import numpy as np
3
  from flask import Flask, request, jsonify, send_file, send_from_directory
4
  import google.generativeai as genai
@@ -8,15 +9,19 @@ import soundfile as sf
8
  from kokoro import KPipeline
9
  from werkzeug.utils import secure_filename
10
  from flask_cors import CORS
 
11
 
12
  app = Flask(__name__, static_folder='static')
13
- CORS(app)
 
 
 
 
 
 
14
 
15
  # Configure Gemini API
16
- GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
17
- if not GEMINI_API_KEY:
18
- raise ValueError("GEMINI_API_KEY environment variable not set")
19
- genai.configure(api_key=GEMINI_API_KEY)
20
 
21
  # Language configurations
22
  KOKORO_LANGUAGES = {
@@ -31,10 +36,10 @@ KOKORO_LANGUAGES = {
31
  }
32
 
33
  GTTS_LANGUAGES = lang.tts_langs()
34
- GTTS_LANGUAGES['ja'] = 'Japanese' # Explicit Japanese support
35
 
36
  SUPPORTED_LANGUAGES = sorted(
37
- list(set(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values())))
38
  )
39
 
40
  @app.route('/')
@@ -45,40 +50,64 @@ def serve_index():
45
  def get_languages():
46
  return jsonify(SUPPORTED_LANGUAGES)
47
 
48
- @app.route('/translate', methods=['POST'])
49
- def translate_audio():
50
  try:
51
- if 'audio' not in request.files:
52
- return jsonify({'error': 'No audio file uploaded'}), 400
53
-
54
- audio_file = request.files['audio']
55
- target_language = request.form.get('language', 'English')
56
 
57
- if not audio_file or audio_file.filename == '':
58
- return jsonify({'error': 'Invalid audio file'}), 400
59
-
60
- # Validate MIME type
61
- allowed_mime_types = ['audio/wav', 'audio/mpeg', 'audio/mp4', 'audio/webm']
62
- if audio_file.mimetype not in allowed_mime_types:
63
- return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400
 
 
 
64
 
65
- # Transcribe audio using Gemini
66
- model = genai.GenerativeModel("gemini-2.0-flash-lite")
 
 
 
 
67
 
68
- # Create proper audio blob
69
- audio_blob = {
70
- 'mime_type': audio_file.mimetype,
71
- 'data': audio_file.read()
72
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
- # Get transcription
75
- convo = model.start_chat()
76
- convo.send_message("You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language. Respond only with the transcription.")
77
- response = convo.send_message(audio_blob)
 
 
 
78
  transcription = response.text.strip()
79
-
80
- # Translate text using Gemini
81
- prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
82
  response = model.generate_content(prompt)
83
  translated_text = response.text.strip()
84
 
@@ -101,21 +130,20 @@ def translate_audio():
101
  else:
102
  raise ValueError("No audio generated by Kokoro")
103
  else:
104
- # Standard gTTS handling
105
  lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
106
  tts = gTTS(translated_text, lang=lang_code)
107
  _, temp_output_path = tempfile.mkstemp(suffix=".mp3")
108
  tts.save(temp_output_path)
109
 
110
- return jsonify({
111
  'transcription': transcription,
112
  'translation': translated_text,
113
  'audio_url': f'/download/{os.path.basename(temp_output_path)}'
114
- })
115
-
116
- except Exception as e:
117
- app.logger.error(f"Error processing request: {str(e)}")
118
- return jsonify({'error': str(e)}), 500
119
 
120
  @app.route('/download/<filename>')
121
  def download_file(filename):
 
1
  import os
2
+ import hashlib
3
  import numpy as np
4
  from flask import Flask, request, jsonify, send_file, send_from_directory
5
  import google.generativeai as genai
 
9
  from kokoro import KPipeline
10
  from werkzeug.utils import secure_filename
11
  from flask_cors import CORS
12
+ from werkzeug.middleware.proxy_fix import ProxyFix
13
 
14
  app = Flask(__name__, static_folder='static')
15
+ CORS(app, supports_credentials=True)
16
+ app.config.update(
17
+ MAX_CONTENT_LENGTH=100 * 1024 * 1024, # 100MB
18
+ SECRET_KEY=os.urandom(24),
19
+ SESSION_COOKIE_SAMESITE='Lax'
20
+ )
21
+ app.wsgi_app = ProxyFix(app.wsgi_app)
22
 
23
  # Configure Gemini API
24
+ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 
 
 
25
 
26
  # Language configurations
27
  KOKORO_LANGUAGES = {
 
36
  }
37
 
38
  GTTS_LANGUAGES = lang.tts_langs()
39
+ GTTS_LANGUAGES['ja'] = 'Japanese'
40
 
# Every language the API advertises: the Kokoro language names (dict keys)
# plus the gTTS language names (dict values), de-duplicated and sorted.
# The committed version of this statement was missing one closing parenthesis,
# which left `sorted(` unclosed and made the module fail to import.
SUPPORTED_LANGUAGES = sorted(
    set(KOKORO_LANGUAGES) | set(GTTS_LANGUAGES.values())
)
44
 
45
  @app.route('/')
 
50
  def get_languages():
51
  return jsonify(SUPPORTED_LANGUAGES)
52
 
@app.route('/upload-chunk', methods=['POST'])
def upload_chunk():
    """Store one chunk of a chunked upload in a per-file temp directory.

    Expects multipart form data with:
      file        -- the chunk payload
      chunkIndex  -- integer position of this chunk (must be >= 0)
      totalChunks -- integer number of chunks the client intends to send
      fileHash    -- client-chosen identifier shared by all chunks of one file

    The chunk is written to ``<tempdir>/<fileHash>/<chunkIndex:04d>``.

    Returns:
        JSON ``{'status': 'success', 'received': <chunkIndex>}`` on success,
        a JSON error with status 400 when a field is missing or invalid,
        or a JSON error with status 500 when saving the chunk fails.
    """
    file = request.files.get('file')
    if file is None:
        return jsonify({'error': 'Missing file chunk'}), 400

    # `type=int` yields None when the field is absent or not an integer.
    chunk_index = request.form.get('chunkIndex', type=int)
    total_chunks = request.form.get('totalChunks', type=int)
    if chunk_index is None or total_chunks is None:
        return jsonify({'error': 'chunkIndex and totalChunks must be integers'}), 400
    if chunk_index < 0 or total_chunks < 1:
        return jsonify({'error': 'chunkIndex/totalChunks out of range'}), 400

    # fileHash becomes a directory name, so it must be a single safe path
    # component; otherwise values such as "../../etc" or an absolute path
    # would let a client write outside the temp directory.
    file_hash = request.form.get('fileHash', '')
    if not file_hash or secure_filename(file_hash) != file_hash:
        return jsonify({'error': 'Invalid fileHash'}), 400

    try:
        # Save chunk to temp directory
        chunk_dir = os.path.join(tempfile.gettempdir(), file_hash)
        os.makedirs(chunk_dir, exist_ok=True)
        chunk_path = os.path.join(chunk_dir, f"{chunk_index:04d}")
        file.save(chunk_path)

        return jsonify({'status': 'success', 'received': chunk_index})

    except Exception as e:
        return jsonify({'error': str(e)}), 500
71
 
@app.route('/process-file', methods=['POST'])
def process_file():
    """Reassemble previously uploaded chunks and run the audio pipeline on them.

    Expects a JSON body with:
      fileHash -- identifier of the chunk directory ``<tempdir>/<fileHash>``
      language -- target language passed through to ``process_audio``

    The chunks are concatenated in index order into
    ``<tempdir>/<fileHash>.wav`` and handed to ``process_audio``. The
    assembled file is always removed afterwards; the chunk directory is
    removed only after successful processing, so a failed run can be
    retried without re-uploading.

    Returns:
        The JSON-encoded result of ``process_audio`` on success, a JSON error
        with status 400 for a missing/invalid field, 404 when no chunks exist
        for the given fileHash, or 500 when assembling/processing fails.
    """
    # Local import so this block does not rely on a module-level `shutil` import.
    import shutil

    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    file_hash = payload.get('fileHash')
    target_language = payload.get('language')

    # fileHash is used to build paths that are later deleted, so it must be a
    # single safe path component (no separators, no "..").
    if (
        not isinstance(file_hash, str)
        or not file_hash
        or secure_filename(file_hash) != file_hash
    ):
        return jsonify({'error': 'Invalid fileHash'}), 400
    if not isinstance(target_language, str) or not target_language:
        return jsonify({'error': 'Missing language'}), 400

    chunk_dir = os.path.join(tempfile.gettempdir(), file_hash)
    if not os.path.isdir(chunk_dir):
        return jsonify({'error': 'No uploaded chunks found for this file'}), 404

    try:
        # Chunk files are named with zero-padded indices; ordering by
        # (length, name) keeps numeric order even past 9999, where a plain
        # lexicographic sort would put "10000" before "9999".
        chunk_names = sorted(os.listdir(chunk_dir), key=lambda name: (len(name), name))
        if not chunk_names:
            return jsonify({'error': 'No uploaded chunks found for this file'}), 404

        # Reassemble file
        final_path = os.path.join(tempfile.gettempdir(), file_hash + ".wav")
        try:
            with open(final_path, 'wb') as output_file:
                for chunk_name in chunk_names:
                    with open(os.path.join(chunk_dir, chunk_name), 'rb') as chunk_file:
                        # Stream the copy instead of loading a whole chunk into memory.
                        shutil.copyfileobj(chunk_file, output_file)

            # Process file
            result = process_audio(final_path, target_language)
        finally:
            # The assembled file can be rebuilt from the chunks, so it is
            # removed even when processing fails.
            try:
                os.remove(final_path)
            except FileNotFoundError:
                pass

        # Cleanup of the chunks happens only after a successful run.
        shutil.rmtree(chunk_dir, ignore_errors=True)

        return jsonify(result)

    except Exception as e:
        app.logger.exception("Error processing uploaded file")
        return jsonify({'error': str(e)}), 500
99
 
100
+ def process_audio(file_path, target_language):
101
+ # Transcribe using Gemini
102
+ model = genai.GenerativeModel("gemini-2.0-flash-lite")
103
+ uploaded_file = genai.upload_file(path=file_path)
104
+
105
+ try:
106
+ response = model.generate_content(["Transcribe this audio file:", uploaded_file])
107
  transcription = response.text.strip()
108
+
109
+ # Translate
110
+ prompt = f"Translate to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
111
  response = model.generate_content(prompt)
112
  translated_text = response.text.strip()
113
 
 
130
  else:
131
  raise ValueError("No audio generated by Kokoro")
132
  else:
133
+ # Fallback to gTTS
134
  lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
135
  tts = gTTS(translated_text, lang=lang_code)
136
  _, temp_output_path = tempfile.mkstemp(suffix=".mp3")
137
  tts.save(temp_output_path)
138
 
139
+ return {
140
  'transcription': transcription,
141
  'translation': translated_text,
142
  'audio_url': f'/download/{os.path.basename(temp_output_path)}'
143
+ }
144
+
145
+ finally:
146
+ uploaded_file.delete()
 
147
 
148
  @app.route('/download/<filename>')
149
  def download_file(filename):