Athspi committed on
Commit
6ebed08
·
verified ·
1 Parent(s): 75b45e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -29
app.py CHANGED
@@ -1,13 +1,15 @@
1
  import os
2
- import base64
3
  from flask import Flask, request, jsonify, send_file, send_from_directory
4
  import google.generativeai as genai
5
  from gtts import gTTS, lang
6
  import tempfile
 
 
7
  from werkzeug.utils import secure_filename
8
  from flask_cors import CORS
 
9
 
10
- app = Flask(__name__, static_folder='static', static_url_path='')
11
  CORS(app)
12
 
13
  # Configure Gemini API
@@ -17,8 +19,20 @@ if not GEMINI_API_KEY:
17
  genai.configure(api_key=GEMINI_API_KEY)
18
 
19
  # Language configurations
 
 
 
 
 
 
 
 
 
 
 
 
20
  GTTS_LANGUAGES = lang.tts_langs()
21
- SUPPORTED_LANGUAGES = sorted(GTTS_LANGUAGES.values())
22
 
23
  @app.route('/')
24
  def serve_index():
@@ -44,36 +58,36 @@ def translate_audio():
44
  filename = secure_filename(audio_file.filename)
45
  temp_input_path = os.path.join(tempfile.gettempdir(), filename)
46
  audio_file.save(temp_input_path)
47
-
48
- # Read audio file as base64
 
49
  with open(temp_input_path, "rb") as f:
50
- audio_data = base64.b64encode(f.read()).decode("utf-8")
51
-
52
- # Transcribe with Gemini
53
- model = genai.GenerativeModel("gemini-1.5-pro-latest")
54
- prompt = """Accurately transcribe this audio file. Return only the raw text without formatting."""
55
 
56
- response = model.generate_content(
57
- [
58
- prompt,
59
- {
60
- "mime_type": "audio/" + filename.split('.')[-1],
61
- "data": audio_data
62
- }
63
- ]
64
- )
65
  transcription = response.text.strip()
66
-
67
- # Translate with Gemini
68
- translate_prompt = f"Translate to {target_language} preserving meaning: {transcription}"
69
- translated_response = model.generate_content(translate_prompt)
70
- translated_text = translated_response.text.strip()
71
 
72
  # Generate TTS
73
- lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
74
- tts = gTTS(translated_text, lang=lang_code)
75
- _, temp_output_path = tempfile.mkstemp(suffix=".mp3")
76
- tts.save(temp_output_path)
 
 
 
 
 
 
 
 
 
77
 
78
  return jsonify({
79
  'transcription': transcription,
@@ -82,6 +96,7 @@ def translate_audio():
82
  })
83
 
84
  except Exception as e:
 
85
  return jsonify({'error': str(e)}), 500
86
 
87
  @app.route('/download/<filename>')
@@ -97,4 +112,4 @@ def download_file(filename):
97
  return jsonify({'error': 'File not found'}), 404
98
 
99
  if __name__ == '__main__':
100
- app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 5000)))
 
1
  import os
 
2
  from flask import Flask, request, jsonify, send_file, send_from_directory
3
  import google.generativeai as genai
4
  from gtts import gTTS, lang
5
  import tempfile
6
+ import soundfile as sf
7
+ from kokoro import KPipeline
8
  from werkzeug.utils import secure_filename
9
  from flask_cors import CORS
10
+ import requests
11
 
12
+ app = Flask(__name__, static_folder='static')
13
  CORS(app)
14
 
15
  # Configure Gemini API
 
19
  genai.configure(api_key=GEMINI_API_KEY)
20
 
21
  # Language configurations
22
+ KOKORO_LANGUAGES = {
23
+ "American English": "a",
24
+ "British English": "b",
25
+ "Japanese": "j",
26
+ "Mandarin Chinese": "z",
27
+ "Spanish": "e",
28
+ "French": "f",
29
+ "Hindi": "h",
30
+ "Italian": "i",
31
+ "Brazilian Portuguese": "p"
32
+ }
33
+
34
  GTTS_LANGUAGES = lang.tts_langs()
35
+ SUPPORTED_LANGUAGES = sorted(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values())
36
 
37
  @app.route('/')
38
  def serve_index():
 
58
  filename = secure_filename(audio_file.filename)
59
  temp_input_path = os.path.join(tempfile.gettempdir(), filename)
60
  audio_file.save(temp_input_path)
61
+
62
+ # Transcribe audio using Gemini
63
+ model = genai.GenerativeModel("gemini-2.0-flash")
64
  with open(temp_input_path, "rb") as f:
65
+ audio_data = f.read()
 
 
 
 
66
 
67
+ # Use Gemini to transcribe the audio
68
+ prompt = "Transcribe the following audio file into text. Return only the transcribed text with no additional commentary or explanations."
69
+ response = model.generate_content([prompt, audio_data])
 
 
 
 
 
 
70
  transcription = response.text.strip()
71
+
72
+ # Translate text using Gemini
73
+ prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances:\n\n{transcription}"
74
+ response = model.generate_content(prompt)
75
+ translated_text = response.text.strip()
76
 
77
  # Generate TTS
78
+ if target_language in KOKORO_LANGUAGES:
79
+ lang_code = KOKORO_LANGUAGES[target_language]
80
+ pipeline = KPipeline(lang_code=lang_code)
81
+ generator = pipeline(translated_text, voice="af_heart", speed=1)
82
+ audio_data = next((audio for _, _, audio in generator), None)
83
+ if audio_data:
84
+ _, temp_output_path = tempfile.mkstemp(suffix=".wav")
85
+ sf.write(temp_output_path, audio_data, 24000)
86
+ else:
87
+ lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
88
+ tts = gTTS(translated_text, lang=lang_code)
89
+ _, temp_output_path = tempfile.mkstemp(suffix=".mp3")
90
+ tts.save(temp_output_path)
91
 
92
  return jsonify({
93
  'transcription': transcription,
 
96
  })
97
 
98
  except Exception as e:
99
+ app.logger.error(f"Error processing request: {str(e)}")
100
  return jsonify({'error': str(e)}), 500
101
 
102
  @app.route('/download/<filename>')
 
112
  return jsonify({'error': 'File not found'}), 404
113
 
114
  if __name__ == '__main__':
115
+ app.run(host='0.0.0.0', port=5000, debug=True)