Athspi committed on
Commit
c07d698
·
verified ·
1 Parent(s): 073ce19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -51
app.py CHANGED
@@ -1,10 +1,10 @@
1
  import os
2
- import tempfile
3
  import base64
4
- from flask import Flask, request, jsonify, send_file, send_from_directory
5
  import google.generativeai as genai
6
- from google.generativeai.types import Content, Part, GenerateContentConfig
7
  from gtts import gTTS, lang
 
 
8
  from kokoro import KPipeline
9
  from werkzeug.utils import secure_filename
10
  from flask_cors import CORS
@@ -16,8 +16,6 @@ CORS(app)
16
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
17
  if not GEMINI_API_KEY:
18
  raise ValueError("GEMINI_API_KEY environment variable not set")
19
-
20
- # Initialize Gemini client
21
  genai.configure(api_key=GEMINI_API_KEY)
22
 
23
  # Language configurations
@@ -61,59 +59,32 @@ def translate_audio():
61
  temp_input_path = os.path.join(tempfile.gettempdir(), filename)
62
  audio_file.save(temp_input_path)
63
 
64
- # Transcribe audio using Gemini
65
  with open(temp_input_path, "rb") as audio_file:
66
  audio_data = base64.b64encode(audio_file.read()).decode("utf-8")
67
 
68
- # Upload file to Gemini
69
- uploaded_file = genai.upload_file(path=temp_input_path)
 
 
70
 
71
- # Generate transcription
72
- transcription = ""
73
- response = genai.generate_content(
74
- model="gemini-2.0-flash-lite",
75
- contents=[
76
- Content(
77
- role="user",
78
- parts=[
79
- Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
80
- Part.from_text(text="Transcript the audio and provide only the text. Do not include any explanations or additional information."),
81
- ],
82
- ),
83
- ],
84
- config=GenerateContentConfig(
85
- temperature=1,
86
- top_p=0.95,
87
- top_k=40,
88
- max_output_tokens=8192,
89
- response_mime_type="text/plain",
90
- ),
91
  )
92
- transcription = response.text
93
 
94
- # Translate text using Gemini
95
- translate_prompt = f"Translate the following text to {target_language} and return only the translated text with no additional explanation or commentary:\n\n{transcription}"
 
96
 
97
- translated_text = ""
98
- response = genai.generate_content(
99
- model="gemini-2.0-flash-lite",
100
- contents=[
101
- Content(
102
- role="user",
103
- parts=[
104
- Part.from_text(text=translate_prompt),
105
- ],
106
- ),
107
- ],
108
- config=GenerateContentConfig(
109
- temperature=1,
110
- top_p=0.95,
111
- top_k=40,
112
- max_output_tokens=8192,
113
- response_mime_type="text/plain",
114
- ),
115
- )
116
- translated_text = response.text
117
 
118
  # Generate TTS
119
  if target_language in KOKORO_LANGUAGES:
 
1
  import os
 
2
  import base64
3
+ from flask import Flask, request, jsonify, send_file
4
  import google.generativeai as genai
 
5
  from gtts import gTTS, lang
6
+ import tempfile
7
+ import soundfile as sf
8
  from kokoro import KPipeline
9
  from werkzeug.utils import secure_filename
10
  from flask_cors import CORS
 
16
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
17
  if not GEMINI_API_KEY:
18
  raise ValueError("GEMINI_API_KEY environment variable not set")
 
 
19
  genai.configure(api_key=GEMINI_API_KEY)
20
 
21
  # Language configurations
 
59
  temp_input_path = os.path.join(tempfile.gettempdir(), filename)
60
  audio_file.save(temp_input_path)
61
 
62
+ # Read audio file as base64
63
  with open(temp_input_path, "rb") as audio_file:
64
  audio_data = base64.b64encode(audio_file.read()).decode("utf-8")
65
 
66
+ # Transcribe with Gemini
67
+ model = genai.GenerativeModel("gemini-1.5-pro-latest")
68
+ prompt = """Accurately transcribe this audio file. Return only the raw text without any formatting,
69
+ punctuation, or additional commentary. Preserve the original language and meaning."""
70
 
71
+ response = model.generate_content(
72
+ [
73
+ prompt,
74
+ {
75
+ "mime_type": "audio/" + filename.split('.')[-1],
76
+ "data": audio_data
77
+ }
78
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
79
  )
80
+ transcription = response.text.strip()
81
 
82
+ # Translate with Gemini
83
+ translate_prompt = f"""Translate this text to {target_language} preserving exact meaning and cultural nuances.
84
+ Return only the translated text without any explanations or formatting: {transcription}"""
85
 
86
+ translated_response = model.generate_content(translate_prompt)
87
+ translated_text = translated_response.text.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  # Generate TTS
90
  if target_language in KOKORO_LANGUAGES: