Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
import os
|
2 |
-
import tempfile
|
3 |
import base64
|
4 |
-
from flask import Flask, request, jsonify, send_file
|
5 |
import google.generativeai as genai
|
6 |
-
from google.generativeai.types import Content, Part, GenerateContentConfig
|
7 |
from gtts import gTTS, lang
|
|
|
|
|
8 |
from kokoro import KPipeline
|
9 |
from werkzeug.utils import secure_filename
|
10 |
from flask_cors import CORS
|
@@ -16,8 +16,6 @@ CORS(app)
|
|
16 |
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
17 |
if not GEMINI_API_KEY:
|
18 |
raise ValueError("GEMINI_API_KEY environment variable not set")
|
19 |
-
|
20 |
-
# Initialize Gemini client
|
21 |
genai.configure(api_key=GEMINI_API_KEY)
|
22 |
|
23 |
# Language configurations
|
@@ -61,59 +59,32 @@ def translate_audio():
|
|
61 |
temp_input_path = os.path.join(tempfile.gettempdir(), filename)
|
62 |
audio_file.save(temp_input_path)
|
63 |
|
64 |
-
#
|
65 |
with open(temp_input_path, "rb") as audio_file:
|
66 |
audio_data = base64.b64encode(audio_file.read()).decode("utf-8")
|
67 |
|
68 |
-
#
|
69 |
-
|
|
|
|
|
70 |
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
|
80 |
-
Part.from_text(text="Transcript the audio and provide only the text. Do not include any explanations or additional information."),
|
81 |
-
],
|
82 |
-
),
|
83 |
-
],
|
84 |
-
config=GenerateContentConfig(
|
85 |
-
temperature=1,
|
86 |
-
top_p=0.95,
|
87 |
-
top_k=40,
|
88 |
-
max_output_tokens=8192,
|
89 |
-
response_mime_type="text/plain",
|
90 |
-
),
|
91 |
)
|
92 |
-
transcription = response.text
|
93 |
|
94 |
-
# Translate
|
95 |
-
translate_prompt = f"Translate
|
|
|
96 |
|
97 |
-
|
98 |
-
|
99 |
-
model="gemini-2.0-flash-lite",
|
100 |
-
contents=[
|
101 |
-
Content(
|
102 |
-
role="user",
|
103 |
-
parts=[
|
104 |
-
Part.from_text(text=translate_prompt),
|
105 |
-
],
|
106 |
-
),
|
107 |
-
],
|
108 |
-
config=GenerateContentConfig(
|
109 |
-
temperature=1,
|
110 |
-
top_p=0.95,
|
111 |
-
top_k=40,
|
112 |
-
max_output_tokens=8192,
|
113 |
-
response_mime_type="text/plain",
|
114 |
-
),
|
115 |
-
)
|
116 |
-
translated_text = response.text
|
117 |
|
118 |
# Generate TTS
|
119 |
if target_language in KOKORO_LANGUAGES:
|
|
|
1 |
import os
|
|
|
2 |
import base64
|
3 |
+
from flask import Flask, request, jsonify, send_file
|
4 |
import google.generativeai as genai
|
|
|
5 |
from gtts import gTTS, lang
|
6 |
+
import tempfile
|
7 |
+
import soundfile as sf
|
8 |
from kokoro import KPipeline
|
9 |
from werkzeug.utils import secure_filename
|
10 |
from flask_cors import CORS
|
|
|
16 |
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
17 |
if not GEMINI_API_KEY:
|
18 |
raise ValueError("GEMINI_API_KEY environment variable not set")
|
|
|
|
|
19 |
genai.configure(api_key=GEMINI_API_KEY)
|
20 |
|
21 |
# Language configurations
|
|
|
59 |
temp_input_path = os.path.join(tempfile.gettempdir(), filename)
|
60 |
audio_file.save(temp_input_path)
|
61 |
|
62 |
+
# Read audio file as base64
|
63 |
with open(temp_input_path, "rb") as audio_file:
|
64 |
audio_data = base64.b64encode(audio_file.read()).decode("utf-8")
|
65 |
|
66 |
+
# Transcribe with Gemini
|
67 |
+
model = genai.GenerativeModel("gemini-1.5-pro-latest")
|
68 |
+
prompt = """Accurately transcribe this audio file. Return only the raw text without any formatting,
|
69 |
+
punctuation, or additional commentary. Preserve the original language and meaning."""
|
70 |
|
71 |
+
response = model.generate_content(
|
72 |
+
[
|
73 |
+
prompt,
|
74 |
+
{
|
75 |
+
"mime_type": "audio/" + filename.split('.')[-1],
|
76 |
+
"data": audio_data
|
77 |
+
}
|
78 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
)
|
80 |
+
transcription = response.text.strip()
|
81 |
|
82 |
+
# Translate with Gemini
|
83 |
+
translate_prompt = f"""Translate this text to {target_language} preserving exact meaning and cultural nuances.
|
84 |
+
Return only the translated text without any explanations or formatting: {transcription}"""
|
85 |
|
86 |
+
translated_response = model.generate_content(translate_prompt)
|
87 |
+
translated_text = translated_response.text.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
# Generate TTS
|
90 |
if target_language in KOKORO_LANGUAGES:
|