Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
import os
|
|
|
|
|
2 |
from flask import Flask, request, jsonify, send_file, send_from_directory
|
3 |
-
from
|
4 |
-
|
5 |
from gtts import gTTS, lang
|
6 |
-
import tempfile
|
7 |
-
import soundfile as sf
|
8 |
from kokoro import KPipeline
|
9 |
from werkzeug.utils import secure_filename
|
10 |
from flask_cors import CORS
|
@@ -16,14 +16,9 @@ CORS(app)
|
|
16 |
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
17 |
if not GEMINI_API_KEY:
|
18 |
raise ValueError("GEMINI_API_KEY environment variable not set")
|
19 |
-
genai.configure(api_key=GEMINI_API_KEY)
|
20 |
|
21 |
-
# Initialize
|
22 |
-
|
23 |
-
try:
|
24 |
-
whisper_model = WhisperModel(model_size, device="auto", compute_type="float16")
|
25 |
-
except ValueError:
|
26 |
-
whisper_model = WhisperModel(model_size, device="cpu", compute_type="int8")
|
27 |
|
28 |
# Language configurations
|
29 |
KOKORO_LANGUAGES = {
|
@@ -66,15 +61,60 @@ def translate_audio():
|
|
66 |
temp_input_path = os.path.join(tempfile.gettempdir(), filename)
|
67 |
audio_file.save(temp_input_path)
|
68 |
|
69 |
-
# Transcribe audio
|
70 |
-
|
71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
|
|
|
|
78 |
|
79 |
# Generate TTS
|
80 |
if target_language in KOKORO_LANGUAGES:
|
|
|
1 |
import os
|
2 |
+
import tempfile
|
3 |
+
import base64
|
4 |
from flask import Flask, request, jsonify, send_file, send_from_directory
|
5 |
+
from google import genai
|
6 |
+
from google.genai import types
|
7 |
from gtts import gTTS, lang
|
|
|
|
|
8 |
from kokoro import KPipeline
|
9 |
from werkzeug.utils import secure_filename
|
10 |
from flask_cors import CORS
|
|
|
16 |
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
17 |
if not GEMINI_API_KEY:
|
18 |
raise ValueError("GEMINI_API_KEY environment variable not set")
|
|
|
19 |
|
20 |
+
# Initialize Gemini client
|
21 |
+
client = genai.Client(api_key=GEMINI_API_KEY)
|
|
|
|
|
|
|
|
|
22 |
|
23 |
# Language configurations
|
24 |
KOKORO_LANGUAGES = {
|
|
|
61 |
temp_input_path = os.path.join(tempfile.gettempdir(), filename)
|
62 |
audio_file.save(temp_input_path)
|
63 |
|
64 |
+
# Transcribe audio using Gemini
|
65 |
+
with open(temp_input_path, "rb") as audio_file:
|
66 |
+
audio_data = base64.b64encode(audio_file.read()).decode("utf-8")
|
67 |
+
|
68 |
+
files = [client.files.upload(file=temp_input_path)]
|
69 |
+
|
70 |
+
contents = [
|
71 |
+
types.Content(
|
72 |
+
role="user",
|
73 |
+
parts=[
|
74 |
+
types.Part.from_uri(
|
75 |
+
file_uri=files[0].uri,
|
76 |
+
mime_type=files[0].mime_type,
|
77 |
+
),
|
78 |
+
types.Part.from_text(text="Transcript the audio and provide only the text. Do not include any explanations or additional information."),
|
79 |
+
],
|
80 |
+
),
|
81 |
+
]
|
82 |
+
|
83 |
+
generate_content_config = types.GenerateContentConfig(
|
84 |
+
temperature=1,
|
85 |
+
top_p=0.95,
|
86 |
+
top_k=40,
|
87 |
+
max_output_tokens=8192,
|
88 |
+
response_mime_type="text/plain",
|
89 |
+
)
|
90 |
+
|
91 |
+
transcription = ""
|
92 |
+
for chunk in client.models.generate_content_stream(
|
93 |
+
model="gemini-2.0-flash-lite",
|
94 |
+
contents=contents,
|
95 |
+
config=generate_content_config,
|
96 |
+
):
|
97 |
+
transcription += chunk.text
|
98 |
+
|
99 |
+
# Translate text using Gemini
|
100 |
+
translate_prompt = f"Translate the following text to {target_language} and return only the translated text with no additional explanation or commentary:\n\n{transcription}"
|
101 |
+
|
102 |
+
translate_contents = [
|
103 |
+
types.Content(
|
104 |
+
role="user",
|
105 |
+
parts=[
|
106 |
+
types.Part.from_text(text=translate_prompt),
|
107 |
+
],
|
108 |
+
),
|
109 |
+
]
|
110 |
|
111 |
+
translated_text = ""
|
112 |
+
for chunk in client.models.generate_content_stream(
|
113 |
+
model="gemini-2.0-flash-lite",
|
114 |
+
contents=translate_contents,
|
115 |
+
config=generate_content_config,
|
116 |
+
):
|
117 |
+
translated_text += chunk.text
|
118 |
|
119 |
# Generate TTS
|
120 |
if target_language in KOKORO_LANGUAGES:
|