# Page header captured with this file (Hugging Face Space file view):
#   author: DHEIVER
#   commit: 6418068 (verified) - "Update app.py"
#   size:   6.81 kB
import gradio as gr
import torch
import torchaudio
import numpy as np
from transformers import AutoProcessor, SeamlessM4Tv2Model
from datetime import datetime
class SeamlessTranslator:
    """Translate text in one language into synthesized speech in another.

    Wraps a SeamlessM4T v2 processor/model pair loaded via ``from_pretrained``
    and exposes a single :meth:`translate` entry point.
    """

    def __init__(self, model_name: str = "facebook/seamless-m4t-v2-large"):
        """Load the processor and model for ``model_name``.

        Args:
            model_name: Checkpoint identifier handed to ``from_pretrained``.
        """
        self.processor = AutoProcessor.from_pretrained(model_name)
        self.model = SeamlessM4Tv2Model.from_pretrained(model_name)
        # Sampling rate read from the model config; returned with every waveform.
        self.sample_rate = self.model.config.sampling_rate
        # Display name shown in the UI -> language code passed to processor/model.
        self.language_codes = {
            "English": "eng",
            "Spanish": "spa",
            "French": "fra",
            "German": "deu",
            "Italian": "ita",
            "Portuguese": "por",
            "Russian": "rus",
            "Chinese": "cmn",
            "Japanese": "jpn",
        }

    def translate(self, text: str, src_lang: str, tgt_lang: str) -> tuple[int, np.ndarray]:
        """Translate ``text`` and return the generated speech.

        Args:
            text: Text to translate; must contain at least one non-space character.
            src_lang: Display name of the source language (a key of
                ``self.language_codes``).
            tgt_lang: Display name of the target language (a key of
                ``self.language_codes``).

        Returns:
            ``(sample_rate, audio_array)`` where ``audio_array`` is the first
            element of the model's ``generate`` output, moved to CPU, converted
            to NumPy and squeezed.

        Raises:
            gr.Error: If the text is blank, a language name is unknown, or the
                processor/model raises.
        """
        # Reject blank input up front instead of running the model on nothing.
        if not text or not text.strip():
            raise gr.Error("Translation failed: no text was provided.")

        # Resolve both codes before any heavy work so a bad name yields a clear message.
        try:
            src_code = self.language_codes[src_lang]
            tgt_code = self.language_codes[tgt_lang]
        except KeyError as e:
            raise gr.Error(f"Translation failed: unsupported language {e.args[0]!r}") from e

        try:
            inputs = self.processor(text=text, src_lang=src_code, return_tensors="pt")
            generated = self.model.generate(**inputs, tgt_lang=tgt_code)
            audio_array = generated[0].cpu().numpy().squeeze()
        except Exception as e:
            # UI boundary: surface any failure as a Gradio error, keeping the cause chained.
            raise gr.Error(f"Translation failed: {str(e)}") from e

        return self.sample_rate, audio_array
# Custom CSS for the Jarvis theme, passed to gr.Blocks(css=css) in create_interface().
# Selectors correspond to identifiers used when the UI is built:
#   #jarvis-interface            - elem_id of the main gr.Column container
#   #status-ring/#inner-ring/#core - ids of the nested <div>s in the status-ring gr.HTML
#   .jarvis-textbox/.jarvis-button - elem_classes on the textbox, dropdowns and button
#   .status-box                  - class of the <div>s inside the status gr.Markdown panels
# The "pulse" keyframes animate the box-shadow of #status-ring on a 2s infinite loop.
css = """
#jarvis-interface {
background-color: black !important;
background-image: radial-gradient(circle at center, #00303030 0%, #00000080 100%);
min-height: 100vh;
font-family: 'Courier New', monospace;
}
#status-ring {
width: 300px;
height: 300px;
border: 4px solid #00ffff;
border-radius: 50%;
margin: 20px auto;
position: relative;
animation: pulse 2s infinite;
display: flex;
align-items: center;
justify-content: center;
}
@keyframes pulse {
0% { box-shadow: 0 0 0 0 rgba(0, 255, 255, 0.4); }
70% { box-shadow: 0 0 0 20px rgba(0, 255, 255, 0); }
100% { box-shadow: 0 0 0 0 rgba(0, 255, 255, 0); }
}
#inner-ring {
width: 200px;
height: 200px;
border: 2px solid #00ffff;
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
}
#core {
width: 100px;
height: 100px;
border: 3px solid #00ffff;
border-radius: 50%;
background-color: black;
display: flex;
align-items: center;
justify-content: center;
color: #00ffff;
text-align: center;
padding: 10px;
}
.jarvis-textbox {
background-color: black !important;
border: 2px solid #00ffff !important;
color: #00ffff !important;
font-family: 'Courier New', monospace !important;
}
.jarvis-button {
background-color: transparent !important;
border: 2px solid #00ffff !important;
color: #00ffff !important;
font-family: 'Courier New', monospace !important;
}
.jarvis-button:hover {
background-color: rgba(0, 255, 255, 0.1) !important;
}
.status-box {
background-color: black !important;
border: 2px solid #00ffff !important;
color: #00ffff !important;
padding: 10px !important;
border-radius: 5px !important;
margin: 5px !important;
text-align: center !important;
}
"""
def create_interface():
    """Build the J.A.R.V.I.S-themed Gradio UI around a ``SeamlessTranslator``.

    Instantiates the translator, lays out the text box, language dropdowns,
    button, audio output and decorative status panels, and wires the button
    to the translation callback.

    NOTE(review): the component nesting below was reconstructed from a source
    whose indentation had been stripped - confirm the intended layout.

    Returns:
        The constructed ``gr.Blocks`` app. It is not queued or launched here.
    """
    translator = SeamlessTranslator()

    def translate_text(text: str, src_lang: str, tgt_lang: str, progress=gr.Progress()):
        """Button callback: translate ``text`` and return audio for ``gr.Audio``.

        Returns:
            ``(sample_rate, audio)`` as produced by ``translator.translate``.
        """
        progress(0, desc="Initializing...")
        progress(0.3, desc="Processing text...")
        sample_rate, audio = translator.translate(text, src_lang, tgt_lang)
        progress(0.7, desc="Generating audio...")
        progress(1.0, desc="Complete!")
        # A numpy-typed gr.Audio output needs the sample rate alongside the
        # samples; returning the bare array (as before) is not a valid value.
        return sample_rate, audio

    # (title, state) pairs rendered as the static status panels at the bottom.
    status_panels = (
        ("SYSTEM STATUS", "ACTIVE"),
        ("AUDIO SYSTEM", "READY"),
        ("TRANSLATION", "ONLINE"),
    )

    with gr.Blocks(css=css, title="J.A.R.V.I.S Translator") as demo:
        gr.Markdown(
            "\n"
            "# J.A.R.V.I.S TRANSLATION SYSTEM\n"
            "### Powered by SeamlessM4T\n"
        )

        # Jarvis interface container
        with gr.Column(elem_id="jarvis-interface"):
            # Status ring (purely decorative; styled by the #status-ring CSS rules)
            gr.HTML(
                "\n"
                '<div id="status-ring">\n'
                '<div id="inner-ring">\n'
                '<div id="core">\n'
                "<div>JARVIS</div>\n"
                "<div>ACTIVE</div>\n"
                "</div>\n"
                "</div>\n"
                "</div>\n"
            )

            # Input controls
            with gr.Row():
                text_input = gr.Textbox(
                    label="Command Input",
                    placeholder="Enter text to translate...",
                    elem_classes=["jarvis-textbox"],
                    lines=3,
                )

            with gr.Row():
                src_lang = gr.Dropdown(
                    choices=list(translator.language_codes.keys()),
                    value="English",
                    label="Source Language",
                    elem_classes=["jarvis-textbox"],
                )
                tgt_lang = gr.Dropdown(
                    choices=list(translator.language_codes.keys()),
                    value="Spanish",
                    label="Target Language",
                    elem_classes=["jarvis-textbox"],
                )

            translate_btn = gr.Button(
                "▶ EXECUTE TRANSLATION",
                elem_classes=["jarvis-button"],
            )

            # Output audio
            audio_output = gr.Audio(
                label="Translated Speech",
                type="numpy",
            )

            # Status boxes: one column per panel, each rendering the same HTML
            # snippet with its own title and state.
            with gr.Row():
                for panel_title, panel_state in status_panels:
                    with gr.Column():
                        gr.Markdown(
                            "\n"
                            '<div class="status-box">\n'
                            f"{panel_title}<br>\n"
                            f"<strong>{panel_state}</strong>\n"
                            "</div>\n"
                        )

        # Event handler
        translate_btn.click(
            fn=translate_text,
            inputs=[text_input, src_lang, tgt_lang],
            outputs=audio_output,
        )

    return demo
if __name__ == "__main__":
    # Script entry point: build the app, then enable the queue and start serving.
    demo = create_interface()
    demo.queue().launch()