|
import streamlit as st |
|
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer |
|
import speech_recognition as sr |
|
from gtts import gTTS |
|
from pydub import AudioSegment |
|
import os |
|
import tempfile |
|
|
|
|
|
@st.cache_resource
def load_m2m_model():
    """Load and cache the M2M100 translation model and its tokenizer.

    Decorated with ``st.cache_resource`` so the 418M-parameter checkpoint
    is downloaded and instantiated only once per Streamlit server process.

    Returns:
        tuple: ``(M2M100ForConditionalGeneration, M2M100Tokenizer)``
    """
    checkpoint = "facebook/m2m100_418M"
    return (
        M2M100ForConditionalGeneration.from_pretrained(checkpoint),
        M2M100Tokenizer.from_pretrained(checkpoint),
    )


# Loaded eagerly at import time; cached across reruns by the decorator above.
model, tokenizer = load_m2m_model()
|
|
|
|
|
# Display name -> ISO 639-1 code, used both for the M2M100 tokenizer
# (src_lang / forced BOS) and for Google speech recognition.
# NOTE(review): recognize_google generally expects BCP-47 tags; bare
# codes like "zh" may need a region suffix (e.g. "zh-CN") — verify.
language_codes = dict(
    English="en",
    Hindi="hi",
    Marathi="mr",
    Chinese="zh",
    Japanese="ja",
    Spanish="es",
    Korean="ko",
)
|
|
|
|
|
# --- Page header ---
# NOTE(review): the "π" characters below look like mojibake of an emoji
# (encoding corruption upstream) — confirm the intended glyph before changing.
_title_html = "<h1 style='text-align: center;'>π PolyglotPal π</h1>"
_subtitle_html = "<h5 style='text-align: center;'>Your friendly multilingual assistant</h5>"
st.markdown(_title_html, unsafe_allow_html=True)
st.markdown(_subtitle_html, unsafe_allow_html=True)

# --- Sidebar: language selection ---
sidebar = st.sidebar
sidebar.header("Language Translation Options")

all_languages = list(language_codes)
input_language = sidebar.selectbox("Select Input Language", all_languages)

# Target choices exclude whatever is currently selected as the input language.
target_options = [name for name in all_languages if name != input_language]
target_language = sidebar.selectbox("Select Target Language", target_options)

# --- Main pane: how the user supplies the text ---
input_method = st.radio("Select Input Method", ["Text Input", "Speech Input"])
|
|
|
# --- Input acquisition: either typed text or an uploaded MP3 to transcribe ---
if input_method == "Speech Input":
    st.subheader("Upload Audio File for Translation")
    audio_file = st.file_uploader("Upload an MP3 file", type=["mp3"])


input_text = ""
if input_method == "Text Input":
    st.subheader(f"Input Text ({input_language})")
    input_text = st.text_area("Enter text to translate:", height=150)
elif input_method == "Speech Input" and audio_file:
    # Convert the uploaded MP3 to WAV first: speech_recognition's AudioFile
    # reader needs PCM WAV, not MP3.
    tmp_wav_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_wav_file:
            tmp_wav_path = tmp_wav_file.name
            audio = AudioSegment.from_file(audio_file, format="mp3")
            audio.export(tmp_wav_path, format="wav")

        recognizer = sr.Recognizer()
        with sr.AudioFile(tmp_wav_path) as source:
            audio_data = recognizer.record(source)
        # NOTE(review): recognize_google expects BCP-47 language tags; bare
        # ISO codes like "zh" may need a region suffix (e.g. "zh-CN") — verify.
        input_text = recognizer.recognize_google(audio_data, language=language_codes[input_language])
        st.success(f"Recognized Speech: {input_text}")
    except Exception as e:
        # Surface conversion/recognition failures to the user instead of crashing.
        st.error(f"Error processing audio: {e}")
    finally:
        # BUG FIX: the temp WAV was created with delete=False and never
        # removed, leaking one file per upload; clean it up on every path.
        if tmp_wav_path and os.path.exists(tmp_wav_path):
            os.remove(tmp_wav_path)
|
|
|
# --- Translation: run M2M100 on the gathered input when the button is pressed ---
if st.button("Translate"):
    if input_text.strip():
        try:
            # M2M100 is many-to-many: the source language is set on the
            # tokenizer, and the target language is forced as the first
            # generated (BOS) token.
            tokenizer.src_lang = language_codes[input_language]
            encoded_input = tokenizer(input_text, return_tensors="pt")

            generated_tokens = model.generate(
                **encoded_input,
                forced_bos_token_id=tokenizer.lang_code_to_id[language_codes[target_language]]
            )
            translated_text = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)

            st.subheader(f"Translated Text ({target_language})")
            st.text_area("Translation Result:", value=translated_text, height=150, disabled=True)
        except Exception as e:
            # BUG FIX: the original bare `except: pass` silently swallowed
            # every error (even SystemExit/KeyboardInterrupt) and gave the
            # user no feedback; report the failure like the rest of the app.
            st.error(f"Translation failed: {e}")
    else:
        st.error("Please provide text or speech input for translation.")
|
|