# GradioTranslate / app.py
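"""Gradio Space for machine text translation.

Translates text between languages chosen from dropdowns, using several
model families: Helsinki-NLP OPUS-MT, T5, NLLB-200, mBART-50, EuroLLM
and Unbabel TowerInstruct.
"""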
import gradio as gr
import spaces
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, pipeline
import languagecodes
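# `languagecodes` provides the language-name -> code mappings used below:
# iso_languages, nllb_language_codes (FLORES-200) and mbart_large_languages.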
favourite_langs = {"German": "de", "Romanian": "ro", "English": "en", "-----": "-----"}
all_langs = languagecodes.iso_languages
# Build the dropdown options list, with the favourite languages first
options = list(favourite_langs.keys())
options.extend(list(all_langs.keys()))
models = ["Helsinki-NLP",
"t5-base", "t5-small", "t5-large",
"facebook/nllb-200-distilled-600M",
"facebook/nllb-200-distilled-1.3B",
"facebook/mbart-large-50-many-to-many-mmt",
"utter-project/EuroLLM-1.7B",
"Unbabel/TowerInstruct-7B-v0.2",
"Unbabel/TowerInstruct-Mistral-7B-v0.2"
]
def model_to_cuda(model):
    # Move the model to GPU if available
    if torch.cuda.is_available():
        model = model.to('cuda')
        print("CUDA is available! Using GPU.")
    else:
        print("CUDA not available! Using CPU.")
    return model
def eurollm(model_name, sl, tl, input_text):
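    """Translate with an EuroLLM causal LM using a '<source language>: text <target language>:' prompt."""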
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    prompt = f"{sl}: {input_text} {tl}:"
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=512)
    output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the text generated after the target-language tag
    result = output.rsplit(f'{tl}:')[-1].strip()
    return result
def nllb(model_name, sl, tl, input_text):
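    """Translate with an NLLB-200 seq2seq model; `sl` and `tl` are FLORES-200 codes (e.g. eng_Latn)."""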
    tokenizer = AutoTokenizer.from_pretrained(model_name, src_lang=sl)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto")
    translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang=sl, tgt_lang=tl)
    translated_text = translator(input_text, max_length=512)
    return translated_text[0]['translation_text']
@spaces.GPU
def translate_text(input_text, sselected_language, tselected_language, model_name):
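    """Translate `input_text` with the selected model and return a (translated_text, status_message) tuple for the UI."""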
    sl = all_langs[sselected_language]
    tl = all_langs[tselected_language]
    message_text = f'Translated from {sselected_language} to {tselected_language} with {model_name}'
    print(message_text)

    if model_name == "Helsinki-NLP":
        # Resolve the concrete OPUS model for this language pair, falling back to the Tatoeba variant
        try:
            model_name = f"Helsinki-NLP/opus-mt-{sl}-{tl}"
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = model_to_cuda(AutoModelForSeq2SeqLM.from_pretrained(model_name))
        except EnvironmentError:
            try:
                model_name = f"Helsinki-NLP/opus-tatoeba-{sl}-{tl}"
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                model = model_to_cuda(AutoModelForSeq2SeqLM.from_pretrained(model_name))
            except EnvironmentError as error:
                return f"Error finding model: {model_name}! Try another available language combination.", str(error)

    if 'eurollm' in model_name.lower():
        translated_text = eurollm(model_name, sselected_language, tselected_language, input_text)
        return translated_text, message_text

    if 'nllb' in model_name.lower():
        nnlbsl, nnlbtl = languagecodes.nllb_language_codes[sselected_language], languagecodes.nllb_language_codes[tselected_language]
        translated_text = nllb(model_name, nnlbsl, nnlbtl, input_text)
        return translated_text, message_text

    if model_name.startswith('facebook/mbart-large'):
        from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
        model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
        tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
        # Translate source to target: force the decoder to start with the target language code
        tokenizer.src_lang = languagecodes.mbart_large_languages[sselected_language]
        encoded = tokenizer(input_text, return_tensors="pt")
        generated_tokens = model.generate(
            **encoded,
            forced_bos_token_id=tokenizer.lang_code_to_id[languagecodes.mbart_large_languages[tselected_language]]
        )
        return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0], message_text

    if 'Unbabel' in model_name:
        pipe = pipeline("text-generation", model=model_name, torch_dtype=torch.bfloat16, device_map="auto")
        messages = [{"role": "user",
                     "content": f"Translate the following text from {sselected_language} into {tselected_language}.\n{sselected_language}: {input_text}.\n{tselected_language}:"}]
        prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
        # return_full_text=False keeps only the generated translation instead of prompt + completion
        outputs = pipe(prompt, max_new_tokens=256, do_sample=False, return_full_text=False)
        translated_text = outputs[0]["generated_text"].strip()
        return translated_text, message_text

    if model_name.startswith('t5'):
        tokenizer = T5Tokenizer.from_pretrained(model_name)
        model = T5ForConditionalGeneration.from_pretrained(model_name, device_map="auto")

    # Helsinki-NLP and T5 models share the generate/decode path below
    if model_name.startswith("Helsinki-NLP"):
        prompt = input_text
    else:
        prompt = f"translate {sselected_language} to {tselected_language}: {input_text}"
    # Keep the inputs on the same device as the model to avoid CPU/GPU mismatches
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(input_ids, max_length=512)
    translated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # Refresh the status message in case the Helsinki-NLP model name was resolved above
    message_text = f'Translated from {sselected_language} to {tselected_language} with {model_name}'
    print(f'Translating from {sselected_language} to {tselected_language} with {model_name}:', f'{input_text} = {translated_text}', sep='\n')
    return translated_text, message_text
# Define a function to swap dropdown values
def swap_languages(src_lang, tgt_lang):
    return tgt_lang, src_lang
def create_interface():
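    """Build and return the Gradio Blocks UI."""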
    with gr.Blocks() as interface:
        gr.Markdown("### Machine Text Translation")
        with gr.Row():
            input_text = gr.Textbox(label="Enter text to translate:", placeholder="Type your text here, maximum 512 tokens")
        with gr.Row():
            sselected_language = gr.Dropdown(choices=options, value=options[0], label="Source language", interactive=True)
            tselected_language = gr.Dropdown(choices=options, value=options[1], label="Target language", interactive=True)
            swap_button = gr.Button("Swap Languages")
            swap_button.click(fn=swap_languages, inputs=[sselected_language, tselected_language], outputs=[sselected_language, tselected_language])
        model_name = gr.Dropdown(choices=models, label="Select a model", value=models[4], interactive=True)
        translate_button = gr.Button("Translate")
        translated_text = gr.Textbox(label="Translated text:", placeholder="Display field for translation", interactive=False, show_copy_button=True)
        message_text = gr.Textbox(label="Messages:", placeholder="Display field for status and error messages", interactive=False)
        translate_button.click(
            translate_text,
            inputs=[input_text, sselected_language, tselected_language, model_name],
            outputs=[translated_text, message_text]
        )
    return interface
interface = create_interface()
interface.launch()