Krishna086 committed on
Commit
02a14aa
·
verified ·
1 Parent(s): a977cc2

Update translation.py

Browse files
Files changed (1) hide show
  1. translation.py +13 -38
translation.py CHANGED
@@ -1,58 +1,33 @@
1
  import streamlit as st
2
  from transformers import MarianTokenizer, MarianMTModel
3
 
4
- # Preload default model for English to French
5
  @st.cache_resource
6
  def _load_default_model():
7
- """Load default MarianMT model (en-fr)."""
8
  model_name = "Helsinki-NLP/opus-mt-en-fr"
9
- tokenizer = MarianTokenizer.from_pretrained(model_name)
10
- model = MarianMTModel.from_pretrained(model_name)
11
- return tokenizer, model
12
 
13
- # Cache other models dynamically
14
  @st.cache_resource
15
  def load_model(src_lang, tgt_lang):
16
- """Load the MarianMT model and tokenizer for a language pair."""
17
- model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
18
  try:
19
- tokenizer = MarianTokenizer.from_pretrained(model_name)
20
- model = MarianMTModel.from_pretrained(model_name)
21
- return tokenizer, model
22
- except Exception as e:
23
- st.warning(f"Model for {src_lang} to {tgt_lang} not available. Falling back to en-fr.")
24
- return _load_default_model() # Fallback to preloaded en-fr model
25
 
26
- # Preload default model globally
27
  # Runs at import time: loads the en-fr pair via _load_default_model(), the same
  # loader load_model() falls back to when a language-pair checkpoint cannot be loaded.
  DEFAULT_TOKENIZER, DEFAULT_MODEL = _load_default_model()
28
 
29
  def translate(text, source_lang, target_lang):
30
- """Translate text from source to target language."""
31
  if not text:
32
- return "Please provide text to translate."
33
-
34
- src_code = LANGUAGES.get(source_lang, "en")
35
- tgt_code = LANGUAGES.get(target_lang, "fr")
36
-
37
- # Attempt to load the specific model, fall back to en-fr if it fails
38
  tokenizer, model = load_model(src_code, tgt_code)
39
-
40
- # Perform translation
41
  inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=400)
42
  translated = model.generate(**inputs)
43
  return tokenizer.decode(translated[0], skip_special_tokens=True)
44
 
45
- # Dictionary of supported languages with MarianMT codes
46
# Display name -> language code used to build the "opus-mt-{src}-{tgt}" model name.
LANGUAGES = {
    "English": "en", "French": "fr", "Spanish": "es",
    "German": "de", "Chinese": "zh", "Arabic": "ar",
    "Russian": "ru", "Hindi": "hi", "Japanese": "ja",
}
57
-
58
- # Removed SUPPORTED_PAIRS to revert to original behavior
 
1
  import streamlit as st
2
  from transformers import MarianTokenizer, MarianMTModel
3
 
 
4
  @st.cache_resource
5
  def _load_default_model():
 
6
  model_name = "Helsinki-NLP/opus-mt-en-fr"
7
+ return MarianTokenizer.from_pretrained(model_name), MarianMTModel.from_pretrained(model_name)
 
 
8
 
 
9
  @st.cache_resource
10
  def load_model(src_lang, tgt_lang):
 
 
11
  try:
12
+ model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
13
+ return MarianTokenizer.from_pretrained(model_name), MarianMTModel.from_pretrained(model_name)
14
+ except:
15
+ st.warning(f"No model for {src_lang} to {tgt_lang}. Using en-fr.")
16
+ return _load_default_model()
 
17
 
 
18
  # Runs at import time: loads the en-fr pair via _load_default_model(), the same
  # loader load_model() falls back to when a language-pair checkpoint cannot be loaded.
  DEFAULT_TOKENIZER, DEFAULT_MODEL = _load_default_model()
19
 
20
  def translate(text, source_lang, target_lang):
 
21
  if not text:
22
+ return "No text provided."
23
+ src_code = {"English": "en", "French": "fr", "Spanish": "es", "German": "de",
24
+ "Hindi": "hi", "Chinese": "zh", "Arabic": "ar", "Russian": "ru", "Japanese": "ja"}.get(source_lang, "en")
25
+ tgt_code = {"English": "en", "French": "fr", "Spanish": "es", "German": "de",
26
+ "Hindi": "hi", "Chinese": "zh", "Arabic": "ar", "Russian": "ru", "Japanese": "ja"}.get(target_lang, "fr")
 
27
  tokenizer, model = load_model(src_code, tgt_code)
 
 
28
  inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=400)
29
  translated = model.generate(**inputs)
30
  return tokenizer.decode(translated[0], skip_special_tokens=True)
31
 
32
# Display name -> language code used to build the "opus-mt-{src}-{tgt}" model name.
LANGUAGES = {
    "English": "en",
    "French": "fr",
    "Spanish": "es",
    "German": "de",
    "Hindi": "hi",
    "Chinese": "zh",
    "Arabic": "ar",
    "Russian": "ru",
    "Japanese": "ja",
}