Update translation.py
Browse files
- translation.py +20 -3
translation.py
CHANGED
@@ -30,14 +30,18 @@ def translate(text, source_lang, target_lang):
|
|
30 |
if not text:
|
31 |
return "Please provide text to translate."
|
32 |
|
33 |
-
src_code = LANGUAGES.get(source_lang)
|
34 |
-
tgt_code = LANGUAGES.get(target_lang)
|
|
|
|
|
|
|
|
|
35 |
|
36 |
# Use preloaded model if en-fr, else load dynamically
|
37 |
if src_code == "en" and tgt_code == "fr":
|
38 |
tokenizer, model = DEFAULT_TOKENIZER, DEFAULT_MODEL
|
39 |
else:
|
40 |
-
tokenizer, model = load_model(src_code, tgt_code)
|
41 |
|
42 |
# Perform translation
|
43 |
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=400)
|
@@ -55,4 +59,17 @@ LANGUAGES = {
|
|
55 |
"Russian": "ru",
|
56 |
"Hindi": "hi",
|
57 |
"Japanese": "ja"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
}
|
|
|
30 |
if not text:
|
31 |
return "Please provide text to translate."
|
32 |
|
33 |
+
src_code = LANGUAGES.get(source_lang, "en")
|
34 |
+
tgt_code = LANGUAGES.get(target_lang, "fr")
|
35 |
+
|
36 |
+
# Check if the language pair is supported
|
37 |
+
if tgt_code not in SUPPORTED_PAIRS.get(src_code, []):
|
38 |
+
raise Exception(f"Translation from {source_lang} to {target_lang} is not supported. Supported pairs: {SUPPORTED_PAIRS.get(src_code, [])}")
|
39 |
|
40 |
# Use preloaded model if en-fr, else load dynamically
|
41 |
if src_code == "en" and tgt_code == "fr":
|
42 |
tokenizer, model = DEFAULT_TOKENIZER, DEFAULT_MODEL
|
43 |
else:
|
44 |
+
tokenizer, model = load_model(src_code, tgt_code)
|
45 |
|
46 |
# Perform translation
|
47 |
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=400)
|
|
|
59 |
"Russian": "ru",
|
60 |
"Hindi": "hi",
|
61 |
"Japanese": "ja"
|
62 |
+
}
|
63 |
+
|
64 |
+
# Dictionary of supported source-target pairs (based on available MarianMT models)
|
65 |
+
SUPPORTED_PAIRS = {
|
66 |
+
"en": ["fr", "es", "de", "zh", "ru"], # English to French, Spanish, German, Chinese, Russian
|
67 |
+
"fr": ["en"], # French to English (limited support)
|
68 |
+
"es": ["en"], # Spanish to English
|
69 |
+
"de": ["en"], # German to English
|
70 |
+
"zh": ["en"], # Chinese to English
|
71 |
+
"ru": ["en"], # Russian to English
|
72 |
+
"hi": [], # Hindi not supported as source
|
73 |
+
"ar": [], # Arabic not supported as source
|
74 |
+
"ja": [], # Japanese not supported as source
|
75 |
}
|