Update lang_detect.py
Browse files- lang_detect.py +36 -17
lang_detect.py
CHANGED
@@ -1,21 +1,40 @@
|
|
1 |
-
from langdetect import
|
2 |
|
3 |
def detect_language(text):
|
4 |
-
"""Detect the language of input text."""
|
5 |
try:
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
"
|
15 |
-
"
|
16 |
-
"
|
17 |
-
"
|
|
|
|
|
|
|
|
|
|
|
18 |
}
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langdetect import detect_langs
|
2 |
|
3 |
# Detected language code -> (native display name, supported language name).
_NATIVE_LANG_MAP = {
    "en": ("English", "English"),
    "fr": ("Français", "French"),
    "es": ("Español", "Spanish"),
    "de": ("Deutsch", "German"),
    "hi": ("हिन्दी", "Hindi"),
    "zh": ("中文", "Chinese"),
    "ar": ("العربية", "Arabic"),
    "ru": ("Русский", "Russian"),
    "ja": ("日本語", "Japanese"),
}

# Used for detected codes that have no entry in _NATIVE_LANG_MAP.
_UNMAPPED_LANG = ("Unknown", "English")

_MIN_TEXT_LENGTH = 10        # shorter inputs are not passed to the detector
_CONFIDENCE_THRESHOLD = 0.7  # candidates below this probability are dropped
_MAX_OPTIONS = 3             # upper bound on the number of returned options


def detect_language(text):
    """Detect the language of ``text`` and map it to a supported language.

    Args:
        text: The input string to analyse.

    Returns:
        A list of ``(supported_language, confidence, native_name)`` tuples,
        at most ``_MAX_OPTIONS`` long and never empty:

        * ``[("English", 1.0, "English")]`` when ``text`` has fewer than
          ``_MIN_TEXT_LENGTH`` characters (the detector is not called).
        * One tuple per detected candidate whose probability is at least
          ``_CONFIDENCE_THRESHOLD``, in the order the detector returned them.
          Codes missing from the map become ``("English", prob, "Unknown")``.
        * ``[("English", 0.5, "English")]`` when no candidate passes the
          threshold or when detection fails for any reason.

    This function does not raise; failures are logged and the low-confidence
    English fallback is returned instead.
    """
    # Function-scope import keeps this block self-contained; the module's
    # import section only provides ``detect_langs``.
    import logging

    try:
        if len(text) < _MIN_TEXT_LENGTH:
            return [("English", 1.0, "English")]

        options = []
        for candidate in detect_langs(text):
            code, prob = candidate.lang, candidate.prob
            if prob < _CONFIDENCE_THRESHOLD:
                continue
            # Region-qualified codes such as "zh-cn" / "zh-tw" are looked up
            # by their base code ("zh") when there is no exact entry.
            entry = _NATIVE_LANG_MAP.get(code)
            if entry is None:
                entry = _NATIVE_LANG_MAP.get(code.split("-", 1)[0], _UNMAPPED_LANG)
            native_name, mapped_lang = entry
            options.append((mapped_lang, prob, native_name))

        if not options:
            # Nothing met the threshold: low-confidence default.
            options = [("English", 0.5, "English")]
        return options[:_MAX_OPTIONS]

    except Exception as e:
        # Best-effort boundary: report the failure and fall back rather than
        # crash the caller. The previous handler referenced an undefined
        # name (``st``), which turned every failure into a NameError.
        logging.getLogger(__name__).error("Language detection failed: %s", e)
        return [("English", 0.5, "English")]
|