Krishna086's picture
Update lang_detect.py
b937c3e verified
raw
history blame
1.77 kB
import logging

from langdetect import detect_langs
# Maps a langdetect *base* language code (the part before any "-region"
# suffix) to a pair of (native display name, supported language name).
_NATIVE_LANG_MAP = {
    "en": ("English", "English"),
    "fr": ("Français", "French"),
    "es": ("Español", "Spanish"),
    "de": ("Deutsch", "German"),
    "hi": ("हिन्दी", "Hindi"),
    "zh": ("中文", "Chinese"),
    "ar": ("العربية", "Arabic"),
    "ru": ("Русский", "Russian"),
    "ja": ("日本語", "Japanese"),
}

# Texts shorter than this are not sent to the detector at all.
_MIN_TEXT_LENGTH = 10
# Detections below this probability are discarded.
_CONFIDENCE_THRESHOLD = 0.7
# Upper bound on the number of options returned to the caller.
_MAX_OPTIONS = 3

_logger = logging.getLogger(__name__)


def detect_language(text):
    """Detect the language of ``text`` and map it to a supported language.

    Args:
        text: The input string to classify.

    Returns:
        A non-empty list of ``(language, confidence, native_name)`` tuples,
        at most three entries long:

        * ``[("English", 1.0, "English")]`` when ``text`` is shorter than
          10 characters (detection is skipped).
        * One tuple per detection whose probability is at least 0.7. Codes
          that are not in the mapping yield ``("English", prob, "Unknown")``.
        * ``[("English", 0.5, "English")]`` when no detection reaches the
          threshold or when detection raises.

    This function never raises: any failure is logged and the low-confidence
    English fallback is returned.
    """
    try:
        if len(text) < _MIN_TEXT_LENGTH:
            return [("English", 1.0, "English")]

        detected_options = []
        for detection in detect_langs(text):
            if detection.prob < _CONFIDENCE_THRESHOLD:
                continue
            # langdetect reports regional variants such as "zh-cn"/"zh-tw";
            # look up the base code so they resolve to the "zh" entry.
            base_code = detection.lang.lower().split("-")[0]
            native_name, mapped_lang = _NATIVE_LANG_MAP.get(
                base_code, ("Unknown", "English")
            )
            detected_options.append((mapped_lang, detection.prob, native_name))

        if not detected_options:
            return [("English", 0.5, "English")]  # Low confidence fallback
        return detected_options[:_MAX_OPTIONS]
    except Exception as exc:
        # Deliberate best-effort boundary: callers always get a usable answer.
        # The failure is reported through logging because no UI module is
        # imported in this file; the previous handler referenced an undefined
        # name and crashed instead of falling back.
        _logger.error("Language detection failed: %s", exc)
        return [("English", 0.5, "English")]