|
from langdetect import detect_langs |
|
|
|
def detect_language(text):
    """Detect the language of input text with confidence scores and robust native mapping.

    Args:
        text: The text to analyse. Inputs shorter than 10 characters skip
            detection entirely and are reported as English with confidence 1.0.

    Returns:
        A list of at most three ``(language, confidence, native_name)`` tuples,
        in the order produced by ``detect_langs``. Only candidates whose
        confidence is at least 0.7 are kept. A candidate whose language code
        is not in the mapping is reported as ``("English", confidence,
        "Unknown")``. If no candidate qualifies, or if detection raises, the
        result is ``[("English", 0.5, "English")]``; in the failure case the
        error is also reported through ``st.error``.
    """
    # Keys are base language codes; values are (native name, English name).
    native_lang_map = {
        "en": ("English", "English"),
        "fr": ("Français", "French"),
        "es": ("Español", "Spanish"),
        "de": ("Deutsch", "German"),
        "hi": ("हिन्दी", "Hindi"),
        "zh": ("中文", "Chinese"),
        "ar": ("العربية", "Arabic"),
        "ru": ("Русский", "Russian"),
        "ja": ("日本語", "Japanese"),
    }

    try:
        # Very short inputs are not passed to the detector; default to English.
        if len(text) < 10:
            return [("English", 1.0, "English")]

        detected_options = []
        for candidate in detect_langs(text):
            confidence = candidate.prob
            if confidence < 0.7:
                continue
            # langdetect reports region-qualified codes for Chinese
            # ("zh-cn", "zh-tw"); reduce to the base code so the "zh" entry
            # in the mapping is actually reachable.
            base_code = candidate.lang.split("-")[0]
            native_name, mapped_lang = native_lang_map.get(
                base_code, ("Unknown", "English")
            )
            detected_options.append((mapped_lang, confidence, native_name))

        if not detected_options:
            return [("English", 0.5, "English")]
        return detected_options[:3]

    except Exception as e:
        # Best-effort: surface the problem to the UI and fall back to English.
        st.error(f"Language detection failed: {str(e)}")
        return [("English", 0.5, "English")]