Krishna086 committed on
Commit
b937c3e
·
verified ·
1 Parent(s): 7377987

Update lang_detect.py

Browse files
Files changed (1) hide show
  1. lang_detect.py +36 -17
lang_detect.py CHANGED
@@ -1,21 +1,40 @@
1
- from langdetect import detect
2
 
3
  def detect_language(text):
4
- """Detect the language of input text."""
5
  try:
6
- lang_code = detect(text)
7
- # Map langdetect codes to our LANGUAGES
8
- lang_map = {
9
- "English": "en",
10
- "French": "fr",
11
- "Spanish": "es",
12
- "German": "de",
13
- "Chinese": "zh",
14
- "Arabic": "ar",
15
- "Russian": "ru",
16
- "Hindi": "hi",
17
- "Japanese": "ja"
 
 
 
 
 
18
  }
19
- return lang_map.get(lang_code, "English") # Default to English if not mapped
20
- except:
21
- return "English" # Fallback if detection fails
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langdetect import detect_langs
2
 
3
  def detect_language(text):
4
+ """Detect the language of input text with confidence scores and robust native mapping."""
5
  try:
6
+ if len(text) < 10: # Minimum length for reliable detection
7
+ return [("English", 1.0, "English")]
8
+
9
+ # Get list of detected languages with confidence scores
10
+ lang_detections = detect_langs(text)
11
+
12
+ # Mapping of detected codes to native language names and supported languages
13
+ native_lang_map = {
14
+ "en": ("English", "English"),
15
+ "fr": ("Français", "French"),
16
+ "es": ("Español", "Spanish"),
17
+ "de": ("Deutsch", "German"),
18
+ "hi": ("हिन्दी", "Hindi"),
19
+ "zh": ("中文", "Chinese"),
20
+ "ar": ("العربية", "Arabic"),
21
+ "ru": ("Русский", "Russian"),
22
+ "ja": ("日本語", "Japanese"),
23
  }
24
+
25
+ detected_options = []
26
+ for lang in lang_detections:
27
+ lang_code = lang.lang
28
+ confidence = lang.prob
29
+ if confidence >= 0.7: # Confidence threshold
30
+ native_name, mapped_lang = native_lang_map.get(lang_code, ("Unknown", "English"))
31
+ detected_options.append((mapped_lang, confidence, native_name))
32
+
33
+ # Return top options or default to English if none meet threshold
34
+ if not detected_options:
35
+ detected_options = [("English", 0.5, "English")] # Low confidence fallback
36
+ return [(lang, conf, native) for lang, conf, native in detected_options[:3]] # Return top 3 options
37
+
38
+ except Exception as e:
39
+ st.error(f"Language detection failed: {str(e)}")
40
+ return [("English", 0.5, "English")] # Fallback with low confidence