Spaces: Running on Zero
Riddhi Bhagwat committed
Commit e5ddfb2
Parent(s): e72de4a

Revert "auto detection of language input"

This reverts commit 4b9fc1415d3348bfb68381cc37ebc89a7ecfb9ad.
- app/.DS_Store +0 -0
- app/app.py +5 -24
- app/lang_model_router.py +0 -35
app/.DS_Store
DELETED
Binary file (6.15 kB)
app/app.py
CHANGED
@@ -25,7 +25,6 @@ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 import threading
 from collections import defaultdict
 from datasets import load_dataset
-from lang_model_router import detect_language_code, get_language_name_and_model
 
 
 BASE_MODEL = os.getenv("MODEL", "google/gemma-3-12b-pt")
@@ -397,29 +396,12 @@ def respond(
     language: str,
     temperature: Optional[float] = None,
     seed: Optional[int] = None,
-    auto_detect: bool = True,
 ) -> list:
-    """Respond to the user message with system
-
-
-
-
-            user_input = msg["content"]
-            break
-
-    # Determine language
-    if auto_detect:
-        lang_code = detect_language_code(user_input)
-        language, _ = get_language_name_and_model(lang_code)
-
-    # Load system prompt
-    system_prompt = LANGUAGES.get(language, LANGUAGES["English"])
-
-    # Format message list with system prompt prepended
-    messages = [{"role": "system", "content": system_prompt}]
-    messages.extend(format_history_as_messages(history))
-
-    # Generate response
+    """Respond to the user message with a system message
+
+    Return the history with the new message"""
+    messages = format_history_as_messages(history)
+
     if ZERO_GPU:
         content = call_pipeline(messages)
     else:
@@ -434,7 +416,6 @@ def respond(
     )
     content = response.choices[0].message.content
 
-    # Add response to history
     message = gr.ChatMessage(role="assistant", content=content)
     history.append(message)
     return history
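For readers skimming the diff, the net effect on `respond()` is that the language is once again taken straight from the `language` argument: no detection and no per-language system prompt injected at this point. Below is a minimal, runnable sketch of that reverted control flow. The names `format_history_as_messages`, `call_pipeline`, `ZERO_GPU`, and `respond` come from the diff context; the stub bodies and the `__main__` demo are assumptions for illustration, not the app's actual helpers.

```python
from typing import Optional

ZERO_GPU = True  # assumption: stands in for the ZERO_GPU flag referenced in app.py


def format_history_as_messages(history: list) -> list:
    # Stand-in for the app's helper; history items are assumed to already be
    # {"role": ..., "content": ...} mappings.
    return [{"role": m["role"], "content": m["content"]} for m in history]


def call_pipeline(messages: list) -> str:
    # Stand-in for the transformers pipeline call used on ZeroGPU Spaces.
    return f"(model reply to: {messages[-1]['content']})"


def respond(
    history: list,
    language: str,
    temperature: Optional[float] = None,
    seed: Optional[int] = None,
) -> list:
    """Respond to the user message; after the revert, `language` is whatever
    the caller passed in, with no auto-detection."""
    messages = format_history_as_messages(history)

    if ZERO_GPU:
        content = call_pipeline(messages)
    else:
        # app.py uses an OpenAI-compatible client here and reads
        # response.choices[0].message.content; omitted in this sketch.
        raise NotImplementedError

    # Append the assistant reply and return the updated history.
    history.append({"role": "assistant", "content": content})
    return history


if __name__ == "__main__":
    chat = [{"role": "user", "content": "Hello!"}]
    print(respond(chat, language="English"))
```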
app/lang_model_router.py
DELETED
@@ -1,35 +0,0 @@
-
-from langdetect import detect, DetectorFactory
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import os
-DetectorFactory.seed = 0
-LANGUAGE_MAP = {
-    "en": {"name": "English", "model": "openai-community/gpt2"},
-    "fr": {"name": "French", "model": "dbddv01/gpt2-french-small"},
-    "es": {"name": "Spanish", "model": "datificate/gpt2-small-spanish"},
-    "de": {"name": "German", "model": "deepset/gbert-base"},
-    "hi": {"name": "Hindi", "model": "ai4bharat/indic-bert"},
-    "mr": {"name": "Marathi", "model": "ai4bharat/indic-bert"},
-    "ja": {"name": "Japanese", "model": "rinna/japanese-gpt2-medium"},
-    "zh-cn": {"name": "Chinese", "model": "uer/gpt2-chinese-cluecorpusswwm"},
-    "ru": {"name": "Russian", "model": "sberbank-ai/rugpt3small_based_on_gpt2"},
-    "pt": {"name": "Portuguese", "model": "pierreguillou/gpt2-small-portuguese"},
-    "it": {"name": "Italian", "model": "dbddv01/gpt2-italian"},
-    "nl": {"name": "Dutch", "model": "GroNLP/gpt2-small-dutch"}
-}
-
-
-def detect_language_code(text: str) -> str:
-    try:
-        return detect(text)
-    except Exception:
-        return "en"  # fallback
-
-def get_language_name_and_model(lang_code: str) -> tuple[str, str]:
-    return LANGUAGE_MAP.get(lang_code, LANGUAGE_MAP["en"])
-
-def get_model_by_name(language_name: str) -> str:
-    for code, (name, model) in LANGUAGE_MAP.items():
-        if name.lower() == language_name.lower():
-            return model
-    return LANGUAGE_MODEL_MAP["en"][1]