"""
Configuration for Hugging Face API integration.

Contains model IDs, API endpoints, and other configuration parameters.
"""
# Registry of the text-generation models this configuration knows about.
# Each entry maps a short alias to:
#   "model_id"    - the Hugging Face model identifier
#   "description" - a human-readable summary of the model
# NOTE(review): the model IDs are not validated here; confirm each one still
# resolves on the Hugging Face Hub before relying on it.
LLM_MODELS = {
    "normistral": {
        "model_id": "norallm/normistral-7b-scratch",
        "description": "NorMistral 7B - Norwegian language model based on Mistral architecture",
    },
    "viking": {
        "model_id": "silo-ai/viking-7b",
        "description": "Viking 7B - Multilingual model for Nordic languages",
    },
    "norskgpt": {
        "model_id": "NbAiLab/NorskGPT",
        "description": "NorskGPT - Norwegian language model",
    },
}

# Alias of the model used when none is chosen explicitly; it is one of the
# keys of LLM_MODELS.
DEFAULT_LLM_MODEL = "normistral"
# Registry of the sentence-embedding models this configuration knows about.
# Each entry maps a short alias to:
#   "model_id"    - the Hugging Face model identifier
#   "description" - a human-readable summary of the model
EMBEDDING_MODELS = {
    "nb-sbert": {
        "model_id": "NbAiLab/nb-sbert-base",
        "description": "NB-SBERT-BASE - Norwegian sentence embedding model",
    },
    "simcse": {
        "model_id": "FFI/SimCSE-NB-BERT-large",
        "description": "SimCSE-NB-BERT-large - Norwegian sentence embedding model",
    },
}

# Alias of the embedding model used when none is chosen explicitly; it is one
# of the keys of EMBEDDING_MODELS.
DEFAULT_EMBEDDING_MODEL = "nb-sbert"
# Base URLs of the Hugging Face API, keyed by the kind of request.
# Both values end with "/"; presumably the caller appends a model ID to form
# the full request URL - TODO confirm against the code that builds requests.
HF_API_ENDPOINTS = {
    "inference": "https://api-inference.huggingface.co/models/",
    "feature-extraction": "https://api-inference.huggingface.co/pipeline/feature-extraction/",
}
# Default parameter values for API calls.
# NOTE(review): the names match common text-generation sampling options, so
# these are presumably sent with generation requests - confirm against the
# caller, including whether "max_length" is measured in tokens.
API_PARAMS = {
    "max_length": 512,
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
    "repetition_penalty": 1.1,
}
# Chunking settings.
# NOTE(review): the unit (characters vs. tokens) is not stated in this file -
# confirm against the code that splits the text.
CHUNK_SIZE = 512
# Amount shared between neighbouring chunks, in the same unit as CHUNK_SIZE
# (presumably) - TODO confirm.
CHUNK_OVERLAP = 100
# Retrieval settings.
# Upper bound on the number of chunks returned by a retrieval step.
MAX_CHUNKS_TO_RETRIEVE = 5
# Similarity cut-off for retrieved chunks.
# NOTE(review): the similarity metric and whether the comparison is inclusive
# are decided by the caller - confirm there.
SIMILARITY_THRESHOLD = 0.75