File size: 1,592 Bytes
b34efa5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
"""
Configuration for Hugging Face API integration.
Contains model IDs, API endpoints, and other configuration parameters.
"""
# Registry of Norwegian-capable LLMs, keyed by a short alias.
# Every entry carries the Hugging Face "model_id" and a human-readable
# "description" of the model.
LLM_MODELS = {
    "normistral": {
        "model_id": "norallm/normistral-7b-scratch",
        "description": "NorMistral 7B - Norwegian language model based on Mistral architecture",
    },
    "viking": {
        "model_id": "silo-ai/viking-7b",
        "description": "Viking 7B - Multilingual model for Nordic languages",
    },
    "norskgpt": {
        "model_id": "NbAiLab/NorskGPT",
        "description": "NorskGPT - Norwegian language model",
    },
}

# Default LLM alias; this is one of the keys of LLM_MODELS above.
DEFAULT_LLM_MODEL = "normistral"
# Registry of Norwegian sentence-embedding models, keyed by a short alias.
# Every entry carries the Hugging Face "model_id" and a human-readable
# "description" of the model.
EMBEDDING_MODELS = {
    "nb-sbert": {
        "model_id": "NbAiLab/nb-sbert-base",
        "description": "NB-SBERT-BASE - Norwegian sentence embedding model",
    },
    "simcse": {
        "model_id": "FFI/SimCSE-NB-BERT-large",
        "description": "SimCSE-NB-BERT-large - Norwegian sentence embedding model",
    },
}

# Default embedding alias; this is one of the keys of EMBEDDING_MODELS above.
DEFAULT_EMBEDDING_MODEL = "nb-sbert"
# Base URLs for the Hugging Face Inference API, keyed by task.
# Both URLs end with a trailing slash.
# NOTE(review): presumably a model ID is appended directly by the caller;
# confirm against the client code.
HF_API_ENDPOINTS = {
    "inference": "https://api-inference.huggingface.co/models/",
    "feature-extraction": "https://api-inference.huggingface.co/pipeline/feature-extraction/",
}
# Default parameters attached to API requests.
# NOTE(review): whether the target endpoint honours "max_length" (as opposed
# to e.g. "max_new_tokens") depends on the consuming client; verify there.
API_PARAMS = {
    "max_length": 512,
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
    "repetition_penalty": 1.1,
}
# Document chunking settings.
# NOTE(review): the unit (characters vs. tokens) is decided by whichever
# splitter consumes these values; it is not visible in this module.

# Size of each document chunk.
CHUNK_SIZE = 512

# Amount shared between consecutive chunks.
CHUNK_OVERLAP = 100
# Retrieval-augmented generation (RAG) settings.

# Upper bound on the number of chunks fetched for a query.
MAX_CHUNKS_TO_RETRIEVE = 5

# Similarity cut-off applied to retrieved chunks.
# NOTE(review): the similarity metric (e.g. cosine) and the comparison
# direction are defined by the retriever, not here; confirm before tuning.
SIMILARITY_THRESHOLD = 0.75
|