Clémentine
added a singleton-like class to manage all managers per session, plus session state management. Also fixes secret passing to the new leaderboard space
133c6d8
raw
history blame
3.76 kB
import os

import yaml
from loguru import logger

from yourbench_space.utils import CONFIG_PATH
def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
    """Assemble the default configuration dictionary for one session.

    Args:
        hf_org: Value stored under ``hf_configuration.hf_organization``.
        hf_dataset_name: Value stored under ``hf_configuration.hf_dataset_name``.
        session_uid: Session identifier; it is interpolated into the
            ``/app/<session_uid>/...`` paths used by the ingestion stages.

    Returns:
        A new nested dict with four top-level sections, in this order:
        ``hf_configuration``, ``model_list``, ``model_roles`` and ``pipeline``.
        The token is stored as the literal string ``"$HF_TOKEN"``; this
        function does not resolve it.
    """
    # The two models every section below refers to.
    llama = "meta-llama/Llama-3.3-70B-Instruct"
    qwen = "Qwen/Qwen2.5-72B-Instruct"

    # Output of the ingestion stage, which is also the input of the upload stage.
    ingested_dir = f"/app/{session_uid}/ingested"

    hf_configuration = {
        "token": "$HF_TOKEN",
        "private": True,
        "hf_organization": hf_org,
        "hf_dataset_name": hf_dataset_name,
    }

    # One independent entry per model, both with the same provider settings.
    model_list = [
        {
            "model_name": model_name,
            "provider": "novita",
            "max_concurrent_requests": 32,
        }
        for model_name in (llama, qwen)
    ]

    # Each role gets its own single-element list (no list object is shared).
    model_roles = {
        "ingestion": [llama],
        "summarization": [qwen],
        "single_shot_question_generation": [llama],
        "multi_hop_question_generation": [llama],
        "answer_generation": [qwen],
        "judge_answers": [llama],
    }

    chunking_configuration = {
        "l_min_tokens": 64,
        "l_max_tokens": 128,
        "tau_threshold": 0.3,
        "h_min": 2,
        "h_max": 4,
    }

    answer_strategies = [
        {
            "name": "zeroshot",
            "prompt": "ZEROSHOT_QA_USER_PROMPT",
            "model_name": llama,
        },
        {
            "name": "gold",
            "prompt": "GOLD_QA_USER_PROMPT",
            "model_name": llama,
        },
    ]

    pipeline = {
        "ingestion": {
            "source_documents_dir": f"/app/{session_uid}/uploaded_files/",
            "output_dir": ingested_dir,
            "run": True,
        },
        "upload_ingest_to_hub": {
            "source_documents_dir": ingested_dir,
            "run": True,
        },
        "summarization": {"run": True},
        "chunking": {
            "chunking_configuration": chunking_configuration,
            "run": True,
        },
        "single_shot_question_generation": {
            "diversification_seed": "24 year old adult",
            "run": True,
        },
        "multi_hop_question_generation": {"run": False},
        "answer_generation": {
            "question_type": "single_shot",
            "run": True,
            "strategies": answer_strategies,
        },
        "judge_answers": {
            "run": False,  # switched off for now; to change when fixed
            "comparing_strategies": [["zeroshot", "gold"]],
            "chunk_column_index": 0,
            "random_seed": 42,
        },
    }

    return {
        "hf_configuration": hf_configuration,
        "model_list": model_list,
        "model_roles": model_roles,
        "pipeline": pipeline,
    }
def save_yaml_file(config: dict, path: str) -> str:
    """Write a configuration dictionary to a YAML file and return its path.

    Args:
        config: Configuration mapping to serialize.
        path: Destination file path. Missing parent directories are created
            before the file is opened.

    Returns:
        ``path``, unchanged, so the call can be used inline.

    Raises:
        OSError: If a parent directory cannot be created or the file cannot
            be opened for writing.
    """
    parent_dir = os.path.dirname(path)
    if parent_dir:
        # A bare filename has no directory component, and makedirs("") raises.
        os.makedirs(parent_dir, exist_ok=True)

    # Explicit encoding keeps the output independent of the platform locale.
    with open(path, "w", encoding="utf-8") as file:
        # Block style, with keys kept in insertion order rather than sorted.
        yaml.dump(config, file, default_flow_style=False, sort_keys=False)
    return path
def generate_and_save_config(hf_org: str, hf_name: str, session_uid: str, config_path: str):
    """Build the base configuration for a session and write it out as YAML.

    Args:
        hf_org: Organization name forwarded to ``generate_base_config``.
        hf_name: Dataset name forwarded to ``generate_base_config``.
        session_uid: Session identifier forwarded to ``generate_base_config``.
        config_path: Where the YAML file is written.

    Returns:
        The path returned by ``save_yaml_file``.
    """
    logger.debug(f"Generating config with org: {hf_org}, dataset name: {hf_name}")
    # Build the dictionary and persist it in one step.
    saved_path = save_yaml_file(
        generate_base_config(hf_org, hf_name, session_uid),
        config_path,
    )
    logger.success(f"Config saved at: {saved_path}")
    return saved_path