Spaces:
Sleeping
Sleeping
File size: 3,477 Bytes
bae4131 7ccf9d4 6454c0e 3adea5e 089a447 133c6d8 7ccf9d4 6454c0e 5289522 089a447 ea047ad 133c6d8 ea047ad bae4131 ea047ad 25580aa 089a447 25580aa 089a447 ea047ad bae4131 ea047ad 089a447 ea047ad 6454c0e 3adea5e 089a447 6454c0e 3adea5e ea047ad 089a447 6454c0e ea047ad 6454c0e ea047ad 6454c0e ea047ad 6454c0e 089a447 ea047ad 6454c0e ea047ad 3d76e98 ea047ad 6454c0e ea047ad 089a447 6454c0e bae4131 089a447 133c6d8 7ccf9d4 133c6d8 7ccf9d4 133c6d8 7ccf9d4 089a447 133c6d8 7ccf9d4 133c6d8 7ccf9d4 089a447 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
import yaml
from loguru import logger
from yourbench_space import PATH
def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
    """Build the default configuration dictionary for one session.

    The result has four top-level sections, in this order:
    ``hf_configuration``, ``model_list``, ``model_roles`` and ``pipeline``.
    Key order is kept deliberately, since the dictionary may be dumped
    with key sorting disabled.

    Args:
        hf_org: Stored under ``hf_configuration.hf_organization``.
        hf_dataset_name: Stored under ``hf_configuration.hf_dataset_name``.
        session_uid: Path component inserted after ``PATH`` to form the
            session's upload and ingestion directories.

    Returns:
        A freshly built, nested dictionary; no list or dict object is
        shared between two places in the structure.
    """
    vision_model = "Qwen/Qwen2.5-VL-72B-Instruct"
    text_model = "Qwen/Qwen2.5-72B-Instruct"
    question_instructions = "Generate questions to test a curious adult"

    # Every session path hangs off "<PATH>/<session_uid>".
    session_dir = f"{PATH}/{session_uid}"
    ingested_dir = f"{session_dir}/ingested"

    hub_settings = {
        # Literal placeholder string; presumably substituted by whatever
        # consumes the config -- TODO confirm.
        "token": "$HF_TOKEN",
        "hf_organization": hf_org,
        "private": True,
        "hf_dataset_name": hf_dataset_name,
        "concat_if_exist": False,
    }

    # Both models use the same provider and concurrency limit.
    models = [
        {
            "model_name": model_name,
            "provider": "novita",
            "max_concurrent_requests": 32,
        }
        for model_name in (vision_model, text_model)
    ]

    # Each role gets its own list object (no sharing between roles).
    roles = {
        "ingestion": [vision_model],
        "summarization": [text_model],
        "chunking": ["intfloat/multilingual-e5-large-instruct"],
        "single_shot_question_generation": [text_model],
        "multi_hop_question_generation": [text_model],
    }

    # Pipeline stages are added in the same order as the original literal.
    stages = {}
    stages["ingestion"] = {
        "source_documents_dir": f"{session_dir}/uploaded_files/",
        "output_dir": ingested_dir,
        "run": True,
    }
    stages["upload_ingest_to_hub"] = {
        "source_documents_dir": ingested_dir,
        "run": True,
    }
    stages["summarization"] = {"run": True}
    stages["chunking"] = {
        "run": True,
        "chunking_configuration": {
            "l_min_tokens": 64,
            "l_max_tokens": 128,
            "tau_threshold": 0.8,
            "h_min": 2,
            "h_max": 5,
            "num_multihops_factor": 2,
        },
    }
    stages["single_shot_question_generation"] = {
        "run": True,
        "additional_instructions": question_instructions,
        "chunk_sampling": {
            "mode": "count",
            "value": 5,
            "random_seed": 123,
        },
    }
    stages["multi_hop_question_generation"] = {
        "run": True,
        "additional_instructions": question_instructions,
        "chunk_sampling": {
            "mode": "percentage",
            "value": 0.3,
            "random_seed": 42,
        },
    }
    stages["lighteval"] = {"run": True}

    return {
        "hf_configuration": hub_settings,
        "model_list": models,
        "model_roles": roles,
        "pipeline": stages,
    }
def save_yaml_file(config: dict, path: str) -> str:
    """Write ``config`` to ``path`` as YAML and return ``path``.

    Args:
        config: Configuration dictionary to serialize.
        path: Destination file. It is opened in write mode, so any
            existing content is replaced.

    Returns:
        The ``path`` argument, unchanged, for convenient chaining.
    """
    # Pin the encoding so the written file does not depend on the
    # platform's locale default.
    with open(path, "w", encoding="utf-8") as file:
        # Block style output; keys are written in insertion order.
        yaml.dump(config, file, default_flow_style=False, sort_keys=False)
    return path
def generate_and_save_config(hf_org: str, hf_name: str, session_uid: str, config_path: str):
    """Create the base configuration and write it to ``config_path`` as YAML.

    Args:
        hf_org: Passed to ``generate_base_config`` as the organization.
        hf_name: Passed to ``generate_base_config`` as the dataset name.
        session_uid: Passed to ``generate_base_config`` as the session id.
        config_path: Destination handed to ``save_yaml_file``.

    Returns:
        Whatever ``save_yaml_file`` returns for the written file.
    """
    logger.debug(f"Generating config with org: {hf_org}, dataset name: {hf_name}")
    # Build the dictionary and persist it in a single step.
    saved_to = save_yaml_file(
        generate_base_config(hf_org, hf_name, session_uid),
        config_path,
    )
    logger.success(f"Config saved at: {saved_to}")
    return saved_to
|