Spaces:
Sleeping
Sleeping
import yaml | |
from yourbench_space.utils import CONFIG_PATH | |
def generate_base_config(
    hf_org,
    model_name,
    provider,
    base_url,
    model_api_key,
    max_concurrent_requests,
    hf_dataset_prefix,
    private_dataset,
    ingestion_model,
    summarization_model,
    single_shot_question_generation_model,
    multi_hop_question_generation_model,
    answer_generation_model,
    judge_answers_model,
):
    """Build the base pipeline configuration and return it as YAML text.

    Args:
        hf_org: Stored under ``hf_configuration.hf_organization``.
        model_name: Name of the single entry in ``model_list``; it is also
            assigned to every pipeline role and to both answer-generation
            strategies.
        provider: Stored as the model entry's ``provider``.
        base_url: Stored as the model entry's ``base_url``.
        model_api_key: Not read by this function; the emitted ``api_key`` is
            always the literal placeholder ``"$MODEL_API_KEY"``.
        max_concurrent_requests: Stored on the model entry. The separate
            ``inference_config.max_concurrent_requests`` value is fixed at 16.
        hf_dataset_prefix: Prefix prepended to every dataset name.
        private_dataset: Stored under ``hf_configuration.private``.
        ingestion_model: Not read by this function.
        summarization_model: Not read by this function.
        single_shot_question_generation_model: Not read by this function.
        multi_hop_question_generation_model: Not read by this function.
        answer_generation_model: Not read by this function.
        judge_answers_model: Not read by this function.

    Returns:
        The configuration serialized with ``yaml.dump(..., sort_keys=False)``,
        so keys appear in the order they are inserted below.
    """
    # Each stage reads the dataset the previous stage wrote, so every name is
    # spelled out once here and shared between producer and consumer.
    ingested_ds = f"{hf_dataset_prefix}_ingested_documents"
    summaries_ds = f"{hf_dataset_prefix}_summaries"
    chunked_ds = f"{hf_dataset_prefix}_chunked_documents"
    single_shot_ds = f"{hf_dataset_prefix}_single_shot_questions"
    multi_hop_ds = f"{hf_dataset_prefix}_multi_hop_questions"
    answered_ds = f"{hf_dataset_prefix}_answered_questions"
    judged_ds = f"{hf_dataset_prefix}_judged_comparisons"

    hf_section = {
        "token": "$HF_TOKEN",
        "private": private_dataset,
        "hf_organization": hf_org,
    }

    model_entry = {
        "model_name": model_name,
        "provider": provider,
        "base_url": base_url,
        "api_key": "$MODEL_API_KEY",
        "max_concurrent_requests": max_concurrent_requests,
    }

    # A fresh list is created per role on purpose: sharing one list object
    # across roles would make PyYAML emit anchors/aliases in the output.
    role_names = (
        "ingestion",
        "summarization",
        "single_shot_question_generation",
        "multi_hop_question_generation",
        "answer_generation",
        "judge_answers",
    )
    roles_section = {}
    for role_name in role_names:
        roles_section[role_name] = [model_name]

    answer_strategies = [
        {"name": strategy, "prompt": prompt, "model_name": model_name}
        for strategy, prompt in (
            ("zeroshot", "ZEROSHOT_QA_USER_PROMPT"),
            ("gold", "GOLD_QA_USER_PROMPT"),
        )
    ]

    # Stages are inserted in execution order; sort_keys=False keeps it.
    stages = {}
    stages["ingestion"] = {
        "source_documents_dir": "/app/uploaded_files",
        "output_dir": "/app/ingested",
        "run": True,
    }
    stages["upload_ingest_to_hub"] = {
        "source_documents_dir": "/app/ingested",
        "hub_dataset_name": ingested_ds,
        "run": True,
    }
    stages["summarization"] = {
        "source_dataset_name": ingested_ds,
        "output_dataset_name": summaries_ds,
        "concat_existing_dataset": False,
        "run": True,
    }
    stages["chunking"] = {
        "source_dataset_name": summaries_ds,
        "output_dataset_name": chunked_ds,
        "concat_existing_dataset": False,
        "chunking_configuration": {
            "l_min_tokens": 64,
            "l_max_tokens": 128,
            "tau_threshold": 0.3,
            "h_min": 2,
            "h_max": 4,
        },
        "run": True,
    }
    stages["single_shot_question_generation"] = {
        "source_dataset_name": chunked_ds,
        "output_dataset_name": single_shot_ds,
        "diversification_seed": "24 year old adult",
        "concat_existing_dataset": False,
        "run": True,
    }
    stages["multi_hop_question_generation"] = {
        "source_dataset_name": chunked_ds,
        "output_dataset_name": multi_hop_ds,
        "concat_existing_dataset": False,
        "run": True,
    }
    stages["answer_generation"] = {
        "question_dataset_name": single_shot_ds,
        "output_dataset_name": answered_ds,
        "concat_existing_dataset": False,
        "strategies": answer_strategies,
        "run": True,
    }
    stages["judge_answers"] = {
        "source_judge_dataset_name": answered_ds,
        "output_judged_dataset_name": judged_ds,
        "concat_existing_dataset": False,
        "comparing_strategies": [["zeroshot", "gold"]],
        "chunk_column_index": 0,
        "random_seed": 42,
        "run": True,
    }

    document = {
        "hf_configuration": hf_section,
        "model_list": [model_entry],
        "model_roles": roles_section,
        "inference_config": {"max_concurrent_requests": 16},
        "pipeline": stages,
    }
    return yaml.dump(document, sort_keys=False)
def save_config(yaml_text):
    """Write *yaml_text* to ``CONFIG_PATH`` and return a status message.

    Any existing file at ``CONFIG_PATH`` is overwritten.

    Args:
        yaml_text: The configuration text to persist.

    Returns:
        The string ``"✅ Config saved!"``.
    """
    # Explicit UTF-8 keeps the write independent of the process locale, so
    # non-ASCII text does not fail or get encoded differently per platform.
    with open(CONFIG_PATH, "w", encoding="utf-8") as file:
        file.write(yaml_text)
    return "✅ Config saved!"