import yaml

from yourbench_space.utils import CONFIG_PATH


def generate_base_config(
hf_org,
model_name,
provider,
base_url,
model_api_key,
max_concurrent_requests,
hf_dataset_prefix,
private_dataset,
ingestion_model,
summarization_model,
single_shot_question_generation_model,
multi_hop_question_generation_model,
answer_generation_model,
judge_answers_model,
):
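    """Build the YourBench pipeline configuration and return it as a YAML string.

    Secrets are written as "$HF_TOKEN" / "$MODEL_API_KEY" placeholders so the
    generated YAML never embeds the actual values. The per-role model arguments
    (ingestion_model, summarization_model, ...) are currently accepted but unused:
    every role is assigned the single `model_name`.
    """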
config = {
"hf_configuration": {
"token": "$HF_TOKEN",
"private": private_dataset,
"hf_organization": hf_org,
},
"model_list": [
{
"model_name": model_name,
"provider": provider,
"base_url": base_url,
"api_key": "$MODEL_API_KEY",
"max_concurrent_requests": max_concurrent_requests,
}
],
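        # Every pipeline role below is served by the single configured model.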
"model_roles": {
role: [model_name]
for role in [
"ingestion",
"summarization",
"single_shot_question_generation",
"multi_hop_question_generation",
"answer_generation",
"judge_answers",
]
},
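        # Global concurrency cap; the per-model limit above uses the caller-supplied value.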
"inference_config": {"max_concurrent_requests": 16},
"pipeline": {
"ingestion": {
"source_documents_dir": "/app/uploaded_files",
"output_dir": "/app/ingested",
"run": True,
},
"upload_ingest_to_hub": {
"source_documents_dir": "/app/ingested",
"hub_dataset_name": f"{hf_dataset_prefix}_ingested_documents",
"run": True,
},
"summarization": {
"source_dataset_name": f"{hf_dataset_prefix}_ingested_documents",
"output_dataset_name": f"{hf_dataset_prefix}_summaries",
"concat_existing_dataset": False,
"run": True,
},
"chunking": {
"source_dataset_name": f"{hf_dataset_prefix}_summaries",
"output_dataset_name": f"{hf_dataset_prefix}_chunked_documents",
"concat_existing_dataset": False,
"chunking_configuration": {
"l_min_tokens": 64,
"l_max_tokens": 128,
"tau_threshold": 0.3,
"h_min": 2,
"h_max": 4,
},
"run": True,
},
"single_shot_question_generation": {
"source_dataset_name": f"{hf_dataset_prefix}_chunked_documents",
"output_dataset_name": f"{hf_dataset_prefix}_single_shot_questions",
"diversification_seed": "24 year old adult",
"concat_existing_dataset": False,
"run": True,
},
"multi_hop_question_generation": {
"source_dataset_name": f"{hf_dataset_prefix}_chunked_documents",
"output_dataset_name": f"{hf_dataset_prefix}_multi_hop_questions",
"concat_existing_dataset": False,
"run": True,
},
"answer_generation": {
"question_dataset_name": f"{hf_dataset_prefix}_single_shot_questions",
"output_dataset_name": f"{hf_dataset_prefix}_answered_questions",
"concat_existing_dataset": False,
"strategies": [
{
"name": "zeroshot",
"prompt": "ZEROSHOT_QA_USER_PROMPT",
"model_name": model_name,
},
{
"name": "gold",
"prompt": "GOLD_QA_USER_PROMPT",
"model_name": model_name,
},
],
"run": True,
},
"judge_answers": {
"source_judge_dataset_name": f"{hf_dataset_prefix}_answered_questions",
"output_judged_dataset_name": f"{hf_dataset_prefix}_judged_comparisons",
"concat_existing_dataset": False,
"comparing_strategies": [["zeroshot", "gold"]],
"chunk_column_index": 0,
"random_seed": 42,
"run": True,
},
},
}
    return yaml.dump(config, sort_keys=False)


def save_config(yaml_text):
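    """Write the YAML text to CONFIG_PATH and return a status message for the UI."""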
with open(CONFIG_PATH, "w") as file:
file.write(yaml_text)
return "✅ Config saved!"