import yaml

from yourbench_space.utils import CONFIG_PATH


def generate_base_config(
hf_org,
model_name,
provider,
base_url,
model_api_key,
max_concurrent_requests,
hf_dataset_prefix,
private_dataset,
ingestion_model,
summarization_model,
single_shot_question_generation_model,
multi_hop_question_generation_model,
answer_generation_model,
judge_answers_model,
):
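    """Build the YourBench pipeline configuration and return it as a YAML string.

    Secrets are written as "$HF_TOKEN" / "$MODEL_API_KEY" placeholders so the
    generated YAML never embeds the actual values. The per-role model arguments
    (ingestion_model, summarization_model, ...) are currently accepted but unused:
    every role is assigned the single `model_name`.
    """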
config = {
"hf_configuration": {
"token": "$HF_TOKEN",
"private": private_dataset,
"hf_organization": hf_org,
},
"model_list": [
{
"model_name": model_name,
"provider": provider,
"base_url": base_url,
"api_key": "$MODEL_API_KEY",
"max_concurrent_requests": max_concurrent_requests,
}
],
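        # Every pipeline role below is served by the single configured model.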
"model_roles": {
role: [model_name]
for role in [
"ingestion",
"summarization",
"single_shot_question_generation",
"multi_hop_question_generation",
"answer_generation",
"judge_answers",
]
},
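        # Global concurrency cap; the per-model limit above uses the caller-supplied value.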
"inference_config": {"max_concurrent_requests": 16},
"pipeline": {
"ingestion": {
"source_documents_dir": "/app/uploaded_files",
"output_dir": "/app/ingested",
"run": True,
},
"upload_ingest_to_hub": {
"source_documents_dir": "/app/ingested",
"hub_dataset_name": f"{hf_dataset_prefix}_ingested_documents",
"run": True,
},
"summarization": {
"source_dataset_name": f"{hf_dataset_prefix}_ingested_documents",
"output_dataset_name": f"{hf_dataset_prefix}_summaries",
"concat_existing_dataset": False,
"run": True,
},
"chunking": {
"source_dataset_name": f"{hf_dataset_prefix}_summaries",
"output_dataset_name": f"{hf_dataset_prefix}_chunked_documents",
"concat_existing_dataset": False,
"chunking_configuration": {
"l_min_tokens": 64,
"l_max_tokens": 128,
"tau_threshold": 0.3,
"h_min": 2,
"h_max": 4,
},
"run": True,
},
"single_shot_question_generation": {
"source_dataset_name": f"{hf_dataset_prefix}_chunked_documents",
"output_dataset_name": f"{hf_dataset_prefix}_single_shot_questions",
"diversification_seed": "24 year old adult",
"concat_existing_dataset": False,
"run": True,
},
"multi_hop_question_generation": {
"source_dataset_name": f"{hf_dataset_prefix}_chunked_documents",
"output_dataset_name": f"{hf_dataset_prefix}_multi_hop_questions",
"concat_existing_dataset": False,
"run": True,
},
"answer_generation": {
"question_dataset_name": f"{hf_dataset_prefix}_single_shot_questions",
"output_dataset_name": f"{hf_dataset_prefix}_answered_questions",
"concat_existing_dataset": False,
"strategies": [
{
"name": "zeroshot",
"prompt": "ZEROSHOT_QA_USER_PROMPT",
"model_name": model_name,
},
{
"name": "gold",
"prompt": "GOLD_QA_USER_PROMPT",
"model_name": model_name,
},
],
"run": True,
},
"judge_answers": {
"source_judge_dataset_name": f"{hf_dataset_prefix}_answered_questions",
"output_judged_dataset_name": f"{hf_dataset_prefix}_judged_comparisons",
"concat_existing_dataset": False,
"comparing_strategies": [["zeroshot", "gold"]],
"chunk_column_index": 0,
"random_seed": 42,
"run": True,
},
},
}
    return yaml.dump(config, sort_keys=False)


def save_config(yaml_text):
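    """Write the YAML text to CONFIG_PATH and return a status message for the UI."""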
with open(CONFIG_PATH, "w") as file:
file.write(yaml_text)
return "✅ Config saved!"