advanced

Sleeping

Alina Lozovskaya

Auth

6454c0e about 2 months ago

4.36 kB

	import yaml
	import gradio as gr

	from yourbench_space.utils import CONFIG_PATH


	def generate_config(
	    hf_token: gr.OAuthToken | None,
	    hf_org,
	    model_name,
	    provider,
	    base_url,
	    api_key,
	    max_concurrent_requests,
	) -> str:
	    """Build the pipeline configuration and return it as YAML text.

	    A single model entry is registered and assigned to every pipeline role.
	    Each pipeline stage reads the dataset written by the stage before it
	    (see the matching ``*_dataset_name`` values below).

	    Args:
	        hf_token: OAuth token object, or ``None``. Only its ``token`` string
	            is written to the config; a plain string is also accepted and
	            written unchanged.
	        hf_org: Value stored under ``hf_configuration.hf_organization``.
	        model_name: Name of the model; also used for every role and for
	            both answer-generation strategies.
	        provider: Value stored under the model entry's ``provider`` key.
	        base_url: Value stored under the model entry's ``base_url`` key.
	        api_key: Value stored under the model entry's ``api_key`` key.
	        max_concurrent_requests: Per-model concurrency limit stored in the
	            model entry.

	    Returns:
	        The configuration serialised with ``yaml.dump`` in block style.
	    """
	    # An OAuth token object is not a plain scalar: dumping it directly would
	    # emit a Python-object node rather than the token string. Unwrap it;
	    # ``None`` and plain strings have no ``token`` attribute and pass through.
	    token = getattr(hf_token, "token", hf_token)

	    roles = (
	        "ingestion",
	        "summarization",
	        "single_shot_question_generation",
	        "multi_hop_question_generation",
	        "answer_generation",
	        "judge_answers",
	    )

	    config = {
	        "hf_configuration": {
	            "token": token,
	            "private": True,
	            "hf_organization": hf_org,
	        },
	        "model_list": [
	            {
	                "model_name": model_name,
	                "provider": provider,
	                "base_url": base_url,
	                "api_key": api_key,
	                "max_concurrent_requests": max_concurrent_requests,
	            }
	        ],
	        # A fresh list per role keeps yaml.dump from emitting anchors/aliases.
	        "model_roles": {role: [model_name] for role in roles},
	        # NOTE: this limit is fixed and independent of the
	        # max_concurrent_requests argument, which only applies to the model.
	        "inference_config": {"max_concurrent_requests": 16},
	        "pipeline": {
	            "ingestion": {
	                "source_documents_dir": "/app/uploaded_files",
	                "output_dir": "/app/ingested",
	                "run": True,
	            },
	            "upload_ingest_to_hub": {
	                "source_documents_dir": "/app/ingested",
	                "hub_dataset_name": "test_ingested_documents",
	                "local_dataset_path": "/app/ingested_dataset",
	                "run": True,
	            },
	            "summarization": {
	                "source_dataset_name": "test_ingested_documents",
	                "output_dataset_name": "test_summaries",
	                "local_dataset_path": "/results/test_summaries",
	                "concat_existing_dataset": False,
	                "run": True,
	            },
	            "chunking": {
	                "source_dataset_name": "test_summaries",
	                "output_dataset_name": "test_chunked_documents",
	                "local_dataset_path": "/results/test_chunked_documents",
	                "concat_existing_dataset": False,
	                "chunking_configuration": {
	                    "l_min_tokens": 64,
	                    "l_max_tokens": 128,
	                    "tau_threshold": 0.3,
	                    "h_min": 2,
	                    "h_max": 4,
	                },
	                "run": True,
	            },
	            # Both question-generation stages read the chunked documents.
	            "single_shot_question_generation": {
	                "source_dataset_name": "test_chunked_documents",
	                "output_dataset_name": "test_single_shot_questions",
	                "local_dataset_path": "/results/test_single_shot_questions",
	                "diversification_seed": "24 year old adult",
	                "concat_existing_dataset": False,
	                "run": True,
	            },
	            "multi_hop_question_generation": {
	                "source_dataset_name": "test_chunked_documents",
	                "output_dataset_name": "test_multi_hop_questions",
	                "local_dataset_path": "/results/test_multi_hop_questions",
	                "concat_existing_dataset": False,
	                "run": True,
	            },
	            "answer_generation": {
	                "run": True,
	                "question_dataset_name": "test_single_shot_questions",
	                "output_dataset_name": "test_answered_questions",
	                "local_dataset_path": "/results/test_answered_questions",
	                "concat_existing_dataset": False,
	                "strategies": [
	                    {
	                        "name": "zeroshot",
	                        "prompt": "ZEROSHOT_QA_USER_PROMPT",
	                        "model_name": model_name,
	                    },
	                    {
	                        "name": "gold",
	                        "prompt": "GOLD_QA_USER_PROMPT",
	                        "model_name": model_name,
	                    },
	                ],
	            },
	            "judge_answers": {
	                "run": True,
	                "source_judge_dataset_name": "test_answered_questions",
	                "output_judged_dataset_name": "test_judged_comparisons",
	                "local_dataset_path": "/results/test_judged_comparisons",
	                "concat_existing_dataset": False,
	                # Strategy names here match the "name" values defined in
	                # answer_generation.strategies above.
	                "comparing_strategies": [["zeroshot", "gold"]],
	                "chunk_column_index": 0,
	                "random_seed": 42,
	            },
	        },
	    }
	    return yaml.dump(config, default_flow_style=False)

	def save_config(yaml_text):
	    """Write *yaml_text* to ``CONFIG_PATH`` and return a confirmation message.

	    The file is opened in write mode, so any previous content is replaced.

	    Args:
	        yaml_text: The text to store in the config file.

	    Returns:
	        A fixed confirmation string.
	    """
	    # Explicit UTF-8 so the bytes written do not depend on the host locale.
	    with open(CONFIG_PATH, "w", encoding="utf-8") as file:
	        file.write(yaml_text)
	    return "✅ Config saved!"