Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 3,717 Bytes
bae4131 7ccf9d4 6454c0e 089a447 133c6d8 7ccf9d4 6454c0e 5289522 7ccf9d4 089a447 133c6d8 bae4131 7ccf9d4 25580aa 089a447 25580aa 089a447 bae4131 7ccf9d4 089a447 7ccf9d4 089a447 6454c0e 133c6d8 089a447 6454c0e 133c6d8 089a447 6454c0e 3d76e98 6454c0e 089a447 6454c0e 089a447 6454c0e 089a447 6454c0e 73b392f 6454c0e 3d76e98 bae4131 089a447 6454c0e 25580aa 6454c0e 089a447 6454c0e bae4131 089a447 133c6d8 7ccf9d4 133c6d8 7ccf9d4 133c6d8 7ccf9d4 089a447 133c6d8 7ccf9d4 133c6d8 7ccf9d4 089a447 570d85c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
import yaml
from loguru import logger
def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
    """Build and return the base pipeline configuration dictionary.

    Args:
        hf_org: Hugging Face organization that will own the dataset.
        hf_dataset_name: Name of the dataset to create on the Hub.
        session_uid: Unique session id used to namespace on-disk paths.

    Returns:
        dict: The full configuration, ready to be dumped to YAML.
    """
    # Hoist repeated literals so each appears exactly once.
    llama = "meta-llama/Llama-3.3-70B-Instruct"
    qwen = "Qwen/Qwen2.5-72B-Instruct"
    session_root = f"/app/{session_uid}"

    hf_configuration = {
        "token": "$HF_TOKEN",
        "private": True,
        "hf_organization": hf_org,
        "hf_dataset_name": hf_dataset_name,
    }

    model_list = [
        {
            "model_name": llama,
            "provider": "novita",
            "max_concurrent_requests": 32,
        },
        {
            "model_name": qwen,
            "provider": "novita",
            "max_concurrent_requests": 32,
        },
    ]

    model_roles = {
        "ingestion": [llama],
        "summarization": [qwen],
        "single_shot_question_generation": [llama],
        "multi_hop_question_generation": [llama],
        "answer_generation": [qwen],
        "judge_answers": [llama],
    }

    pipeline = {
        "ingestion": {
            "source_documents_dir": f"{session_root}/uploaded_files/",
            "output_dir": f"{session_root}/ingested",
            "run": True,
        },
        "upload_ingest_to_hub": {
            "source_documents_dir": f"{session_root}/ingested",
            "run": True,
        },
        "summarization": {"run": True},
        "chunking": {
            "chunking_configuration": {
                "l_min_tokens": 64,
                "l_max_tokens": 128,
                "tau_threshold": 0.3,
                "h_min": 2,
                "h_max": 4,
            },
            "run": True,
        },
        "single_shot_question_generation": {
            "diversification_seed": "24 year old adult",
            "run": True,
        },
        "multi_hop_question_generation": {"run": False},
        "answer_generation": {
            "question_type": "single_shot",
            "run": True,
            "strategies": [
                {
                    "name": "zeroshot",
                    "prompt": "ZEROSHOT_QA_USER_PROMPT",
                    "model_name": llama,
                },
                {
                    "name": "gold",
                    "prompt": "GOLD_QA_USER_PROMPT",
                    "model_name": llama,
                },
            ],
        },
        "judge_answers": {
            "run": False,  # to change when fixed
            "comparing_strategies": [["zeroshot", "gold"]],
            "chunk_column_index": 0,
            "random_seed": 42,
        },
    }

    return {
        "hf_configuration": hf_configuration,
        "model_list": model_list,
        "model_roles": model_roles,
        "pipeline": pipeline,
    }
def save_yaml_file(config: dict, path: str) -> str:
    """Serialize *config* to a YAML file at *path*.

    Args:
        config: Configuration dictionary to serialize. (The previous
            annotation said ``str``, but callers pass the dict produced
            by ``generate_base_config``.)
        path: Destination file path; overwritten if it exists.

    Returns:
        The path the file was written to.
    """
    # Explicit UTF-8 keeps output independent of the host locale;
    # sort_keys=False preserves the config's intended section order.
    with open(path, "w", encoding="utf-8") as file:
        yaml.dump(config, file, default_flow_style=False, sort_keys=False)
    return path
def generate_and_save_config(hf_org: str, hf_name: str, session_uid: str, config_path: str):
    """Build the session's pipeline config and persist it as YAML.

    Args:
        hf_org: Hugging Face organization name.
        hf_name: Dataset name on the Hub.
        session_uid: Unique session identifier used in generated paths.
        config_path: Destination path for the YAML file.

    Returns:
        The path of the saved configuration file.
    """
    logger.debug(f"Generating config with org: {hf_org}, dataset name: {hf_name}")
    saved_path = save_yaml_file(
        generate_base_config(hf_org, hf_name, session_uid),
        config_path,
    )
    logger.success(f"Config saved at: {saved_path}")
    return saved_path
|