File size: 3,399 Bytes
bae4131
7ccf9d4
6454c0e
 
7ccf9d4
 
 
6454c0e
5289522
7ccf9d4
 
 
bae4131
7ccf9d4
bae4131
 
7ccf9d4
 
 
 
 
bae4131
 
 
7ccf9d4
 
 
 
 
 
6454c0e
 
 
cf69537
 
7ccf9d4
6454c0e
 
cf69537
7ccf9d4
6454c0e
3d76e98
6454c0e
 
 
 
 
 
7ccf9d4
6454c0e
7ccf9d4
6454c0e
 
 
7ccf9d4
6454c0e
3d76e98
6454c0e
3d76e98
 
bae4131
7ccf9d4
 
 
6454c0e
 
3d76e98
6454c0e
 
7ccf9d4
 
 
6454c0e
bae4131
7ccf9d4
 
6454c0e
7ccf9d4
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import yaml
from loguru import logger
from yourbench_space.utils import CONFIG_PATH

def generate_base_config(
    hf_org,
    hf_prefix,
    *,
    model_name="meta-llama/Llama-3.3-70B-Instruct",
    base_url="https://jsq69lxgkhvpnliw.us-east-1.aws.endpoints.huggingface.cloud",
):
    """Build the base configuration dictionary.

    The result is a fresh, plain nested ``dict``/``list`` structure on every
    call, so callers may mutate it freely before serializing it.

    Args:
        hf_org: Value stored under ``hf_configuration.hf_organization``.
        hf_prefix: Value stored under ``hf_configuration.hf_dataset_name``.
        model_name: Model identifier used for the single ``model_list``
            entry, for every ``model_roles`` entry and for each
            ``answer_generation`` strategy. Defaults to the model that was
            previously hard-coded.
        base_url: Endpoint URL stored in the ``model_list`` entry. Defaults
            to the endpoint that was previously hard-coded.

    Returns:
        A dict with the top-level keys ``hf_configuration``,
        ``local_dataset_dir``, ``model_list``, ``model_roles`` and
        ``pipeline``, in that order.
    """
    return {
        "hf_configuration": {
            # Literal placeholder text, not a real token.
            # NOTE(review): presumably substituted from the environment by
            # whatever consumes this file -- confirm.
            "token": "$HF_TOKEN",
            "private": True,
            "hf_organization": hf_org,
            "hf_dataset_name": hf_prefix,
        },
        "local_dataset_dir": "results/",
        "model_list": [
            {
                "model_name": model_name,
                "provider": "huggingface",
                "base_url": base_url,
                "api_key": "$HF_TOKEN",
                "max_concurrent_requests": 16,
            }
        ],
        # Every role gets its own list object on purpose: PyYAML emits
        # anchors/aliases when the same container object appears twice.
        "model_roles": {
            "ingestion": [model_name],
            "summarization": [model_name],
            "single_shot_question_generation": [model_name],
            "multi_hop_question_generation": [model_name],
            "answer_generation": [model_name],
            "judge_answers": [model_name],
        },
        "pipeline": {
            "ingestion": {
                "source_documents_dir": "/app/uploaded_files",
                "output_dir": "/app/ingested",
                "run": True,
            },
            "upload_ingest_to_hub": {
                # Reads from the directory the ingestion stage writes to.
                "source_documents_dir": "/app/ingested",
                "run": True,
            },
            "summarization": {"run": True},
            "chunking": {
                "chunking_configuration": {
                    "l_min_tokens": 64,
                    "l_max_tokens": 128,
                    "tau_threshold": 0.3,
                    "h_min": 2,
                    "h_max": 4,
                },
                "run": True,
            },
            "single_shot_question_generation": {
                "diversification_seed": "24 year old adult",
                "run": True,
            },
            "multi_hop_question_generation": {"run": True},
            "answer_generation": {
                "question_type": "single_shot",
                "run": True,
                "strategies": [
                    {
                        "name": "zeroshot",
                        "prompt": "ZEROSHOT_QA_USER_PROMPT",
                        "model_name": model_name,
                    },
                    {
                        "name": "gold",
                        "prompt": "GOLD_QA_USER_PROMPT",
                        "model_name": model_name,
                    },
                ],
            },
            "judge_answers": {
                "run": True,
                # Strategy names must match the "name" fields listed under
                # answer_generation.strategies above.
                "comparing_strategies": [["zeroshot", "gold"]],
                "chunk_column_index": 0,
                "random_seed": 42,
            },
        },
    }

def save_yaml_file(config):
    """Serialize ``config`` as YAML into the file at ``CONFIG_PATH``.

    The file is opened in write mode, so any existing content is replaced.
    Output uses block style and keeps the dictionary's insertion order
    instead of sorting keys.

    Returns:
        ``CONFIG_PATH``, the location that was written.
    """
    dump_options = {"default_flow_style": False, "sort_keys": False}
    with open(CONFIG_PATH, "w") as stream:
        yaml.dump(config, stream, **dump_options)
    return CONFIG_PATH

def generate_and_save_config(hf_org, hf_prefix):
    """Build the base config for the given org/prefix and write it to disk.

    Logs a debug message before generation and a success message once the
    file has been written.

    Returns:
        The path returned by ``save_yaml_file``.
    """
    logger.debug(f"Generating config with org: {hf_org}, prefix: {hf_prefix}")
    saved_path = save_yaml_file(generate_base_config(hf_org, hf_prefix))
    logger.success(f"Config saved at: {saved_path}")
    return saved_path