File size: 789 Bytes
8e80adf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# puzzle_dataset.py
import pandas as pd
import os
from dotenv import load_dotenv
from huggingface_hub import login
from deep_convert import deep_convert

# Module-level cache for the loaded dataset; stays None until init_dataset()
# assigns the DataFrame read from the parquet file.
df = None

def init_dataset():
    """Load the ZebraLogic grid-mode test parquet into the module-level ``df``.

    The call is a no-op when ``df`` is already populated, so a successful
    load happens only once. Environment variables are first loaded from a
    ``.env`` file; if ``HF_TOKEN`` is set, it is used to log in to the
    Hugging Face Hub before the dataset is read.
    """
    global df
    if df is not None:
        return

    load_dotenv()
    hf_token = os.getenv("HF_TOKEN")
    # Only log in when a token is actually configured: login(token=None)
    # falls back to an interactive prompt, which stalls non-interactive runs.
    if hf_token:
        login(token=hf_token)

    # Load the parquet data.
    df = pd.read_parquet("hf://datasets/WildEval/ZebraLogic/grid_mode/test-00000-of-00001.parquet")

def get_puzzle_by_index(idx: int):
    """
    Look up a single puzzle by its positional index.

    Returns a ``(puzzle_text, expected_solution)`` pair, or ``(None, None)``
    when ``idx`` falls outside the loaded dataset.
    """
    # Load the dataset lazily on first use.
    if df is None:
        init_dataset()

    # Simple bounds check on the index.
    row_count = len(df)
    if idx < 0 or idx >= row_count:
        return None, None

    record = df.iloc[idx]
    puzzle_text = record['puzzle']
    expected_solution = deep_convert(record['solution'])
    return puzzle_text, expected_solution