File size: 993 Bytes
e9abf2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from datasets import Dataset, load_dataset

# Fetch the GLUE/MRPC benchmark from the Hugging Face Hub
# (downloaded and cached locally on first use).
dataset = load_dataset("glue", "mrpc")

# Show the DatasetDict summary: splits, feature schema, row counts.
print(dataset)

# Pull a single split out of the DatasetDict by key.
train_dataset = dataset["train"]

# Inspect the first row of the training split.
print(train_dataset[0])

# Datasets can also be built from local files instead of the Hub:
# here the packaged 'csv' builder reads one CSV file (placeholder path).
local_dataset = load_dataset('csv', data_files='path/to/your/file.csv')

# Build a dataset from a custom Python generator.
def generate_examples():
    """Yield 100 toy examples as plain dicts.

    Each example has a ``text`` string and an alternating 0/1 ``label``.
    NOTE: the generator-based dataset API expects the generator to yield
    example dicts directly; yielding ``(key, example)`` tuples (as the
    original did) is only valid inside a ``GeneratorBasedBuilder``'s
    ``_generate_examples`` method.
    """
    for i in range(100):
        yield {"text": f"This is example {i}", "label": i % 2}

# Use the documented Dataset.from_generator constructor. The original
# load_dataset("generator", gen_kwargs={"generator": ...}) call was wrong:
# gen_kwargs are keyword arguments forwarded *to* the generator, they do
# not carry the generator callable itself.
custom_dataset = Dataset.from_generator(generate_examples)

# Load only a prefix of a large dataset by using a slice split spec;
# this avoids materializing the full training split.
train_prefix = "train[:1000]"
subset_dataset = load_dataset("glue", "mrpc", split=train_prefix)

# Preprocessing step to be applied with Dataset.map(batched=True).
def preprocess_function(examples):
    """Return a new ``length`` column with the character count of each
    ``sentence1`` entry.

    Because this is used with ``batched=True``, *examples* is a dict of
    columns (lists), not a single row.
    """
    return {"length": list(map(len, examples["sentence1"]))}

processed_dataset = dataset.map(preprocess_function, batched=True)