# training-data / load.py
# Provenance: Hugging Face Hub, user ishitatyagi14, commit "Create load.py"
# (revision e9abf2c, verified; file size 993 bytes)
from datasets import Dataset, load_dataset
# Load the GLUE benchmark's MRPC task from the Hugging Face Hub; the result
# is a split-keyed container (indexed below by "train").
dataset = load_dataset("glue", "mrpc")
# Print an overview of the splits and their features/row counts
print(dataset)
# Access a specific split (e.g., train)
train_dataset = dataset["train"]
# A split indexes by row number; row 0 prints as a plain dict of columns
print(train_dataset[0])
# Load a dataset from a local file via the 'csv' builder.
# NOTE(review): 'path/to/your/file.csv' is a placeholder — replace with a
# real path before running.
local_dataset = load_dataset('csv', data_files='path/to/your/file.csv')
# Load a dataset from a custom generator function.
def generate_examples():
    """Yield 100 example dicts, each with a text field and an alternating 0/1 label.

    Generators consumed by ``Dataset.from_generator`` (or the ``"generator"``
    builder) must yield plain dicts — one per example.  The previous version
    yielded ``(key, dict)`` tuples, which is the convention for
    ``_generate_examples`` inside dataset loading scripts, not for this API.
    """
    for i in range(100):
        yield {"text": f"This is example {i}", "label": i % 2}
# Dataset.from_generator is the supported API for building a dataset from an
# in-process generator.  The previous call,
# load_dataset("generator", gen_kwargs={"generator": generate_examples}),
# is broken: gen_kwargs are keyword arguments passed *to* the generator, so
# the builder's own `generator` field was never set and loading fails.
custom_dataset = Dataset.from_generator(generate_examples)
# Load a specific subset of a large dataset: the split argument accepts
# slicing syntax, so "train[:1000]" yields only the first 1000 rows of the
# train split as a single Dataset (not a split-keyed container).
subset_dataset = load_dataset("glue", "mrpc", split="train[:1000]")
# Preprocessing callback for Dataset.map: with batched=True it receives a
# dict of columns (each a list) and returns the new column(s) to add.
def preprocess_function(examples):
    """Return a 'length' column holding the character count of each sentence1 entry."""
    sentence_lengths = [len(sentence) for sentence in examples["sentence1"]]
    return {"length": sentence_lengths}
# Apply preprocess_function to every split of `dataset`; batched=True passes
# batches as dicts of column lists (matching the list comprehension inside
# preprocess_function) instead of one row at a time.
processed_dataset = dataset.map(preprocess_function, batched=True)