File size: 993 Bytes
e9abf2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from datasets import Dataset, load_dataset

# Fetch the GLUE/MRPC benchmark from the Hugging Face Hub
# (downloaded and cached locally on first use).
dataset = load_dataset("glue", "mrpc")

# Show the DatasetDict summary: splits, feature schema, row counts.
print(dataset)

# Pull a single split out of the DatasetDict by key.
train_dataset = dataset["train"]

# Inspect the first row of the training split.
print(train_dataset[0])

# Datasets can also be built from local files instead of the Hub:
# here the packaged 'csv' builder reads one CSV file (placeholder path).
local_dataset = load_dataset('csv', data_files='path/to/your/file.csv')

# Build a dataset from a custom Python generator.
def generate_examples():
    """Yield 100 toy examples as plain dicts.

    Each example has a ``text`` string and an alternating 0/1 ``label``.
    NOTE: the generator-based dataset API expects the generator to yield
    example dicts directly; yielding ``(key, example)`` tuples (as the
    original did) is only valid inside a ``GeneratorBasedBuilder``'s
    ``_generate_examples`` method.
    """
    for i in range(100):
        yield {"text": f"This is example {i}", "label": i % 2}

# Use the documented Dataset.from_generator constructor. The original
# load_dataset("generator", gen_kwargs={"generator": ...}) call was wrong:
# gen_kwargs are keyword arguments forwarded *to* the generator, they do
# not carry the generator callable itself.
custom_dataset = Dataset.from_generator(generate_examples)

# Load only a prefix of a large dataset by using a slice split spec;
# this avoids materializing the full training split.
train_prefix = "train[:1000]"
subset_dataset = load_dataset("glue", "mrpc", split=train_prefix)

# Preprocessing step to be applied with Dataset.map(batched=True).
def preprocess_function(examples):
    """Return a new ``length`` column with the character count of each
    ``sentence1`` entry.

    Because this is used with ``batched=True``, *examples* is a dict of
    columns (lists), not a single row.
    """
    return {"length": list(map(len, examples["sentence1"]))}

processed_dataset = dataset.map(preprocess_function, batched=True)