"""Examples of loading and preprocessing datasets with the Hugging Face `datasets` library.

Demonstrates: loading a Hub dataset, reading a local CSV, building a dataset
from a Python generator, taking a slice of a split, and batched preprocessing
with `.map`.
"""
from datasets import Dataset, load_dataset

# Load a dataset from Hugging Face's dataset hub
dataset = load_dataset("glue", "mrpc")

# Print information about the dataset (splits, features, num_rows)
print(dataset)

# Access a specific split (e.g., train)
train_dataset = dataset["train"]

# Print the first example
print(train_dataset[0])

# Load a dataset from a local file
local_dataset = load_dataset('csv', data_files='path/to/your/file.csv')


# Load a dataset from a custom generator function.
# NOTE: the generator must yield plain example dicts — yielding (key, dict)
# tuples would corrupt the inferred schema.
def generate_examples():
    """Yield 100 toy examples, each a dict with `text` and a binary `label`."""
    for i in range(100):
        yield {"text": f"This is example {i}", "label": i % 2}


# `Dataset.from_generator` is the documented public API for this; the callable
# itself is passed directly (gen_kwargs is reserved for arguments forwarded
# *to* the generator, not for the generator itself).
custom_dataset = Dataset.from_generator(generate_examples)

# Load a specific subset of a large dataset (first 1000 training examples)
subset_dataset = load_dataset("glue", "mrpc", split="train[:1000]")


# Load a dataset and apply a preprocessing function
def preprocess_function(examples):
    """Batched map function: add a `length` column with the character length
    of each `sentence1`. `examples` is a dict of column-name -> list of values."""
    return {"length": [len(text) for text in examples["sentence1"]]}


processed_dataset = dataset.map(preprocess_function, batched=True)