# training-data / load.py
# Provenance: Hugging Face Hub, user ishitatyagi14, commit "Create load.py"
# (revision e9abf2c, verified; file size 993 bytes)
from datasets import Dataset, load_dataset
# Load the GLUE benchmark's MRPC task from the Hugging Face Hub; the result
# is a split-keyed container (indexed below by "train").
dataset = load_dataset("glue", "mrpc")
# Print an overview of the splits and their features/row counts
print(dataset)
# Access a specific split (e.g., train)
train_dataset = dataset["train"]
# A split indexes by row number; row 0 prints as a plain dict of columns
print(train_dataset[0])
# Load a dataset from a local file via the 'csv' builder.
# NOTE(review): 'path/to/your/file.csv' is a placeholder — replace with a
# real path before running.
local_dataset = load_dataset('csv', data_files='path/to/your/file.csv')
# Load a dataset from a custom generator function.
def generate_examples():
    """Yield 100 example dicts, each with a text field and an alternating 0/1 label.

    Generators consumed by ``Dataset.from_generator`` (or the ``"generator"``
    builder) must yield plain dicts — one per example.  The previous version
    yielded ``(key, dict)`` tuples, which is the convention for
    ``_generate_examples`` inside dataset loading scripts, not for this API.
    """
    for i in range(100):
        yield {"text": f"This is example {i}", "label": i % 2}
# Dataset.from_generator is the supported API for building a dataset from an
# in-process generator.  The previous call,
# load_dataset("generator", gen_kwargs={"generator": generate_examples}),
# is broken: gen_kwargs are keyword arguments passed *to* the generator, so
# the builder's own `generator` field was never set and loading fails.
custom_dataset = Dataset.from_generator(generate_examples)
# Load a specific subset of a large dataset: the split argument accepts
# slicing syntax, so "train[:1000]" yields only the first 1000 rows of the
# train split as a single Dataset (not a split-keyed container).
subset_dataset = load_dataset("glue", "mrpc", split="train[:1000]")
# Preprocessing callback for Dataset.map: with batched=True it receives a
# dict of columns (each a list) and returns the new column(s) to add.
def preprocess_function(examples):
    """Return a 'length' column holding the character count of each sentence1 entry."""
    sentence_lengths = [len(sentence) for sentence in examples["sentence1"]]
    return {"length": sentence_lengths}
# Apply preprocess_function to every split of `dataset`; batched=True passes
# batches as dicts of column lists (matching the list comprehension inside
# preprocess_function) instead of one row at a time.
processed_dataset = dataset.map(preprocess_function, batched=True)