ishitatyagi14 committed on
Commit
e9abf2c
·
verified ·
1 Parent(s): 9c00876

Create load.py

Browse files
Files changed (1) hide show
  1. load.py +32 -0
load.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Example script: several ways of loading data with the Hugging Face
# `datasets` library (hub dataset, local CSV, Python generator, split slicing,
# and a batched `map` preprocessing step).
from pathlib import Path

from datasets import Dataset, load_dataset

# Load a dataset from Hugging Face's dataset hub
dataset = load_dataset("glue", "mrpc")

# Print information about the dataset
print(dataset)

# Access a specific split (e.g., train)
train_dataset = dataset["train"]

# Print the first example
print(train_dataset[0])

# Load a dataset from a local file.
# The path below is a placeholder: only attempt the load when the file really
# exists, so the remaining examples still run. `local_dataset` is None otherwise.
csv_path = Path('path/to/your/file.csv')
local_dataset = (
    load_dataset('csv', data_files=str(csv_path)) if csv_path.is_file() else None
)


# Load a dataset from a custom function
def generate_examples():
    """Yield 100 synthetic examples, each a dict with "text" and "label" keys."""
    for i in range(100):
        # Dataset.from_generator expects every yielded item to be an example
        # dict (not an (index, example) tuple).
        yield {"text": f"This is example {i}", "label": i % 2}


# Dataset.from_generator is the documented way to build a dataset from a
# Python generator function.
custom_dataset = Dataset.from_generator(generate_examples)

# Load a specific subset of a large dataset
subset_dataset = load_dataset("glue", "mrpc", split="train[:1000]")


# Load a dataset and apply a preprocessing function
def preprocess_function(examples):
    """Return a "length" column holding the character length of each "sentence1".

    Args:
        examples: A batch mapping column names to lists of values (this function
            is applied with ``batched=True``); must contain "sentence1".

    Returns:
        A dict with a single "length" key mapping to a list of ints, one per
        example in the batch.
    """
    return {"length": [len(text) for text in examples["sentence1"]]}


processed_dataset = dataset.map(preprocess_function, batched=True)