Spaces:
No application file
No application file
Create load.py
Browse files
load.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from datasets import Dataset, load_dataset
|
2 |
+
|
3 |
+
# Load a dataset from Hugging Face's dataset hub.
# No split is requested here, so the result is indexed by split name below.
dataset = load_dataset("glue", "mrpc")

# Print information about the dataset
print(dataset)

# Access a specific split (e.g., train)
train_dataset = dataset["train"]

# Print the first example
print(train_dataset[0])
|
14 |
+
|
15 |
+
# Load a dataset from a local file.
# NOTE(review): 'path/to/your/file.csv' is a placeholder; point it at a real
# CSV file before running this script, otherwise this call is expected to fail.
local_dataset = load_dataset('csv', data_files='path/to/your/file.csv')
|
17 |
+
|
18 |
+
# Load a dataset from a custom function
|
19 |
+
def generate_examples():
    """Yield 100 synthetic ``(key, example)`` pairs.

    Each pair is ``(i, {"text": ..., "label": ...})`` for ``i`` in
    ``range(100)``; the label alternates between 0 and 1 (``i % 2``).
    """
    for i in range(100):
        yield i, {"text": f"This is example {i}", "label": i % 2}
|
22 |
+
|
23 |
+
def _iter_example_rows():
    """Yield only the example dicts produced by ``generate_examples()``.

    ``generate_examples()`` yields ``(key, example)`` pairs, while
    ``Dataset.from_generator`` expects the generator to yield example dicts,
    so the key is dropped here.
    """
    for _key, example in generate_examples():
        yield example


# Dataset.from_generator is the documented API for building a dataset from a
# Python generator; load_dataset("generator", ...) is not a supported loader
# name and would be looked up as a dataset identifier instead.
custom_dataset = Dataset.from_generator(_iter_example_rows)
|
24 |
+
|
25 |
+
# Load a specific subset of a large dataset:
# the split string "train[:1000]" requests only the first 1000 training rows.
subset_dataset = load_dataset("glue", "mrpc", split="train[:1000]")
|
27 |
+
|
28 |
+
# Load a dataset and apply a preprocessing function
|
29 |
+
def preprocess_function(examples):
    """Compute the character length of every ``sentence1`` entry in a batch.

    Args:
        examples: Mapping with a ``"sentence1"`` key holding an iterable of
            sized values (strings, in this script's usage).

    Returns:
        A dict ``{"length": [...]}`` with one length per ``sentence1`` entry,
        in the same order.
    """
    return {"length": [len(text) for text in examples["sentence1"]]}
|
31 |
+
|
32 |
+
# Run preprocess_function over the loaded dataset in batched mode, so the
# function receives columns as lists rather than one example at a time.
processed_dataset = dataset.map(
    function=preprocess_function,
    batched=True,
)
|