Spaces:
Build error
Build error
Quentin Gallouédec
committed on
Commit
·
ce89e6e
1
Parent(s):
e6fad20
private
Browse files
app.py
CHANGED
@@ -68,12 +68,13 @@ def clean(text):
|
|
68 |
return text
|
69 |
|
70 |
|
71 |
-
def pdf2dataset(pathes, user_id, dataset_id, token, progress=gr.Progress()):
|
72 |
if any([user_id, dataset_id, token]) and not all([user_id, dataset_id, token]):
|
73 |
raise gr.Error("Please provide all three: User ID, Dataset ID, and API token.")
|
74 |
|
75 |
if user_id == "":
|
76 |
user_id = "pdf2dataset"
|
|
|
77 |
if dataset_id == "":
|
78 |
dataset_id = f"{random.getrandbits(128):x}"
|
79 |
if token == "":
|
@@ -104,7 +105,7 @@ def pdf2dataset(pathes, user_id, dataset_id, token, progress=gr.Progress()):
|
|
104 |
# Upload the dataset to Hugging Face
|
105 |
progress(0, desc="Uploading to Hugging Face...")
|
106 |
dataset = Dataset.from_dict({"text": page_texts, "source": page_filenames})
|
107 |
-
dataset.push_to_hub(f"{user_id}/{dataset_id}", token=token)
|
108 |
progress(1, desc="Done!")
|
109 |
|
110 |
instructions = instructions_template.substitute(user_id=user_id, dataset_id=dataset_id)
|
@@ -164,7 +165,7 @@ with gr.Blocks() as demo:
|
|
164 |
gr.Markdown("## 1️⃣ Upload PDFs")
|
165 |
file = gr.File(file_types=["pdf"], file_count="multiple")
|
166 |
gr.Markdown(caution_text)
|
167 |
-
with gr.Accordion("🔒 Pushing to my personal Hugging Face
|
168 |
gr.Markdown(
|
169 |
"""Recommended for API token
|
170 |
- Go to https://huggingface.co/settings/tokens?new_token=true
|
@@ -176,6 +177,7 @@ with gr.Blocks() as demo:
|
|
176 |
user_id = gr.Textbox(label="User ID", placeholder="Enter your Hugging Face user ID")
|
177 |
dataset_id = gr.Textbox(label="Dataset ID", placeholder="Enter the desired dataset ID")
|
178 |
token = gr.Textbox(label="API token", placeholder="Enter a Hugging Face API token")
|
|
|
179 |
|
180 |
gr.Markdown("## 2️⃣ Convert the PDFs and upload")
|
181 |
convert_button = gr.Button("🔄 Convert and upload")
|
@@ -189,7 +191,9 @@ with gr.Blocks() as demo:
|
|
189 |
delete_button = gr.Button("🗑️ Delete dataset")
|
190 |
|
191 |
# Define the actions
|
192 |
-
convert_button.click(
|
|
|
|
|
193 |
delete_button.click(delete_dataset, inputs=[dataset_id_to_delete], outputs=[delete_button])
|
194 |
dataset_id_to_delete.input(lambda: "🗑️ Delete dataset", outputs=[delete_button])
|
195 |
|
|
|
68 |
return text
|
69 |
|
70 |
|
71 |
+
def pdf2dataset(pathes, user_id, dataset_id, token, private, progress=gr.Progress()):
|
72 |
if any([user_id, dataset_id, token]) and not all([user_id, dataset_id, token]):
|
73 |
raise gr.Error("Please provide all three: User ID, Dataset ID, and API token.")
|
74 |
|
75 |
if user_id == "":
|
76 |
user_id = "pdf2dataset"
|
77 |
+
private = False
|
78 |
if dataset_id == "":
|
79 |
dataset_id = f"{random.getrandbits(128):x}"
|
80 |
if token == "":
|
|
|
105 |
# Upload the dataset to Hugging Face
|
106 |
progress(0, desc="Uploading to Hugging Face...")
|
107 |
dataset = Dataset.from_dict({"text": page_texts, "source": page_filenames})
|
108 |
+
dataset.push_to_hub(f"{user_id}/{dataset_id}", token=token, private=private)
|
109 |
progress(1, desc="Done!")
|
110 |
|
111 |
instructions = instructions_template.substitute(user_id=user_id, dataset_id=dataset_id)
|
|
|
165 |
gr.Markdown("## 1️⃣ Upload PDFs")
|
166 |
file = gr.File(file_types=["pdf"], file_count="multiple")
|
167 |
gr.Markdown(caution_text)
|
168 |
+
with gr.Accordion("🔒 Pushing to my personal Hugging Face namespace", open=False):
|
169 |
gr.Markdown(
|
170 |
"""Recommended for API token
|
171 |
- Go to https://huggingface.co/settings/tokens?new_token=true
|
|
|
177 |
user_id = gr.Textbox(label="User ID", placeholder="Enter your Hugging Face user ID")
|
178 |
dataset_id = gr.Textbox(label="Dataset ID", placeholder="Enter the desired dataset ID")
|
179 |
token = gr.Textbox(label="API token", placeholder="Enter a Hugging Face API token")
|
180 |
+
# Checkbox controlling whether the pushed dataset repo is private; unchecked by default.
# The initial state is passed as `value` (the legacy `default` keyword is not accepted by the Blocks-era component API).
private = gr.Checkbox(label="Private", value=False)
|
181 |
|
182 |
gr.Markdown("## 2️⃣ Convert the PDFs and upload")
|
183 |
convert_button = gr.Button("🔄 Convert and upload")
|
|
|
191 |
delete_button = gr.Button("🗑️ Delete dataset")
|
192 |
|
193 |
# Define the actions
|
194 |
+
convert_button.click(
|
195 |
+
pdf2dataset, inputs=[file, user_id, dataset_id, token, private], outputs=[instructions, preview, dataset_id_to_delete]
|
196 |
+
)
|
197 |
delete_button.click(delete_dataset, inputs=[dataset_id_to_delete], outputs=[delete_button])
|
198 |
dataset_id_to_delete.input(lambda: "🗑️ Delete dataset", outputs=[delete_button])
|
199 |
|