Quentin Gallouédec committed on
Commit
ce89e6e
·
1 Parent(s): e6fad20
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -68,12 +68,13 @@ def clean(text):
68
  return text
69
 
70
 
71
- def pdf2dataset(pathes, user_id, dataset_id, token, progress=gr.Progress()):
72
  if any([user_id, dataset_id, token]) and not all([user_id, dataset_id, token]):
73
  raise gr.Error("Please provide all three: User ID, Dataset ID, and API token.")
74
 
75
  if user_id == "":
76
  user_id = "pdf2dataset"
 
77
  if dataset_id == "":
78
  dataset_id = f"{random.getrandbits(128):x}"
79
  if token == "":
@@ -104,7 +105,7 @@ def pdf2dataset(pathes, user_id, dataset_id, token, progress=gr.Progress()):
104
  # Upload the dataset to Hugging Face
105
  progress(0, desc="Uploading to Hugging Face...")
106
  dataset = Dataset.from_dict({"text": page_texts, "source": page_filenames})
107
- dataset.push_to_hub(f"{user_id}/{dataset_id}", token=token)
108
  progress(1, desc="Done!")
109
 
110
  instructions = instructions_template.substitute(user_id=user_id, dataset_id=dataset_id)
@@ -164,7 +165,7 @@ with gr.Blocks() as demo:
164
  gr.Markdown("## 1️⃣ Upload PDFs")
165
  file = gr.File(file_types=["pdf"], file_count="multiple")
166
  gr.Markdown(caution_text)
167
- with gr.Accordion("🔒 Pushing to my personal Hugging Face account", open=False):
168
  gr.Markdown(
169
  """Recommended for API token
170
  - Go to https://huggingface.co/settings/tokens?new_token=true
@@ -176,6 +177,7 @@ with gr.Blocks() as demo:
176
  user_id = gr.Textbox(label="User ID", placeholder="Enter your Hugging Face user ID")
177
  dataset_id = gr.Textbox(label="Dataset ID", placeholder="Enter the desired dataset ID")
178
  token = gr.Textbox(label="API token", placeholder="Enter a Hugging Face API token")
 
179
 
180
  gr.Markdown("## 2️⃣ Convert the PDFs and upload")
181
  convert_button = gr.Button("🔄 Convert and upload")
@@ -189,7 +191,9 @@ with gr.Blocks() as demo:
189
  delete_button = gr.Button("🗑️ Delete dataset")
190
 
191
  # Define the actions
192
- convert_button.click(pdf2dataset, inputs=[file, user_id, dataset_id, token], outputs=[instructions, preview, dataset_id_to_delete])
 
 
193
  delete_button.click(delete_dataset, inputs=[dataset_id_to_delete], outputs=[delete_button])
194
  dataset_id_to_delete.input(lambda: "🗑️ Delete dataset", outputs=[delete_button])
195
 
 
68
  return text
69
 
70
 
71
+ def pdf2dataset(pathes, user_id, dataset_id, token, private, progress=gr.Progress()):
72
  if any([user_id, dataset_id, token]) and not all([user_id, dataset_id, token]):
73
  raise gr.Error("Please provide all three: User ID, Dataset ID, and API token.")
74
 
75
  if user_id == "":
76
  user_id = "pdf2dataset"
77
+ private = False
78
  if dataset_id == "":
79
  dataset_id = f"{random.getrandbits(128):x}"
80
  if token == "":
 
105
  # Upload the dataset to Hugging Face
106
  progress(0, desc="Uploading to Hugging Face...")
107
  dataset = Dataset.from_dict({"text": page_texts, "source": page_filenames})
108
+ dataset.push_to_hub(f"{user_id}/{dataset_id}", token=token, private=private)
109
  progress(1, desc="Done!")
110
 
111
  instructions = instructions_template.substitute(user_id=user_id, dataset_id=dataset_id)
 
165
  gr.Markdown("## 1️⃣ Upload PDFs")
166
  file = gr.File(file_types=["pdf"], file_count="multiple")
167
  gr.Markdown(caution_text)
168
+ with gr.Accordion("🔒 Pushing to my personal Hugging Face namespace", open=False):
169
  gr.Markdown(
170
  """Recommended for API token
171
  - Go to https://huggingface.co/settings/tokens?new_token=true
 
177
  user_id = gr.Textbox(label="User ID", placeholder="Enter your Hugging Face user ID")
178
  dataset_id = gr.Textbox(label="Dataset ID", placeholder="Enter the desired dataset ID")
179
  token = gr.Textbox(label="API token", placeholder="Enter a Hugging Face API token")
180
+ private = gr.Checkbox(label="Private", default=False)
181
 
182
  gr.Markdown("## 2️⃣ Convert the PDFs and upload")
183
  convert_button = gr.Button("🔄 Convert and upload")
 
191
  delete_button = gr.Button("🗑️ Delete dataset")
192
 
193
  # Define the actions
194
+ convert_button.click(
195
+ pdf2dataset, inputs=[file, user_id, dataset_id, token, private], outputs=[instructions, preview, dataset_id_to_delete]
196
+ )
197
  delete_button.click(delete_dataset, inputs=[dataset_id_to_delete], outputs=[delete_button])
198
  dataset_id_to_delete.input(lambda: "🗑️ Delete dataset", outputs=[delete_button])
199