copperwiring committed on
Commit
654128b
·
1 Parent(s): 96e76ac

Add Hugging Face Dataset integration for storing submissions

Browse files
Files changed (1) hide show
  1. app.py +54 -1
app.py CHANGED
@@ -4,11 +4,36 @@ import json
4
  import uuid
5
  from datetime import datetime
6
  import shutil
 
7
 
8
  # Create directories for data storage
9
  os.makedirs("uploaded_images", exist_ok=True)
10
  os.makedirs("submissions", exist_ok=True)
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def process_submission(input_img, text_answer, multiple_choice, city, country, se_asia_relevance, culture_knowledge, native_caption, english_caption):
13
  # Generate unique ID for this submission
14
  submission_id = str(uuid.uuid4())
@@ -31,7 +56,7 @@ def process_submission(input_img, text_answer, multiple_choice, city, country, s
31
  submission_data = {
32
  "id": submission_id,
33
  "timestamp": timestamp,
34
- "image_filename": image_path,
35
  "cultural_relevance": text_answer,
36
  "continent": multiple_choice,
37
  "city": city,
@@ -48,9 +73,37 @@ def process_submission(input_img, text_answer, multiple_choice, city, country, s
48
  with open(json_path, "w") as f:
49
  json.dump(submission_data, f, indent=2)
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  # Return values to display in the interface
52
  return input_img, f"Your text response: {text_answer}", f"Your selected option: {multiple_choice}", f"Location: {city}, {country}", f"SE Asia relevance: {se_asia_relevance}", f"Cultural knowledge source: {culture_knowledge}", f"Native caption: {native_caption}", f"English caption: {english_caption}"
53
 
 
 
 
54
  gradio_app = gr.Interface(
55
  process_submission,
56
  inputs=[
 
4
  import uuid
5
  from datetime import datetime
6
  import shutil
7
+ from huggingface_hub import HfApi, create_repo, upload_file, upload_folder
8
 
9
  # Create directories for data storage
10
  os.makedirs("uploaded_images", exist_ok=True)
11
  os.makedirs("submissions", exist_ok=True)
12
 
13
+ # Hugging Face Dataset configuration
14
+ HF_TOKEN = os.environ.get("HF_TOKEN") # You'll need to set this as a secret in your Space
15
+ DATASET_NAME = "srishtiy/se-culture-dataset-results" # Change to your username/dataset-name
16
+ DATASET_CREATED = False
17
+
18
def setup_hf_dataset():
    """Ensure the Hugging Face dataset repository exists.

    Reads the module-level ``HF_TOKEN`` and ``DATASET_NAME`` and, on
    success, sets the module-level ``DATASET_CREATED`` flag to ``True``.

    Behaviour:
      * If ``HF_TOKEN`` is falsy, print a warning and return; nothing is
        created and ``DATASET_CREATED`` is left untouched.
      * If ``DATASET_CREATED`` is already true, do nothing.
      * Otherwise call ``create_repo`` with ``exist_ok=True`` and
        ``repo_type="dataset"``. Any exception is reported with ``print``
        and swallowed, leaving ``DATASET_CREATED`` false.

    Returns:
        None.
    """
    global DATASET_CREATED

    if not HF_TOKEN:
        print("Warning: HF_TOKEN not set. Data will be stored locally only.")
        return

    if DATASET_CREATED:
        return

    try:
        create_repo(
            DATASET_NAME,
            repo_type="dataset",
            token=HF_TOKEN,
            exist_ok=True,
        )
    except Exception as e:
        # Best-effort: a failure here is reported but not re-raised.
        print(f"Error setting up dataset: {e}")
    else:
        DATASET_CREATED = True
        print(f"Dataset {DATASET_NAME} is ready")
36
+
37
  def process_submission(input_img, text_answer, multiple_choice, city, country, se_asia_relevance, culture_knowledge, native_caption, english_caption):
38
  # Generate unique ID for this submission
39
  submission_id = str(uuid.uuid4())
 
56
  submission_data = {
57
  "id": submission_id,
58
  "timestamp": timestamp,
59
+ "image_filename": os.path.basename(image_path) if image_path else None,
60
  "cultural_relevance": text_answer,
61
  "continent": multiple_choice,
62
  "city": city,
 
73
  with open(json_path, "w") as f:
74
  json.dump(submission_data, f, indent=2)
75
 
76
+ # Upload to Hugging Face Dataset if token is available
77
+ if HF_TOKEN and DATASET_CREATED:
78
+ try:
79
+ api = HfApi()
80
+ # Upload the JSON data
81
+ api.upload_file(
82
+ path_or_fileobj=json_path,
83
+ path_in_repo=f"submissions/{json_filename}",
84
+ repo_id=DATASET_NAME,
85
+ repo_type="dataset",
86
+ token=HF_TOKEN
87
+ )
88
+ # Upload the image if it exists
89
+ if image_path and os.path.exists(image_path):
90
+ api.upload_file(
91
+ path_or_fileobj=image_path,
92
+ path_in_repo=f"images/{os.path.basename(image_path)}",
93
+ repo_id=DATASET_NAME,
94
+ repo_type="dataset",
95
+ token=HF_TOKEN
96
+ )
97
+ print(f"Submission {submission_id} uploaded to Hugging Face Dataset")
98
+ except Exception as e:
99
+ print(f"Error uploading to dataset: {e}")
100
+
101
  # Return values to display in the interface
102
  return input_img, f"Your text response: {text_answer}", f"Your selected option: {multiple_choice}", f"Location: {city}, {country}", f"SE Asia relevance: {se_asia_relevance}", f"Cultural knowledge source: {culture_knowledge}", f"Native caption: {native_caption}", f"English caption: {english_caption}"
103
 
104
+ # Initialize the dataset
105
+ setup_hf_dataset()
106
+
107
  gradio_app = gr.Interface(
108
  process_submission,
109
  inputs=[