Spaces:
Running
Running
copperwiring
committed on
Commit
·
654128b
1
Parent(s):
96e76ac
Add Hugging Face Dataset integration for storing submissions
Browse files
app.py
CHANGED
@@ -4,11 +4,36 @@ import json
|
|
4 |
import uuid
|
5 |
from datetime import datetime
|
6 |
import shutil
|
|
|
7 |
|
8 |
# Create directories for data storage
|
9 |
os.makedirs("uploaded_images", exist_ok=True)
|
10 |
os.makedirs("submissions", exist_ok=True)
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
def process_submission(input_img, text_answer, multiple_choice, city, country, se_asia_relevance, culture_knowledge, native_caption, english_caption):
|
13 |
# Generate unique ID for this submission
|
14 |
submission_id = str(uuid.uuid4())
|
@@ -31,7 +56,7 @@ def process_submission(input_img, text_answer, multiple_choice, city, country, s
|
|
31 |
submission_data = {
|
32 |
"id": submission_id,
|
33 |
"timestamp": timestamp,
|
34 |
-
"image_filename": image_path,
|
35 |
"cultural_relevance": text_answer,
|
36 |
"continent": multiple_choice,
|
37 |
"city": city,
|
@@ -48,9 +73,37 @@ def process_submission(input_img, text_answer, multiple_choice, city, country, s
|
|
48 |
with open(json_path, "w") as f:
|
49 |
json.dump(submission_data, f, indent=2)
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
# Return values to display in the interface
|
52 |
return input_img, f"Your text response: {text_answer}", f"Your selected option: {multiple_choice}", f"Location: {city}, {country}", f"SE Asia relevance: {se_asia_relevance}", f"Cultural knowledge source: {culture_knowledge}", f"Native caption: {native_caption}", f"English caption: {english_caption}"
|
53 |
|
|
|
|
|
|
|
54 |
gradio_app = gr.Interface(
|
55 |
process_submission,
|
56 |
inputs=[
|
|
|
4 |
import uuid
|
5 |
from datetime import datetime
|
6 |
import shutil
|
7 |
+
from huggingface_hub import HfApi, create_repo, upload_file, upload_folder
|
8 |
|
9 |
# Create directories for data storage
|
10 |
os.makedirs("uploaded_images", exist_ok=True)
|
11 |
os.makedirs("submissions", exist_ok=True)
|
12 |
|
13 |
+
# Hugging Face Dataset configuration
|
14 |
+
HF_TOKEN = os.environ.get("HF_TOKEN") # You'll need to set this as a secret in your Space
|
15 |
+
DATASET_NAME = "srishtiy/se-culture-dataset-results" # Change to your username/dataset-name
|
16 |
+
DATASET_CREATED = False
|
17 |
+
|
18 |
+
def setup_hf_dataset():
    """Create the Hugging Face dataset repo if it has not been set up yet.

    Reads the module-level ``HF_TOKEN`` and ``DATASET_NAME``. On success the
    module-level ``DATASET_CREATED`` flag is set to ``True`` so later calls
    skip the repo creation.

    When ``HF_TOKEN`` is unset, a warning is printed and nothing else happens.
    Any exception raised while creating the repo is caught and printed
    (best effort); ``DATASET_CREATED`` is left unchanged in that case.

    Returns:
        None
    """
    global DATASET_CREATED

    if not HF_TOKEN:
        print("Warning: HF_TOKEN not set. Data will be stored locally only.")
        return

    if DATASET_CREATED:
        # Setup already succeeded earlier in this process; nothing to do.
        return

    try:
        # exist_ok=True is passed so an already-existing repo is not an error.
        create_repo(
            DATASET_NAME,
            repo_type="dataset",
            token=HF_TOKEN,
            exist_ok=True,
        )
    except Exception as e:
        # Deliberate best effort: report the failure and keep the app running.
        print(f"Error setting up dataset: {e}")
    else:
        DATASET_CREATED = True
        print(f"Dataset {DATASET_NAME} is ready")
|
36 |
+
|
37 |
def process_submission(input_img, text_answer, multiple_choice, city, country, se_asia_relevance, culture_knowledge, native_caption, english_caption):
|
38 |
# Generate unique ID for this submission
|
39 |
submission_id = str(uuid.uuid4())
|
|
|
56 |
submission_data = {
|
57 |
"id": submission_id,
|
58 |
"timestamp": timestamp,
|
59 |
+
"image_filename": os.path.basename(image_path) if image_path else None,
|
60 |
"cultural_relevance": text_answer,
|
61 |
"continent": multiple_choice,
|
62 |
"city": city,
|
|
|
73 |
with open(json_path, "w") as f:
|
74 |
json.dump(submission_data, f, indent=2)
|
75 |
|
76 |
+
# Upload to Hugging Face Dataset if token is available
|
77 |
+
if HF_TOKEN and DATASET_CREATED:
|
78 |
+
try:
|
79 |
+
api = HfApi()
|
80 |
+
# Upload the JSON data
|
81 |
+
api.upload_file(
|
82 |
+
path_or_fileobj=json_path,
|
83 |
+
path_in_repo=f"submissions/{json_filename}",
|
84 |
+
repo_id=DATASET_NAME,
|
85 |
+
repo_type="dataset",
|
86 |
+
token=HF_TOKEN
|
87 |
+
)
|
88 |
+
# Upload the image if it exists
|
89 |
+
if image_path and os.path.exists(image_path):
|
90 |
+
api.upload_file(
|
91 |
+
path_or_fileobj=image_path,
|
92 |
+
path_in_repo=f"images/{os.path.basename(image_path)}",
|
93 |
+
repo_id=DATASET_NAME,
|
94 |
+
repo_type="dataset",
|
95 |
+
token=HF_TOKEN
|
96 |
+
)
|
97 |
+
print(f"Submission {submission_id} uploaded to Hugging Face Dataset")
|
98 |
+
except Exception as e:
|
99 |
+
print(f"Error uploading to dataset: {e}")
|
100 |
+
|
101 |
# Return values to display in the interface
|
102 |
return input_img, f"Your text response: {text_answer}", f"Your selected option: {multiple_choice}", f"Location: {city}, {country}", f"SE Asia relevance: {se_asia_relevance}", f"Cultural knowledge source: {culture_knowledge}", f"Native caption: {native_caption}", f"English caption: {english_caption}"
|
103 |
|
104 |
+
# Initialize the dataset
|
105 |
+
setup_hf_dataset()
|
106 |
+
|
107 |
gradio_app = gr.Interface(
|
108 |
process_submission,
|
109 |
inputs=[
|