# Hugging Face Space (status: Running)
import json
import os
import shutil
import uuid
from datetime import datetime

import gradio as gr
from huggingface_hub import HfApi, create_repo, upload_file, upload_folder
# Create the local directories that hold submitted images and JSON records.
os.makedirs("uploaded_images", exist_ok=True)
os.makedirs("submissions", exist_ok=True)

# Hugging Face Dataset configuration
HF_TOKEN = os.environ.get("HF_TOKEN")  # You'll need to set this as a secret in your Space
DATASET_NAME = "srishtiy/se-culture-dataset-results"  # Change to your username/dataset-name
# Flipped to True by setup_hf_dataset() after create_repo succeeds;
# process_submission() only uploads when this and HF_TOKEN are both truthy.
DATASET_CREATED = False
def setup_hf_dataset():
    """Initialize the Hugging Face dataset if it doesn't exist.

    When ``HF_TOKEN`` is unset a warning is printed and nothing else happens.
    When the module-level ``DATASET_CREATED`` flag is already true the call is
    a no-op. Otherwise ``create_repo`` is called for ``DATASET_NAME`` with
    ``exist_ok=True``; on success ``DATASET_CREATED`` is set to ``True``.
    Any exception from ``create_repo`` is printed rather than raised, leaving
    the flag unchanged.
    """
    global DATASET_CREATED

    if not HF_TOKEN:
        print("Warning: HF_TOKEN not set. Data will be stored locally only.")
        return
    if DATASET_CREATED:
        return

    try:
        create_repo(
            DATASET_NAME,
            repo_type="dataset",
            token=HF_TOKEN,
            exist_ok=True,
        )
    except Exception as e:
        print(f"Error setting up dataset: {e}")
    else:
        DATASET_CREATED = True
        print(f"Dataset {DATASET_NAME} is ready")
# Image modes Pillow's JPEG writer accepts; anything else (RGBA, LA, P, ...)
# must be converted before saving to a ".jpg" path.
_JPEG_WRITABLE_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")


def _save_submission_image(input_img, image_path):
    """Write the submitted image to ``image_path`` (a ``.jpg`` file name)."""
    if isinstance(input_img, str):
        # A string is treated as a path to an existing file and copied
        # verbatim, so the stored bytes keep whatever format the source had.
        shutil.copy(input_img, image_path)
        return

    # Otherwise the object is expected to be a PIL Image. Saving e.g. an RGBA
    # or palette image as JPEG raises OSError, so convert a copy to RGB first.
    mode = getattr(input_img, "mode", None)
    if mode is not None and mode not in _JPEG_WRITABLE_MODES:
        input_img = input_img.convert("RGB")
    input_img.save(image_path)


def _upload_submission(submission_id, json_path, json_filename, image_path):
    """Best-effort upload of the JSON record (and image, if any) to the dataset."""
    try:
        api = HfApi()
        # Upload the JSON data
        api.upload_file(
            path_or_fileobj=json_path,
            path_in_repo=f"submissions/{json_filename}",
            repo_id=DATASET_NAME,
            repo_type="dataset",
            token=HF_TOKEN,
        )
        # Upload the image if it exists
        if image_path and os.path.exists(image_path):
            api.upload_file(
                path_or_fileobj=image_path,
                path_in_repo=f"images/{os.path.basename(image_path)}",
                repo_id=DATASET_NAME,
                repo_type="dataset",
                token=HF_TOKEN,
            )
        print(f"Submission {submission_id} uploaded to Hugging Face Dataset")
    except Exception as e:
        # Upload failures are reported but never abort the submission; the
        # local copies written by the caller remain on disk.
        print(f"Error uploading to dataset: {e}")


def process_submission(
    input_img,
    text_answer,
    multiple_choice,
    city,
    country,
    se_asia_relevance,
    culture_knowledge,
    native_caption,
    english_caption,
):
    """Store one form submission locally and, when configured, upload it.

    A UUID and a ``%Y%m%d_%H%M%S`` timestamp name the files. The image (if
    given) is written to ``uploaded_images/<timestamp>_<id>.jpg`` and the
    answers to ``submissions/<timestamp>_<id>.json``. When both ``HF_TOKEN``
    and ``DATASET_CREATED`` are truthy the two files are also uploaded to
    ``DATASET_NAME``; upload errors are printed, not raised.

    Args:
        input_img: ``None``, a file path string, or an object with a
            ``save(path)`` method (a PIL Image).
        text_answer: Stored under ``"cultural_relevance"``.
        multiple_choice: Stored under ``"continent"``.
        city: Stored under ``"city"``.
        country: Stored under ``"country"``.
        se_asia_relevance: Stored under ``"se_asia_relevance"``.
        culture_knowledge: Stored under ``"cultural_knowledge_source"``.
        native_caption: Stored under ``"native_caption"``.
        english_caption: Stored under ``"english_caption"``.

    Returns:
        An 8-tuple for display: the original ``input_img`` followed by seven
        formatted strings echoing the answers.
    """
    # Generate unique ID for this submission
    submission_id = str(uuid.uuid4())
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    file_stem = f"{timestamp}_{submission_id}"

    # Save the image if provided
    image_path = None
    if input_img is not None:
        image_path = os.path.join("uploaded_images", f"{file_stem}.jpg")
        _save_submission_image(input_img, image_path)

    # Create a data structure for the submission
    submission_data = {
        "id": submission_id,
        "timestamp": timestamp,
        "image_filename": os.path.basename(image_path) if image_path else None,
        "cultural_relevance": text_answer,
        "continent": multiple_choice,
        "city": city,
        "country": country,
        "se_asia_relevance": se_asia_relevance,
        "cultural_knowledge_source": culture_knowledge,
        "native_caption": native_caption,
        "english_caption": english_caption,
    }

    # Save the data as JSON. UTF-8 with ensure_ascii=False keeps non-Latin
    # captions human-readable instead of \uXXXX escapes.
    json_filename = f"{file_stem}.json"
    json_path = os.path.join("submissions", json_filename)
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(submission_data, f, indent=2, ensure_ascii=False)

    # Upload to Hugging Face Dataset if token is available
    if HF_TOKEN and DATASET_CREATED:
        _upload_submission(submission_id, json_path, json_filename, image_path)

    # Return values to display in the interface
    return (
        input_img,
        f"Your text response: {text_answer}",
        f"Your selected option: {multiple_choice}",
        f"Location: {city}, {country}",
        f"SE Asia relevance: {se_asia_relevance}",
        f"Cultural knowledge source: {culture_knowledge}",
        f"Native caption: {native_caption}",
        f"English caption: {english_caption}",
    )
# Initialize the dataset
setup_hf_dataset()

# Build the submission form. The original text interleaved a gr.Interface(...)
# call with a gr.Blocks() body, which cannot parse; the Blocks layout is kept
# because it is the only one that wires up the country -> state dropdown.
#
# NOTE(review): country_to_states is not defined in the visible part of this
# file. It is used as a dict mapping a country name to a list of
# states/provinces -- confirm where it is supposed to come from.
with gr.Blocks() as gradio_app:
    gr.Markdown("## South Asian Image Data Collection")
    gr.Markdown("Upload an image and answer questions about its cultural significance.")

    selected_country = gr.Dropdown(
        choices=list(country_to_states.keys()),
        label="Country where the image was taken:",
        interactive=True,
    )
    selected_state = gr.Dropdown(
        choices=[],
        label="State / Province",
        interactive=True,
    )
    city_field = gr.Textbox(label="City (optional)")
    image_input = gr.Image(label="Upload an image", sources=['upload', 'webcam'], type="pil")
    cultural_text = gr.Textbox(label="The image portrays culturally-relevant information in:")
    sa_relevance = gr.Radio(
        choices=[
            "Yes. Unique to South Asia",
            "Yes, people will likely think of South Asia when seeing the picture, but it may have low degree of similarity to other cultures.",
            "Maybe, this culture did not originate from South Asia, but it's quite dominant in South Asia",
            "Not really. It has some affiliation to South Asia, but actually does not represent South Asia or has stronger affiliation to cultures outside South Asia",
            "No. Totally unrelated to South Asia",
        ],
        label="Is the image culturally relevant in South Asia?",
    )
    knowledge_source = gr.Radio(
        choices=[
            "I'm from this country/culture",
            "I checked online resources (e.g., Wikipedia, articles, blogs)",
        ],
        label="How do you know about this culture?",
        info="Please do not consult LLMs",
    )
    native_caption = gr.Textbox(label="Caption in Native Language:")
    english_caption = gr.Textbox(label="English Caption:")

    # One component per element of the tuple returned by process_submission.
    output_components = [
        gr.Image(label="Submitted Image"),
        gr.Text(label="Text Response"),
        gr.Text(label="State / Province"),
        gr.Text(label="Location Information"),
        gr.Text(label="South Asia Cultural Relevance"),
        gr.Text(label="Cultural Knowledge Source"),
        gr.Text(label="Native Language Caption"),
        gr.Text(label="English Caption"),
    ]

    # Link dynamic dropdown update
    def update_states(selected_country):
        """Repopulate the state dropdown for the chosen country and clear its value."""
        # gr.update is used instead of gr.Dropdown.update, which Gradio 4
        # removed; the `sources=` argument above is Gradio 4 syntax.
        return gr.update(choices=country_to_states.get(selected_country, []), value=None)

    selected_country.change(fn=update_states, inputs=selected_country, outputs=selected_state)

    submit_btn = gr.Button("Submit")
    # Inputs are positional, so they must follow process_submission's
    # signature: (input_img, text_answer, multiple_choice, city, country, ...).
    # The function has no state parameter, so the state/province travels in
    # the `multiple_choice` slot (which it stores under "continent"); city and
    # country now land in their own slots instead of being shifted by one.
    submit_btn.click(
        process_submission,
        inputs=[
            image_input,
            cultural_text,
            selected_state,
            city_field,
            selected_country,
            sa_relevance,
            knowledge_source,
            native_caption,
            english_caption,
        ],
        outputs=output_components,
    )

if __name__ == "__main__":
    gradio_app.launch()