Spaces:
Running
Running
File size: 7,940 Bytes
bddfb4b 96e76ac 654128b 96e76ac bddfb4b 654128b bddfb4b 96e76ac 654128b 96e76ac 654128b 96e76ac bddfb4b 654128b bddfb4b 0bd155b bddfb4b 1cf3975 bddfb4b 1cf3975 0bd155b bddfb4b 0bd155b bddfb4b 1cf3975 bddfb4b 1cf3975 bddfb4b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 |
import gradio as gr
import os
import json
import uuid
from datetime import datetime
import shutil
from huggingface_hub import HfApi, create_repo, upload_file, upload_folder
# Local storage: uploaded images and per-submission JSON records live in
# these two folders (created on import if they are missing).
os.makedirs(name="uploaded_images", exist_ok=True)
os.makedirs(name="submissions", exist_ok=True)

# Hugging Face Dataset settings.
# HF_TOKEN is read from the environment (configure it as a secret in the Space);
# it is None when the variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
# Target dataset repository, in "username/dataset-name" form; change to your own.
DATASET_NAME = "srishtiy/se-culture-dataset-results"
# Flipped to True by setup_hf_dataset() once the dataset repo is confirmed to exist.
DATASET_CREATED = False
def setup_hf_dataset():
    """Initialize the Hugging Face dataset if it doesn't exist.

    When ``HF_TOKEN`` is set and the dataset has not been marked as created yet,
    calls ``create_repo`` for ``DATASET_NAME`` (``exist_ok=True``, so an already
    existing repo is not an error) and sets the module-level ``DATASET_CREATED``
    flag to ``True`` on success.

    This is best-effort: any exception from ``create_repo`` is printed and
    swallowed so the app can still start and store data locally. When
    ``HF_TOKEN`` is not set, only a warning is printed.
    """
    global DATASET_CREATED

    if not HF_TOKEN:
        print("Warning: HF_TOKEN not set. Data will be stored locally only.")
        return

    if DATASET_CREATED:
        # Already set up earlier in this process; nothing to do.
        return

    try:
        create_repo(
            DATASET_NAME,
            repo_type="dataset",
            token=HF_TOKEN,
            exist_ok=True,
        )
    except Exception as e:
        # Deliberately broad: a Hub failure must not prevent the app from starting.
        print(f"Error setting up dataset: {e}")
        return

    DATASET_CREATED = True
    print(f"Dataset {DATASET_NAME} is ready")
def process_submission(input_img, text_answer, multiple_choice, city, country, se_asia_relevance, culture_knowledge, native_caption, english_caption):
    """Persist one form submission locally and, when configured, on the Hub.

    The submission gets a fresh UUID and a ``%Y%m%d_%H%M%S`` timestamp; both are
    used to name the stored files.

    Args:
        input_img: ``None``, a file path (``str``) that is copied, or an image
            object exposing PIL-style ``mode`` / ``convert`` / ``save``.
        text_answer: Stored under the ``"cultural_relevance"`` key.
        multiple_choice: Stored under the ``"continent"`` key.
        city: Stored under the ``"city"`` key.
        country: Stored under the ``"country"`` key.
        se_asia_relevance: Stored under the ``"se_asia_relevance"`` key.
        culture_knowledge: Stored under the ``"cultural_knowledge_source"`` key.
        native_caption: Stored under the ``"native_caption"`` key.
        english_caption: Stored under the ``"english_caption"`` key.

    Returns:
        An 8-tuple: the original ``input_img`` followed by seven display strings
        echoing the submitted answers.

    Side effects:
        Writes ``uploaded_images/<timestamp>_<id>.jpg`` (if an image was given)
        and ``submissions/<timestamp>_<id>.json``. If ``HF_TOKEN`` and
        ``DATASET_CREATED`` are both truthy, uploads both files to
        ``DATASET_NAME``; upload errors are printed, not raised.
    """
    # Generate unique ID for this submission
    submission_id = str(uuid.uuid4())
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    file_stem = f"{timestamp}_{submission_id}"

    # Save the image if provided
    image_path = None
    if input_img is not None:
        image_path = os.path.join("uploaded_images", f"{file_stem}.jpg")
        if isinstance(input_img, str):
            # A file path: copy the file as-is.
            shutil.copy(input_img, image_path)
        else:
            # An image object. The target is always a .jpg, and JPEG cannot
            # store alpha/palette modes (e.g. RGBA or P from PNG/GIF uploads),
            # so convert those to RGB first instead of failing in save().
            jpeg_ready = input_img
            if jpeg_ready.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
                jpeg_ready = jpeg_ready.convert("RGB")
            jpeg_ready.save(image_path)

    # Create a data structure for the submission
    submission_data = {
        "id": submission_id,
        "timestamp": timestamp,
        "image_filename": os.path.basename(image_path) if image_path else None,
        "cultural_relevance": text_answer,
        "continent": multiple_choice,
        "city": city,
        "country": country,
        "se_asia_relevance": se_asia_relevance,
        "cultural_knowledge_source": culture_knowledge,
        "native_caption": native_caption,
        "english_caption": english_caption,
    }

    # Save the data as JSON. Explicit UTF-8 and ensure_ascii=False keep
    # native-language captions readable in the file instead of \uXXXX escapes.
    json_filename = f"{file_stem}.json"
    json_path = os.path.join("submissions", json_filename)
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(submission_data, f, indent=2, ensure_ascii=False)

    # Upload to Hugging Face Dataset if token is available
    if HF_TOKEN and DATASET_CREATED:
        try:
            api = HfApi()
            # Upload the JSON data
            api.upload_file(
                path_or_fileobj=json_path,
                path_in_repo=f"submissions/{json_filename}",
                repo_id=DATASET_NAME,
                repo_type="dataset",
                token=HF_TOKEN,
            )
            # Upload the image if it exists
            if image_path and os.path.exists(image_path):
                api.upload_file(
                    path_or_fileobj=image_path,
                    path_in_repo=f"images/{os.path.basename(image_path)}",
                    repo_id=DATASET_NAME,
                    repo_type="dataset",
                    token=HF_TOKEN,
                )
            print(f"Submission {submission_id} uploaded to Hugging Face Dataset")
        except Exception as e:
            # Best-effort: the submission is already saved locally, so a failed
            # upload is reported but does not fail the request.
            print(f"Error uploading to dataset: {e}")

    # Return values to display in the interface
    return (
        input_img,
        f"Your text response: {text_answer}",
        f"Your selected option: {multiple_choice}",
        f"Location: {city}, {country}",
        f"SE Asia relevance: {se_asia_relevance}",
        f"Cultural knowledge source: {culture_knowledge}",
        f"Native caption: {native_caption}",
        f"English caption: {english_caption}",
    )
# Initialize the dataset
setup_hf_dataset()

# Build the UI as a single Blocks app. The previous text had a `with gr.Blocks()`
# layout pasted inside the `inputs=[...]` list of a `gr.Interface(...)` call,
# which is a SyntaxError; the Blocks layout is kept because it is the only one
# whose input components are complete.
#
# NOTE(review): `country_to_states` (country name -> list of states/provinces)
# is not defined in the visible part of this file; it must be defined or
# imported before this point, otherwise this raises NameError at startup.
with gr.Blocks() as gradio_app:
    gr.Markdown("## South Asian Image Data Collection")
    gr.Markdown("Upload an image and answer questions about its cultural significance.")

    selected_country = gr.Dropdown(
        choices=list(country_to_states.keys()),
        label="Country where the image was taken:",
        interactive=True
    )
    selected_state = gr.Dropdown(
        choices=[],
        label="State / Province",
        interactive=True
    )
    city_field = gr.Textbox(label="City (optional)")
    image_input = gr.Image(label="Upload an image", sources=['upload', 'webcam'], type="pil")
    cultural_text = gr.Textbox(label="The image portrays culturally-relevant information in:")
    sa_relevance = gr.Radio(
        choices=[
            "Yes. Unique to South Asia",
            "Yes, people will likely think of South Asia when seeing the picture, but it may have low degree of similarity to other cultures.",
            "Maybe, this culture did not originate from South Asia, but it's quite dominant in South Asia",
            "Not really. It has some affiliation to South Asia, but actually does not represent South Asia or has stronger affiliation to cultures outside South Asia",
            "No. Totally unrelated to South Asia"
        ],
        label="Is the image culturally relevant in South Asia?"
    )
    knowledge_source = gr.Radio(
        choices=[
            "I'm from this country/culture",
            "I checked online resources (e.g., Wikipedia, articles, blogs)"
        ],
        label="How do you know about this culture?",
        info="Please do not consult LLMs"
    )
    native_caption = gr.Textbox(label="Caption in Native Language:")
    english_caption = gr.Textbox(label="English Caption:")

    # One output per element of the tuple returned by process_submission.
    output_components = [
        gr.Image(label="Submitted Image"),
        gr.Text(label="Text Response"),
        gr.Text(label="State / Province"),
        gr.Text(label="Location Information"),
        gr.Text(label="South Asia Cultural Relevance"),
        gr.Text(label="Cultural Knowledge Source"),
        gr.Text(label="Native Language Caption"),
        gr.Text(label="English Caption")
    ]

    # Link dynamic dropdown update
    def update_states(country_name):
        """Return an update that repopulates the state dropdown for ``country_name``.

        Unknown or empty countries yield an empty choice list; the current
        selection is cleared either way. Uses ``gr.update`` because the
        per-component ``gr.Dropdown.update`` classmethod was removed in Gradio 4.
        """
        return gr.update(choices=country_to_states.get(country_name, []), value=None)

    selected_country.change(fn=update_states, inputs=selected_country, outputs=selected_state)

    submit_btn = gr.Button("Submit")
    submit_btn.click(
        process_submission,
        # Order follows process_submission's positional parameters:
        # (input_img, text_answer, multiple_choice, city, country, ...).
        # The city textbox and country dropdown are passed as `city` and
        # `country` so they are stored under the matching keys; the state has
        # no dedicated parameter and is passed as `multiple_choice`.
        inputs=[
            image_input,
            cultural_text,
            selected_state,
            city_field,
            selected_country,
            sa_relevance,
            knowledge_source,
            native_caption,
            english_caption
        ],
        outputs=output_components
    )

if __name__ == "__main__":
    gradio_app.launch()