# Hugging Face Spaces status: Running
import gradio as gr | |
import os | |
import json | |
import uuid | |
from datetime import datetime | |
import shutil | |
from huggingface_hub import HfApi, create_repo, upload_file, upload_folder | |
# Local working directories: uploaded images and per-submission JSON records
# are written here.
for _storage_dir in ("uploaded_images", "submissions"):
    os.makedirs(_storage_dir, exist_ok=True)

# Hugging Face dataset settings.
# HF_TOKEN is read from the environment (set it as a secret in your Space);
# it is None when the variable is absent.
HF_TOKEN = os.getenv("HF_TOKEN")
# Target dataset repo; change to your own username/dataset-name.
DATASET_NAME = "srishtiy/se-culture-dataset-results"
# Set to True by setup_hf_dataset() once create_repo() has succeeded.
DATASET_CREATED = False
# Lookup table: country name -> list of state/province names offered in the
# State/Province dropdown once that country is selected.
states_by_country = {
    # 36 entries
    "India": [
        "Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar",
        "Chhattisgarh", "Goa", "Gujarat", "Haryana",
        "Himachal Pradesh", "Jharkhand", "Karnataka", "Kerala",
        "Madhya Pradesh", "Maharashtra", "Manipur", "Meghalaya",
        "Mizoram", "Nagaland", "Odisha", "Punjab",
        "Rajasthan", "Sikkim", "Tamil Nadu", "Telangana",
        "Tripura", "Uttar Pradesh", "Uttarakhand", "West Bengal",
        "Andaman and Nicobar Islands", "Chandigarh",
        "Dadra and Nagar Haveli and Daman and Diu", "Delhi",
        "Jammu and Kashmir", "Ladakh", "Lakshadweep", "Puducherry",
    ],
    # 7 entries
    "Pakistan": [
        "Balochistan", "Khyber Pakhtunkhwa", "Punjab", "Sindh",
        "Islamabad Capital Territory", "Azad Jammu and Kashmir",
        "Gilgit-Baltistan",
    ],
    # 8 entries
    "Bangladesh": [
        "Barisal", "Chittagong", "Dhaka", "Khulna",
        "Mymensingh", "Rajshahi", "Rangpur", "Sylhet",
    ],
    # 34 entries
    "Afghanistan": [
        "Badakhshan", "Badghis", "Baghlan", "Balkh", "Bamyan",
        "Daykundi", "Farah", "Faryab", "Ghazni", "Ghor",
        "Helmand", "Herat", "Jowzjan", "Kabul", "Kandahar",
        "Kapisa", "Khost", "Kunar", "Kunduz", "Laghman",
        "Logar", "Nangarhar", "Nimruz", "Nuristan", "Paktia",
        "Paktika", "Panjshir", "Parwan", "Samangan", "Sar-e Pol",
        "Takhar", "Uruzgan", "Wardak", "Zabul",
    ],
    # 20 entries
    "Bhutan": [
        "Bumthang", "Chukha", "Dagana", "Gasa", "Haa",
        "Lhuentse", "Mongar", "Paro", "Pemagatshel", "Punakha",
        "Samdrup Jongkhar", "Samtse", "Sarpang", "Thimphu", "Trashigang",
        "Trashiyangtse", "Trongsa", "Tsirang", "Wangdue Phodrang", "Zhemgang",
    ],
    # 7 entries
    "Nepal": [
        "Bagmati", "Gandaki", "Karnali", "Koshi",
        "Lumbini", "Madhesh", "Sudurpashchim",
    ],
    # 9 entries
    "Sri Lanka": [
        "Central", "Eastern", "North Central", "Northern", "North Western",
        "Sabaragamuwa", "Southern", "Uva", "Western",
    ],
}
def setup_hf_dataset():
    """Create the Hugging Face dataset repo if needed and mark it as ready.

    Reads the module-level ``HF_TOKEN`` and ``DATASET_NAME``. On a successful
    ``create_repo`` call (``exist_ok=True``, so an existing repo is not an
    error) the module-level ``DATASET_CREATED`` flag is set to True.

    Without a token a warning is printed and nothing else happens. Any
    exception raised by ``create_repo`` is printed rather than propagated, so
    the app can still start and store data locally.
    """
    global DATASET_CREATED

    if not HF_TOKEN:
        print("Warning: HF_TOKEN not set. Data will be stored locally only.")
        return
    if DATASET_CREATED:
        # Already set up during this process; nothing to do.
        return

    try:
        create_repo(
            DATASET_NAME,
            repo_type="dataset",
            token=HF_TOKEN,
            exist_ok=True,
        )
    except Exception as e:
        # Best-effort: a failed setup must not prevent the UI from launching.
        print(f"Error setting up dataset: {e}")
    else:
        DATASET_CREATED = True
        print(f"Dataset {DATASET_NAME} is ready")
def update_state_dropdown(country):
    """Return a State/Province dropdown update for the selected country.

    Args:
        country: The value of the country dropdown; may be None or a name
            that is not a key of ``states_by_country``.

    Returns:
        A ``gr.Dropdown`` whose choices are the states listed for ``country``
        in ``states_by_country``, or an empty list when the country is not in
        that table. The selected value is always reset to None so a state
        picked for a previously selected country cannot linger and be
        submitted alongside a different country.
    """
    states = states_by_country.get(country)
    if states is None:
        return gr.Dropdown(choices=[], value=None, label="State/Province:", interactive=True)
    return gr.Dropdown(
        choices=states,
        value=None,
        label=f"State/Province in {country}:",
        interactive=True,
    )
def process_submission(input_img, text_answer, country, state, city, se_asia_relevance, culture_knowledge, native_caption, english_caption):
    """Store one form submission locally, mirror it to the HF dataset, and echo it back.

    The image (if any) is written to ``uploaded_images/`` and the answers to
    ``submissions/`` as JSON; both files share a ``<timestamp>_<uuid>`` stem.
    When ``HF_TOKEN`` is set and ``DATASET_CREATED`` is true, both files are
    also uploaded to the ``DATASET_NAME`` dataset repo. Upload errors are
    printed, not raised, so the local copy is kept even if the upload fails.

    Args:
        input_img: The uploaded image: a file path (str), an object exposing
            PIL's ``mode`` / ``convert`` / ``save``, or None when no image
            was provided.
        text_answer: Free-text answer stored under ``cultural_relevance``.
        country: Selected country.
        state: Selected state/province (may be empty or None).
        city: City entered by the user.
        se_asia_relevance: Selected cultural-relevance option.
        culture_knowledge: Selected knowledge-source option.
        native_caption: Caption in the native language.
        english_caption: Caption in English.

    Returns:
        A 7-tuple for the output components: the original ``input_img``
        followed by six display strings (text response, location, relevance,
        knowledge source, native caption, English caption).
    """
    # Unique ID plus a local-time timestamp make up the shared file stem.
    submission_id = str(uuid.uuid4())
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Save the image if one was provided.
    image_path = None
    if input_img is not None:
        image_filename = f"{timestamp}_{submission_id}.jpg"
        image_path = os.path.join("uploaded_images", image_filename)
        if isinstance(input_img, str):  # a file path
            shutil.copy(input_img, image_path)
        else:  # a PIL image
            # JPEG cannot store alpha or palette modes (e.g. RGBA/LA/P from
            # PNG uploads); saving those directly raises OSError. Convert a
            # copy so the image returned to the UI is left untouched.
            jpeg_ready = input_img
            if getattr(input_img, "mode", "RGB") not in ("RGB", "L", "CMYK"):
                jpeg_ready = input_img.convert("RGB")
            jpeg_ready.save(image_path)

    # Record of the submission. Key names are the stored schema; keep stable.
    submission_data = {
        "id": submission_id,
        "timestamp": timestamp,
        "image_filename": os.path.basename(image_path) if image_path else None,
        "cultural_relevance": text_answer,
        "country": country,
        "state": state,
        "city": city,
        "se_asia_relevance": se_asia_relevance,
        "cultural_knowledge_source": culture_knowledge,
        "native_caption": native_caption,
        "english_caption": english_caption,
    }

    # Write UTF-8 without ASCII escaping so native-language captions stay
    # human-readable in the stored files.
    json_filename = f"{timestamp}_{submission_id}.json"
    json_path = os.path.join("submissions", json_filename)
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(submission_data, f, indent=2, ensure_ascii=False)

    # Upload to the Hugging Face dataset only when setup succeeded earlier.
    if HF_TOKEN and DATASET_CREATED:
        try:
            api = HfApi()
            api.upload_file(
                path_or_fileobj=json_path,
                path_in_repo=f"submissions/{json_filename}",
                repo_id=DATASET_NAME,
                repo_type="dataset",
                token=HF_TOKEN,
            )
            if image_path and os.path.exists(image_path):
                api.upload_file(
                    path_or_fileobj=image_path,
                    path_in_repo=f"images/{os.path.basename(image_path)}",
                    repo_id=DATASET_NAME,
                    repo_type="dataset",
                    token=HF_TOKEN,
                )
            print(f"Submission {submission_id} uploaded to Hugging Face Dataset")
        except Exception as e:
            # Best-effort mirror: the local files above are already written.
            print(f"Error uploading to dataset: {e}")

    # Join only the parts that were actually provided, so the echo never
    # shows "None" or a doubled "Location:" label.
    location_parts = [part for part in (city, state, country) if part]
    location_info = ", ".join(location_parts) or "not provided"

    return (
        input_img,
        f"Your text response: {text_answer}",
        f"Selected location: {location_info}",
        # The form asks about South Asia; the JSON key keeps its legacy name.
        f"South Asia relevance: {se_asia_relevance}",
        f"Cultural knowledge source: {culture_knowledge}",
        f"Native caption: {native_caption}",
        f"English caption: {english_caption}",
    )
def clear_inputs():
    """Return blank values for the nine form inputs.

    The tuple order matches the ``outputs`` list of the Clear button:
    image, text answer, country, state, city, relevance, knowledge source,
    native caption, English caption. Text fields reset to "" and the image,
    dropdown, and radio fields reset to None.
    """
    unset = None
    blank = ""
    return (
        unset,  # image
        blank,  # text answer
        unset,  # country
        unset,  # state
        blank,  # city
        unset,  # relevance radio
        unset,  # knowledge-source radio
        blank,  # native caption
        blank,  # English caption
    )
# Initialize the dataset at import time, before the UI is built. On success
# this sets DATASET_CREATED, which process_submission checks before uploading.
setup_hf_dataset()
# Choice lists are named up front so the layout below reads top to bottom.
_COUNTRY_CHOICES = ["India", "Pakistan", "Bangladesh", "Afghanistan", "Bhutan", "Nepal", "Sri Lanka"]
_RELEVANCE_CHOICES = [
    "Yes. Unique to South Asia",
    "Yes, people will likely think of South Asia when seeing the picture, but it may have low degree of similarity to other cultures.",
    "Maybe, this culture did not originate from South Asia, but it's quite dominant in South Asia",
    "Not really. It has some affiliation to South Asia, but actually does not represent South Asia or has stronger affiliation to cultures outside South Asia",
    "No. Totally unrelated to South Asia",
]
_KNOWLEDGE_CHOICES = [
    "I'm from this country/culture",
    "I checked online resources (e.g., Wikipedia, articles, blogs)",
]

with gr.Blocks() as gradio_app:
    gr.Markdown("# South Asian Image Data Collection")
    gr.Markdown("Upload an image and answer questions about its cultural significance.")

    # ---- Input form: one component per row ----
    with gr.Row():
        input_img = gr.Image(label="Upload an image", sources=['upload', 'webcam'], type="pil")
    with gr.Row():
        text_answer = gr.Textbox(
            label="The image portrays culturally-relevant information in:",
            placeholder="what culture does this image represent?",
        )
    with gr.Row():
        country_dropdown = gr.Dropdown(
            choices=_COUNTRY_CHOICES,
            label="Country where the image was taken:",
            interactive=True,
        )
    with gr.Row():
        # Starts empty; populated by update_state_dropdown when a country is chosen.
        state_dropdown = gr.Dropdown(choices=[], label="State/Province:", interactive=True)
    with gr.Row():
        city_textbox = gr.Textbox(label="City where the image was taken:", placeholder="Enter city name")
    with gr.Row():
        se_asia_relevance = gr.Radio(
            choices=_RELEVANCE_CHOICES,
            label="Is the image culturally relevant in South Asia?",
        )
    with gr.Row():
        culture_knowledge = gr.Radio(
            choices=_KNOWLEDGE_CHOICES,
            label="How do you know about this culture?",
            info="Please do not consult LLMs (e.g., GPT-4o, Claude, Command-R, etc.)",
        )
    with gr.Row():
        native_caption = gr.Textbox(
            label="Caption in Native Language:",
            placeholder="Enter caption in the native language of the culture depicted",
        )
    with gr.Row():
        english_caption = gr.Textbox(label="English Caption:", placeholder="Enter caption in English")

    # ---- Buttons: Clear/Submit share a row, Share sits on its own row ----
    with gr.Row():
        clear_btn = gr.Button("Clear")
        submit_btn = gr.Button("Submit")
    with gr.Row():
        # NOTE(review): no click handler is attached to share_btn in this
        # block; confirm whether it is wired elsewhere or is a placeholder.
        share_btn = gr.Button("Share via Link", variant="secondary", elem_classes=["share-button"])

    # ---- Read-only echo of the submitted values ----
    with gr.Row():
        output_img = gr.Image(label="Submitted Image")
    with gr.Row():
        output_text = gr.Text(label="Text Response")
    with gr.Row():
        output_location = gr.Text(label="Location Information")
    with gr.Row():
        output_relevance = gr.Text(label="South Asia Cultural Relevance")
    with gr.Row():
        output_knowledge = gr.Text(label="Cultural Knowledge Source")
    with gr.Row():
        output_native = gr.Text(label="Native Language Caption")
    with gr.Row():
        output_english = gr.Text(label="English Caption")

    # The nine form fields, in the positional order process_submission takes
    # them and clear_inputs returns them.
    form_fields = [
        input_img,
        text_answer,
        country_dropdown,
        state_dropdown,
        city_textbox,
        se_asia_relevance,
        culture_knowledge,
        native_caption,
        english_caption,
    ]
    # The seven echo components, in the order process_submission returns values.
    echo_fields = [
        output_img,
        output_text,
        output_location,
        output_relevance,
        output_knowledge,
        output_native,
        output_english,
    ]

    # ---- Event wiring ----
    # Changing the country refreshes the state/province choices.
    country_dropdown.change(fn=update_state_dropdown, inputs=country_dropdown, outputs=state_dropdown)
    # Submit stores the entry and fills the echo components.
    submit_btn.click(fn=process_submission, inputs=form_fields, outputs=echo_fields)
    # Clear resets every form field; the echo components are left as they are.
    clear_btn.click(fn=clear_inputs, inputs=[], outputs=form_fields)

    # Styling for the share button, matched through its "share-button" class.
    gr.HTML("""
    <style>
    .share-button {
    background-color: #f0f0f0 !important;
    color: #000 !important;
    border: 1px solid #ccc !important;
    }
    </style>
    """)
# Launch the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    gradio_app.launch()