# Source: crowdsource / app.py (Hugging Face Space file view)
# Last commit: "updated city and state" by minemaster01 (0bd155b, verified)
# File size: 7.94 kB
import gradio as gr
import os
import json
import uuid
from datetime import datetime
import shutil
from huggingface_hub import HfApi, create_repo, upload_file, upload_folder
# Local folders that receive every upload and its JSON record.
os.makedirs(name="uploaded_images", exist_ok=True)
os.makedirs(name="submissions", exist_ok=True)

# Hugging Face Dataset settings.
# The access token is read from the environment; configure it as a secret
# in the Space settings.
HF_TOKEN = os.getenv("HF_TOKEN")
# Target dataset repository ("username/dataset-name"); change to your own.
DATASET_NAME = "srishtiy/se-culture-dataset-results"
# Set to True by setup_hf_dataset() once the repository is confirmed to exist.
DATASET_CREATED = False
def setup_hf_dataset():
    """Ensure the Hugging Face dataset repository exists.

    Reads the module-level ``HF_TOKEN`` and ``DATASET_NAME``. When a token is
    available and the repository has not been confirmed yet, calls
    ``create_repo(..., exist_ok=True)`` and sets the module-level
    ``DATASET_CREATED`` flag to ``True`` on success.

    Without a token a warning is printed and nothing else happens. Any error
    raised by ``create_repo`` is printed rather than propagated, so the app
    can keep running with local-only storage.
    """
    global DATASET_CREATED

    if not HF_TOKEN:
        print("Warning: HF_TOKEN not set. Data will be stored locally only.")
        return

    if DATASET_CREATED:
        # Repository already confirmed during this process; nothing to do.
        return

    try:
        # exist_ok=True makes this a no-op when the repository already exists.
        create_repo(
            DATASET_NAME,
            repo_type="dataset",
            token=HF_TOKEN,
            exist_ok=True,
        )
    except Exception as e:
        # Best effort: a failed setup must not prevent the UI from starting.
        print(f"Error setting up dataset: {e}")
    else:
        DATASET_CREATED = True
        print(f"Dataset {DATASET_NAME} is ready")
def process_submission(input_img, text_answer, multiple_choice, city, country, se_asia_relevance, culture_knowledge, native_caption, english_caption):
    """Store one form submission locally and mirror it to the Hub dataset.

    The image (if any) is written to ``uploaded_images/`` and a JSON record
    of all answers is written to ``submissions/``. Both files share the stem
    ``<timestamp>_<uuid4>``. When ``HF_TOKEN`` is set and ``DATASET_CREATED``
    is true, both files are also uploaded to ``DATASET_NAME``; upload errors
    are printed and do not fail the submission.

    Args:
        input_img: ``None``, a file path (``str``), or an image object with
            PIL's ``mode`` / ``convert`` / ``save`` API.
        text_answer: Stored under the ``cultural_relevance`` key.
        multiple_choice: Stored under the ``continent`` key.
        city: Stored under the ``city`` key.
        country: Stored under the ``country`` key.
        se_asia_relevance: Stored under the ``se_asia_relevance`` key.
        culture_knowledge: Stored under ``cultural_knowledge_source``.
        native_caption: Stored under the ``native_caption`` key.
        english_caption: Stored under the ``english_caption`` key.

    Returns:
        An 8-tuple for the output components: the unmodified ``input_img``
        followed by seven human-readable summary strings.

    NOTE(review): the Submit handler in this file passes country, state and
    city (in that order) into the ``multiple_choice``, ``city`` and
    ``country`` positions, so the stored keys may not match their contents.
    Confirm the intended mapping with the form owner.
    """
    # Unique, sortable stem shared by the image and its JSON record.
    submission_id = str(uuid.uuid4())
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    file_stem = f"{timestamp}_{submission_id}"

    # Save the image if one was provided.
    image_path = None
    image_filename = None
    if input_img is not None:
        image_filename = f"{file_stem}.jpg"
        image_path = os.path.join("uploaded_images", image_filename)
        if isinstance(input_img, str):
            # Already a file on disk: copy it as-is.
            shutil.copy(input_img, image_path)
        else:
            # The target is always a .jpg file, and JPEG cannot encode alpha
            # or palette modes (e.g. RGBA, LA, P): saving such an image
            # raises. Convert a copy so the object returned to the UI stays
            # untouched; modes JPEG already supports are saved unchanged.
            jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
            to_save = input_img
            if getattr(input_img, "mode", "RGB") not in jpeg_modes:
                to_save = input_img.convert("RGB")
            to_save.save(image_path)

    # Record of everything the contributor entered.
    submission_data = {
        "id": submission_id,
        "timestamp": timestamp,
        "image_filename": image_filename,
        "cultural_relevance": text_answer,
        "continent": multiple_choice,
        "city": city,
        "country": country,
        "se_asia_relevance": se_asia_relevance,
        "cultural_knowledge_source": culture_knowledge,
        "native_caption": native_caption,
        "english_caption": english_caption
    }

    # Persist the record locally; the encoding is pinned so the file does not
    # depend on the platform default.
    json_filename = f"{file_stem}.json"
    json_path = os.path.join("submissions", json_filename)
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(submission_data, f, indent=2)

    # Mirror to the Hugging Face dataset when it has been set up.
    if HF_TOKEN and DATASET_CREATED:
        try:
            api = HfApi()
            api.upload_file(
                path_or_fileobj=json_path,
                path_in_repo=f"submissions/{json_filename}",
                repo_id=DATASET_NAME,
                repo_type="dataset",
                token=HF_TOKEN
            )
            if image_path and os.path.exists(image_path):
                api.upload_file(
                    path_or_fileobj=image_path,
                    path_in_repo=f"images/{image_filename}",
                    repo_id=DATASET_NAME,
                    repo_type="dataset",
                    token=HF_TOKEN
                )
            print(f"Submission {submission_id} uploaded to Hugging Face Dataset")
        except Exception as e:
            # Best effort: the local copy already exists, so only report.
            print(f"Error uploading to dataset: {e}")

    # Values shown back to the contributor, one per output component.
    return (
        input_img,
        f"Your text response: {text_answer}",
        f"Your selected option: {multiple_choice}",
        f"Location: {city}, {country}",
        f"SE Asia relevance: {se_asia_relevance}",
        f"Cultural knowledge source: {culture_knowledge}",
        f"Native caption: {native_caption}",
        f"English caption: {english_caption}",
    )
# Make sure the dataset repository exists before any submission arrives.
setup_hf_dataset()

# The UI is built with gr.Blocks. An older gr.Interface(...) definition had
# been left wrapped around this block (a `with` statement inside its
# `inputs=[...]` list), which is a syntax error; only the Blocks layout is
# kept, since it is the one that carries the country/state dropdowns.
#
# NOTE(review): `country_to_states` (mapping of country -> list of states)
# is not defined in the visible part of this file. It must be defined or
# imported before this point, otherwise building the UI raises NameError.
with gr.Blocks() as gradio_app:
    # Title and description go first so they render at the top of the page,
    # as the Interface title/description did.
    gr.Markdown("## South Asian Image Data Collection")
    gr.Markdown("Upload an image and answer questions about its cultural significance.")

    selected_country = gr.Dropdown(
        choices=list(country_to_states.keys()),
        label="Country where the image was taken:",
        interactive=True
    )
    # Starts empty; filled by update_states() once a country is chosen.
    selected_state = gr.Dropdown(
        choices=[],
        label="State / Province",
        interactive=True
    )
    city_field = gr.Textbox(label="City (optional)")
    image_input = gr.Image(label="Upload an image", sources=['upload', 'webcam'], type="pil")
    cultural_text = gr.Textbox(label="The image portrays culturally-relevant information in:")
    sa_relevance = gr.Radio(
        choices=[
            "Yes. Unique to South Asia",
            "Yes, people will likely think of South Asia when seeing the picture, but it may have low degree of similarity to other cultures.",
            "Maybe, this culture did not originate from South Asia, but it's quite dominant in South Asia",
            "Not really. It has some affiliation to South Asia, but actually does not represent South Asia or has stronger affiliation to cultures outside South Asia",
            "No. Totally unrelated to South Asia"
        ],
        label="Is the image culturally relevant in South Asia?"
    )
    knowledge_source = gr.Radio(
        choices=[
            "I'm from this country/culture",
            "I checked online resources (e.g., Wikipedia, articles, blogs)"
        ],
        label="How do you know about this culture?",
        info="Please do not consult LLMs"
    )
    native_caption = gr.Textbox(label="Caption in Native Language:")
    english_caption = gr.Textbox(label="English Caption:")

    # One component per value returned by process_submission (8 in total).
    output_components = [
        gr.Image(label="Submitted Image"),
        gr.Text(label="Text Response"),
        gr.Text(label="Country and State"),
        gr.Text(label="Location Information"),
        gr.Text(label="South Asia Cultural Relevance"),
        gr.Text(label="Cultural Knowledge Source"),
        gr.Text(label="Native Language Caption"),
        gr.Text(label="English Caption")
    ]

    def update_states(selected_country):
        """Return a dropdown update listing the states of *selected_country*.

        Unknown or empty countries yield an empty choice list. The current
        selection is cleared so a state from another country cannot linger.
        """
        # gr.update() is used instead of gr.Dropdown.update(), which was
        # removed in Gradio 4 (the version this file's gr.Image(sources=...)
        # call targets).
        return gr.update(choices=country_to_states.get(selected_country, []), value=None)

    # Refresh the state list whenever the country changes.
    selected_country.change(fn=update_states, inputs=selected_country, outputs=selected_state)

    submit_btn = gr.Button("Submit")
    # NOTE(review): inputs are passed positionally, and process_submission's
    # parameters are (input_img, text_answer, multiple_choice, city, country,
    # ...). With the order below, the country lands in `multiple_choice`, the
    # state in `city`, and the city in `country`. The original wiring is kept
    # here; confirm the intended mapping before relying on the stored keys.
    submit_btn.click(
        process_submission,
        inputs=[
            image_input,
            cultural_text,
            selected_country,
            selected_state,
            city_field,
            sa_relevance,
            knowledge_source,
            native_caption,
            english_caption
        ],
        outputs=output_components
    )

if __name__ == "__main__":
    gradio_app.launch()