"""Gradio app that collects culturally annotated images.

Every submission is written to local disk (the image under
``uploaded_images/`` and a JSON record under ``submissions/``). When the
``HF_TOKEN`` environment variable is set and the dataset repository could be
initialised, both files are also uploaded to the Hugging Face dataset named
by ``DATASET_NAME``.
"""

import json
import os
import shutil
import uuid
from datetime import datetime

import gradio as gr
from huggingface_hub import HfApi, create_repo, upload_file, upload_folder

# Create directories for data storage
os.makedirs("uploaded_images", exist_ok=True)
os.makedirs("submissions", exist_ok=True)

# Hugging Face Dataset configuration
HF_TOKEN = os.environ.get("HF_TOKEN")  # You'll need to set this as a secret in your Space
DATASET_NAME = "srishtiy/se-culture-dataset-results"  # Change to your username/dataset-name
DATASET_CREATED = False

# Country -> list of states / provinces / districts offered in the second
# dropdown once a country is picked.
# TODO: populate the region lists; an empty list simply leaves the second
# dropdown without options (the submission then records ``state`` as None).
country_to_states = {
    "Afghanistan": [],
    "Bangladesh": [],
    "Bhutan": [],
    "India": [],
    "Maldives": [],
    "Nepal": [],
    "Pakistan": [],
    "Sri Lanka": [],
}


def setup_hf_dataset() -> None:
    """Initialize the Hugging Face dataset if it doesn't exist.

    Sets the module-level ``DATASET_CREATED`` flag to True once
    ``create_repo`` succeeds. Without ``HF_TOKEN`` only a warning is printed
    and the flag stays False, so submissions are kept locally only.
    """
    global DATASET_CREATED

    if not HF_TOKEN:
        print("Warning: HF_TOKEN not set. Data will be stored locally only.")
        return
    if DATASET_CREATED:
        return

    try:
        create_repo(
            DATASET_NAME,
            repo_type="dataset",
            token=HF_TOKEN,
            exist_ok=True,
        )
    except Exception as e:
        # Best effort: the app must still start and store data locally.
        print(f"Error setting up dataset: {e}")
    else:
        DATASET_CREATED = True
        print(f"Dataset {DATASET_NAME} is ready")


def _save_image(input_img, image_path):
    """Write the submitted image to ``image_path`` (a ``.jpg`` file name)."""
    if isinstance(input_img, str):
        # A file path was supplied: copy the file as-is.
        shutil.copy(input_img, image_path)
        return

    # Otherwise a PIL image. JPEG cannot store alpha or palette modes
    # (e.g. RGBA from a PNG upload), so convert those before saving.
    to_save = input_img
    if to_save.mode not in ("RGB", "L"):
        to_save = to_save.convert("RGB")
    to_save.save(image_path)


def process_submission(input_img, text_answer, multiple_choice, city, country,
                       se_asia_relevance, culture_knowledge, native_caption,
                       english_caption, state=None) -> tuple:
    """Persist one form submission and return the values echoed in the UI.

    Args:
        input_img: Uploaded image as a PIL image or a file path, or None.
        text_answer: Free-text answer, stored as ``cultural_relevance``.
        multiple_choice: Selected option, stored as ``continent``.
        city: City name.
        country: Country name.
        se_asia_relevance: Selected cultural-relevance option.
        culture_knowledge: Selected knowledge-source option.
        native_caption: Caption in the native language.
        english_caption: Caption in English.
        state: Optional state / province / district, stored as ``state``.

    Returns:
        An 8-tuple: the input image followed by seven display strings, in
        the order expected by the output components.
    """
    # Generate unique ID for this submission
    submission_id = str(uuid.uuid4())
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Save the image if provided
    image_path = None
    if input_img is not None:
        image_filename = f"{timestamp}_{submission_id}.jpg"
        image_path = os.path.join("uploaded_images", image_filename)
        _save_image(input_img, image_path)

    # Create a data structure for the submission
    submission_data = {
        "id": submission_id,
        "timestamp": timestamp,
        "image_filename": os.path.basename(image_path) if image_path else None,
        "cultural_relevance": text_answer,
        "continent": multiple_choice,
        "city": city,
        "state": state,
        "country": country,
        "se_asia_relevance": se_asia_relevance,
        "cultural_knowledge_source": culture_knowledge,
        "native_caption": native_caption,
        "english_caption": english_caption,
    }

    # Save the data as JSON. Explicit UTF-8 with ensure_ascii=False keeps
    # native-language captions readable regardless of the platform locale.
    json_filename = f"{timestamp}_{submission_id}.json"
    json_path = os.path.join("submissions", json_filename)
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(submission_data, f, indent=2, ensure_ascii=False)

    # Upload to Hugging Face Dataset if token is available
    if HF_TOKEN and DATASET_CREATED:
        try:
            api = HfApi()
            # Upload the JSON data
            api.upload_file(
                path_or_fileobj=json_path,
                path_in_repo=f"submissions/{json_filename}",
                repo_id=DATASET_NAME,
                repo_type="dataset",
                token=HF_TOKEN,
            )
            # Upload the image if it exists
            if image_path and os.path.exists(image_path):
                api.upload_file(
                    path_or_fileobj=image_path,
                    path_in_repo=f"images/{os.path.basename(image_path)}",
                    repo_id=DATASET_NAME,
                    repo_type="dataset",
                    token=HF_TOKEN,
                )
            print(f"Submission {submission_id} uploaded to Hugging Face Dataset")
        except Exception as e:
            # Best effort: the local copy already exists, so only report.
            print(f"Error uploading to dataset: {e}")

    # Return values to display in the interface
    return (
        input_img,
        f"Your text response: {text_answer}",
        f"Your selected option: {multiple_choice}",
        f"Location: {city}, {country}",
        f"SE Asia relevance: {se_asia_relevance}",
        f"Cultural knowledge source: {culture_knowledge}",
        f"Native caption: {native_caption}",
        f"English caption: {english_caption}",
    )


def update_states(country):
    """Refresh the state dropdown with the regions of the chosen country."""
    return gr.update(choices=country_to_states.get(country, []), value=None)


def _handle_submit(input_img, text_answer, country, state, city,
                   se_asia_relevance, culture_knowledge, native_caption,
                   english_caption):
    """Adapt the form's widget order to ``process_submission``'s parameters.

    The form has no continent field, so ``multiple_choice`` is passed as
    None; the third output shows the selected country and state instead.
    """
    results = list(
        process_submission(
            input_img,
            text_answer,
            None,
            city,
            country,
            se_asia_relevance,
            culture_knowledge,
            native_caption,
            english_caption,
            state=state,
        )
    )
    results[2] = f"Country: {country}, State: {state}"
    return tuple(results)


# Initialize the dataset
setup_hf_dataset()

with gr.Blocks() as gradio_app:
    gr.Markdown("## South Asian Image Data Collection")
    gr.Markdown("Upload an image and answer questions about its cultural significance.")

    selected_country = gr.Dropdown(
        choices=list(country_to_states.keys()),
        label="Country where the image was taken:",
        interactive=True,
    )
    selected_state = gr.Dropdown(
        choices=[],
        label="State / Province / District (updates based on country)",
        interactive=True,
    )
    city_field = gr.Textbox(label="City (optional)")
    image_input = gr.Image(label="Upload an image", sources=['upload', 'webcam'], type="pil")
    cultural_text = gr.Textbox(label="The image portrays culturally-relevant information in:")
    sa_relevance = gr.Radio(
        choices=[
            "Yes. Unique to South Asia",
            "Yes, people will likely think of South Asia when seeing the picture, but it may have low degree of similarity to other cultures.",
            "Maybe, this culture did not originate from South Asia, but it's quite dominant in South Asia",
            "Not really. It has some affiliation to South Asia, but actually does not represent South Asia or has stronger affiliation to cultures outside South Asia",
            "No. Totally unrelated to South Asia",
        ],
        label="Is the image culturally relevant in South Asia?",
    )
    knowledge_source = gr.Radio(
        choices=[
            "I'm from this country/culture",
            "I checked online resources (e.g., Wikipedia, articles, blogs)",
        ],
        label="How do you know about this culture?",
        info="Please do not consult LLMs",
    )
    native_caption = gr.Textbox(label="Caption in Native Language:")
    english_caption = gr.Textbox(label="English Caption:")

    output_components = [
        gr.Image(label="Submitted Image"),
        gr.Text(label="Text Response"),
        gr.Text(label="Country and State"),
        gr.Text(label="Location Information"),
        gr.Text(label="South Asia Cultural Relevance"),
        gr.Text(label="Cultural Knowledge Source"),
        gr.Text(label="Native Language Caption"),
        gr.Text(label="English Caption"),
    ]

    # Link dynamic dropdown update
    selected_country.change(
        fn=update_states,
        inputs=selected_country,
        outputs=selected_state,
    )

    submit_btn = gr.Button("Submit")
    submit_btn.click(
        _handle_submit,
        inputs=[
            image_input,
            cultural_text,
            selected_country,
            selected_state,
            city_field,
            sa_relevance,
            knowledge_source,
            native_caption,
            english_caption,
        ],
        outputs=output_components,
    )

if __name__ == "__main__":
    gradio_app.launch()