crowdsource / app.py
minemaster01's picture
Update app.py
266c914 verified
raw
history blame
11.1 kB
import gradio as gr
import os
import json
import uuid
from datetime import datetime
import shutil
from huggingface_hub import HfApi, create_repo, upload_file, upload_folder
# Local storage: one folder for uploaded images, one for the JSON records.
for _storage_dir in ("uploaded_images", "submissions"):
    os.makedirs(_storage_dir, exist_ok=True)

# Hugging Face dataset settings.
# The token has to be supplied as a secret in the Space; it is None when unset.
HF_TOKEN = os.getenv("HF_TOKEN")
# Target dataset repo in "username/dataset-name" form -- change to your own.
DATASET_NAME = "srishtiy/se-culture-dataset-results"
# Set to True by setup_hf_dataset() once the repo has been created/confirmed.
DATASET_CREATED = False
# First-level administrative divisions offered in the state/province dropdown,
# keyed by the country names used in the country dropdown. Insertion order is
# the order in which the entries are shown.
states_by_country = {
    "India": [
        "Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar",
        "Chhattisgarh", "Goa", "Gujarat", "Haryana",
        "Himachal Pradesh", "Jharkhand", "Karnataka", "Kerala",
        "Madhya Pradesh", "Maharashtra", "Manipur", "Meghalaya",
        "Mizoram", "Nagaland", "Odisha", "Punjab",
        "Rajasthan", "Sikkim", "Tamil Nadu", "Telangana",
        "Tripura", "Uttar Pradesh", "Uttarakhand", "West Bengal",
        "Andaman and Nicobar Islands", "Chandigarh",
        "Dadra and Nagar Haveli and Daman and Diu", "Delhi",
        "Jammu and Kashmir", "Ladakh", "Lakshadweep", "Puducherry",
    ],
    "Pakistan": [
        "Balochistan", "Khyber Pakhtunkhwa", "Punjab", "Sindh",
        "Islamabad Capital Territory", "Azad Jammu and Kashmir",
        "Gilgit-Baltistan",
    ],
    "Bangladesh": [
        "Barisal", "Chittagong", "Dhaka", "Khulna",
        "Mymensingh", "Rajshahi", "Rangpur", "Sylhet",
    ],
    "Afghanistan": [
        "Badakhshan", "Badghis", "Baghlan", "Balkh", "Bamyan",
        "Daykundi", "Farah", "Faryab", "Ghazni", "Ghor",
        "Helmand", "Herat", "Jowzjan", "Kabul", "Kandahar",
        "Kapisa", "Khost", "Kunar", "Kunduz", "Laghman",
        "Logar", "Nangarhar", "Nimruz", "Nuristan", "Paktia",
        "Paktika", "Panjshir", "Parwan", "Samangan", "Sar-e Pol",
        "Takhar", "Uruzgan", "Wardak", "Zabul",
    ],
    "Bhutan": [
        "Bumthang", "Chukha", "Dagana", "Gasa", "Haa",
        "Lhuentse", "Mongar", "Paro", "Pemagatshel", "Punakha",
        "Samdrup Jongkhar", "Samtse", "Sarpang", "Thimphu", "Trashigang",
        "Trashiyangtse", "Trongsa", "Tsirang", "Wangdue Phodrang", "Zhemgang",
    ],
    "Nepal": [
        "Bagmati", "Gandaki", "Karnali", "Koshi",
        "Lumbini", "Madhesh", "Sudurpashchim",
    ],
    "Sri Lanka": [
        "Central", "Eastern", "North Central", "Northern", "North Western",
        "Sabaragamuwa", "Southern", "Uva", "Western",
    ],
}
def setup_hf_dataset():
    """Ensure the Hugging Face dataset repo exists; best-effort, never raises.

    Behaviour by case:
      * ``HF_TOKEN`` unset: print a warning and return (local storage only).
      * repo already confirmed in this process: return without any request.
      * otherwise: call ``create_repo`` with ``exist_ok=True`` and, on
        success, set the module-level ``DATASET_CREATED`` flag.

    Any exception from ``create_repo`` is printed rather than propagated, so
    the flag stays False and a failed setup does not stop the module from
    loading.
    """
    global DATASET_CREATED

    if not HF_TOKEN:
        print("Warning: HF_TOKEN not set. Data will be stored locally only.")
        return
    if DATASET_CREATED:
        return

    try:
        # exist_ok=True makes this safe to run against an existing repo.
        create_repo(
            DATASET_NAME,
            repo_type="dataset",
            token=HF_TOKEN,
            exist_ok=True,
        )
    except Exception as e:
        print(f"Error setting up dataset: {e}")
    else:
        DATASET_CREATED = True
        print(f"Dataset {DATASET_NAME} is ready")
def update_state_dropdown(country):
    """Build the state/province dropdown update for the selected country.

    Args:
        country: Value of the country dropdown; may be None or a name that is
            not a key of ``states_by_country``.

    Returns:
        A ``gr.Dropdown`` used as an update: its choices are the divisions of
        ``country`` (empty when the country is unknown) and its label names
        the country when one is known.
    """
    states = states_by_country.get(country)
    # value=None drops whatever state was picked for the previous country, so
    # a stale selection cannot be submitted alongside the new country.
    if states is None:
        return gr.Dropdown(choices=[], value=None, label="State/Province:", interactive=True)
    return gr.Dropdown(
        choices=states,
        value=None,
        label=f"State/Province in {country}:",
        interactive=True,
    )
def process_submission(
    input_img,
    text_answer,
    country,
    state,
    city,
    se_asia_relevance,
    culture_knowledge,
    native_caption,
    english_caption,
):
    """Store one form submission locally, mirror it to the Hub, and echo it.

    The image (when given) is saved under ``uploaded_images/`` and the answers
    under ``submissions/`` as JSON; both files share the base name
    ``<timestamp>_<uuid>``. If ``HF_TOKEN`` is set and ``DATASET_CREATED`` is
    true, both files are also uploaded to ``DATASET_NAME``. Upload errors are
    printed and do not fail the submission.

    Args:
        input_img: Uploaded image, either a file path (copied as-is) or an
            object with a ``save(path)`` method such as a PIL image; None if
            no image was supplied.
        text_answer: Free-text description of the culture shown.
        country: Selected country.
        state: Selected state/province; may be empty.
        city: City name typed by the user.
        se_asia_relevance: Selected cultural-relevance option.
        culture_knowledge: Selected knowledge-source option.
        native_caption: Caption in the native language.
        english_caption: Caption in English.

    Returns:
        A 7-tuple for the output widgets: the image unchanged, followed by six
        labelled strings (text response, location, relevance, knowledge
        source, native caption, English caption).
    """
    submission_id = str(uuid.uuid4())
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # Image and JSON share one base name so they can be paired up later.
    base_name = f"{timestamp}_{submission_id}"

    # Save the image if provided.
    image_path = None
    if input_img is not None:
        image_path = os.path.join("uploaded_images", f"{base_name}.jpg")
        if isinstance(input_img, str):  # a file path
            shutil.copy(input_img, image_path)
        else:  # an image object exposing .save()
            input_img.save(image_path)

    submission_data = {
        "id": submission_id,
        "timestamp": timestamp,
        "image_filename": os.path.basename(image_path) if image_path else None,
        "cultural_relevance": text_answer,
        "country": country,
        "state": state,
        "city": city,
        "se_asia_relevance": se_asia_relevance,
        "cultural_knowledge_source": culture_knowledge,
        "native_caption": native_caption,
        "english_caption": english_caption,
    }

    json_filename = f"{base_name}.json"
    json_path = os.path.join("submissions", json_filename)
    # Explicit UTF-8 with ensure_ascii=False keeps native-language captions
    # readable in the stored file instead of \uXXXX escapes.
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(submission_data, f, indent=2, ensure_ascii=False)

    # Upload to the Hugging Face dataset only when setup succeeded.
    if HF_TOKEN and DATASET_CREATED:
        try:
            api = HfApi()
            api.upload_file(
                path_or_fileobj=json_path,
                path_in_repo=f"submissions/{json_filename}",
                repo_id=DATASET_NAME,
                repo_type="dataset",
                token=HF_TOKEN,
            )
            if image_path and os.path.exists(image_path):
                api.upload_file(
                    path_or_fileobj=image_path,
                    path_in_repo=f"images/{os.path.basename(image_path)}",
                    repo_id=DATASET_NAME,
                    repo_type="dataset",
                    token=HF_TOKEN,
                )
            print(f"Submission {submission_id} uploaded to Hugging Face Dataset")
        except Exception as e:
            # Best-effort: the local copy is already on disk.
            print(f"Error uploading to dataset: {e}")

    # Join only the parts that were filled in, so an empty city or state does
    # not leave a dangling comma or a literal "None" in the summary.
    location_info = ", ".join(str(part) for part in (city, state, country) if part)

    return (
        input_img,
        f"Your text response: {text_answer}",
        f"Selected location: {location_info}",
        f"SE Asia relevance: {se_asia_relevance}",
        f"Cultural knowledge source: {culture_knowledge}",
        f"Native caption: {native_caption}",
        f"English caption: {english_caption}",
    )
def clear_inputs():
    """Return the reset value for each of the nine form inputs.

    The tuple order matches the outputs wired to the Clear button: image,
    text answer, country, state, city, relevance, knowledge source, native
    caption, English caption. Text boxes reset to "" and everything else to
    None.
    """
    unset = None
    blank = ""
    return (
        unset,  # input_img
        blank,  # text_answer
        unset,  # country_dropdown
        unset,  # state_dropdown
        blank,  # city_textbox
        unset,  # se_asia_relevance
        unset,  # culture_knowledge
        blank,  # native_caption
        blank,  # english_caption
    )
# Make sure the dataset repo is set up before the form starts taking entries.
setup_hf_dataset()

# Options for the two radio questions, kept apart from the layout code.
relevance_choices = [
    "Yes. Unique to South Asia",
    "Yes, people will likely think of South Asia when seeing the picture, but it may have low degree of similarity to other cultures.",
    "Maybe, this culture did not originate from South Asia, but it's quite dominant in South Asia",
    "Not really. It has some affiliation to South Asia, but actually does not represent South Asia or has stronger affiliation to cultures outside South Asia",
    "No. Totally unrelated to South Asia",
]
knowledge_choices = [
    "I'm from this country/culture",
    "I checked online resources (e.g., Wikipedia, articles, blogs)",
]

# Styling for the "share-button" class, injected into the page via gr.HTML.
share_button_css = """
<style>
.share-button {
background-color: #f0f0f0 !important;
color: #000 !important;
border: 1px solid #ccc !important;
}
</style>
"""

with gr.Blocks() as gradio_app:
    gr.Markdown("# South Asian Image Data Collection")
    gr.Markdown("Upload an image and answer questions about its cultural significance.")

    # ---- Input form: one widget per row ----
    with gr.Row():
        input_img = gr.Image(label="Upload an image", sources=['upload', 'webcam'], type="pil")
    with gr.Row():
        text_answer = gr.Textbox(
            label="The image portrays culturally-relevant information in:",
            placeholder="what culture does this image represent?",
        )
    with gr.Row():
        country_dropdown = gr.Dropdown(
            choices=["India", "Pakistan", "Bangladesh", "Afghanistan", "Bhutan", "Nepal", "Sri Lanka"],
            label="Country where the image was taken:",
            interactive=True,
        )
    with gr.Row():
        # Starts empty; filled by update_state_dropdown once a country is picked.
        state_dropdown = gr.Dropdown(choices=[], label="State/Province:", interactive=True)
    with gr.Row():
        city_textbox = gr.Textbox(label="City where the image was taken:", placeholder="Enter city name")
    with gr.Row():
        se_asia_relevance = gr.Radio(
            choices=relevance_choices,
            label="Is the image culturally relevant in South Asia?",
        )
    with gr.Row():
        culture_knowledge = gr.Radio(
            choices=knowledge_choices,
            label="How do you know about this culture?",
            info="Please do not consult LLMs (e.g., GPT-4o, Claude, Command-R, etc.)",
        )
    with gr.Row():
        native_caption = gr.Textbox(
            label="Caption in Native Language:",
            placeholder="Enter caption in the native language of the culture depicted",
        )
    with gr.Row():
        english_caption = gr.Textbox(label="English Caption:", placeholder="Enter caption in English")

    # ---- Buttons: Clear/Submit share a row, the share button sits below ----
    with gr.Row():
        clear_btn = gr.Button("Clear")
        submit_btn = gr.Button("Submit")
    with gr.Row():
        # NOTE(review): no click handler is attached to share_btn in this part
        # of the file -- confirm whether one is expected.
        share_btn = gr.Button("Share via Link", variant="secondary", elem_classes=["share-button"])

    # ---- Read-only echo of the last submission ----
    with gr.Row():
        output_img = gr.Image(label="Submitted Image")
    with gr.Row():
        output_text = gr.Text(label="Text Response")
    with gr.Row():
        output_location = gr.Text(label="Location Information")
    with gr.Row():
        output_relevance = gr.Text(label="South Asia Cultural Relevance")
    with gr.Row():
        output_knowledge = gr.Text(label="Cultural Knowledge Source")
    with gr.Row():
        output_native = gr.Text(label="Native Language Caption")
    with gr.Row():
        output_english = gr.Text(label="English Caption")

    # ---- Event wiring ----
    # The same nine fields, in the same order, feed process_submission and are
    # reset by clear_inputs.
    form_fields = [
        input_img,
        text_answer,
        country_dropdown,
        state_dropdown,
        city_textbox,
        se_asia_relevance,
        culture_knowledge,
        native_caption,
        english_caption,
    ]
    echo_fields = [
        output_img,
        output_text,
        output_location,
        output_relevance,
        output_knowledge,
        output_native,
        output_english,
    ]

    country_dropdown.change(fn=update_state_dropdown, inputs=country_dropdown, outputs=state_dropdown)
    submit_btn.click(fn=process_submission, inputs=form_fields, outputs=echo_fields)
    clear_btn.click(fn=clear_inputs, inputs=[], outputs=form_fields)

    gr.HTML(share_button_css)

if __name__ == "__main__":
    gradio_app.launch()