import gradio as gr
import os
import json
import uuid
from datetime import datetime
import shutil
from huggingface_hub import HfApi, create_repo
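# Local fallback storage: every submission is written here even when the HF upload is unavailable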
os.makedirs("uploaded_images", exist_ok=True)
os.makedirs("submissions", exist_ok=True)
HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_NAME = "minemaster01/se-culture-dataset-results"
DATASET_CREATED = False
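# States/provinces per supported country; used to populate the dependent State/Province dropdown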
states_by_country = {
"India": [
"Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar", "Chhattisgarh", "Goa", "Gujarat", "Haryana",
"Himachal Pradesh", "Jharkhand", "Karnataka", "Kerala", "Madhya Pradesh", "Maharashtra", "Manipur",
"Meghalaya", "Mizoram", "Nagaland", "Odisha", "Punjab", "Rajasthan", "Sikkim", "Tamil Nadu", "Telangana",
"Tripura", "Uttar Pradesh", "Uttarakhand", "West Bengal", "Andaman and Nicobar Islands", "Chandigarh",
"Dadra and Nagar Haveli and Daman and Diu", "Delhi", "Jammu and Kashmir", "Ladakh", "Lakshadweep", "Puducherry"
],
"Pakistan": [
"Balochistan", "Khyber Pakhtunkhwa", "Punjab", "Sindh", "Islamabad Capital Territory", "Gilgit-Baltistan"
],
"Bangladesh": [
"Barisal", "Chittagong", "Dhaka", "Khulna", "Mymensingh", "Rajshahi", "Rangpur", "Sylhet"
],
"Afghanistan": [
"Badakhshan", "Badghis", "Baghlan", "Balkh", "Bamyan", "Daykundi", "Farah", "Faryab", "Ghazni", "Ghor",
"Helmand", "Herat", "Jowzjan", "Kabul", "Kandahar", "Kapisa", "Khost", "Kunar", "Kunduz", "Laghman",
"Logar", "Nangarhar", "Nimruz", "Nuristan", "Paktia", "Paktika", "Panjshir", "Parwan", "Samangan",
"Sar-e Pol", "Takhar", "Uruzgan", "Wardak", "Zabul"
],
"Bhutan": [
"Bumthang", "Chukha", "Dagana", "Gasa", "Haa", "Lhuentse", "Mongar", "Paro", "Pemagatshel", "Punakha",
"Samdrup Jongkhar", "Samtse", "Sarpang", "Thimphu", "Trashigang", "Trashiyangtse", "Trongsa", "Tsirang",
"Wangdue Phodrang", "Zhemgang"
],
"Nepal": [
"Bagmati", "Gandaki", "Karnali", "Koshi", "Lumbini", "Madhesh", "Sudurpashchim"
],
"Sri Lanka": [
"Central", "Eastern", "North Central", "Northern", "North Western", "Sabaragamuwa", "Southern", "Uva", "Western"
]
}
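# Languages offered in the Language dropdown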
south_asian_languages = [
"Hindi", "Bengali", "Urdu", "Punjabi", "Tamil", "Telugu",
"Marathi", "Gujarati", "Kannada", "Malayalam", "Odia",
"Sindhi", "Nepali", "Sinhala", "Pashto", "Dari",
"Dzongkha", "Assamese", "Kashmiri", "Sanskrit", "Other"
]
def setup_hf_dataset():
"""Initialize the Hugging Face dataset if it doesn't exist"""
global DATASET_CREATED
if not DATASET_CREATED and HF_TOKEN:
try:
api = HfApi()
create_repo(
DATASET_NAME,
repo_type="dataset",
token=HF_TOKEN,
exist_ok=True
)
DATASET_CREATED = True
print(f"Dataset {DATASET_NAME} is ready")
except Exception as e:
print(f"Error setting up dataset: {e}")
elif not HF_TOKEN:
print("Warning: HF_TOKEN not set. Data will be stored locally only.")
def update_state_dropdown(country):
"""Update state dropdown based on selected country"""
if country in states_by_country:
return gr.Dropdown(choices=states_by_country[country], label=f"State/Province in {country}:", interactive=True)
return gr.Dropdown(choices=[], label="State/Province:", interactive=True)
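# Persist a submission locally (JSON + image) and, when a token is configured, mirror it to the HF dataset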
def process_submission(input_img, language, country, state, city, se_asia_relevance, culture_knowledge, native_caption, english_caption, email):
submission_id = str(uuid.uuid4())
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
image_path = None
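    # Save the uploaded image locally; Gradio may provide either a file path or a PIL image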
if input_img is not None:
        # Include the submission id in the filename so concurrent submissions cannot collide
        image_filename = f"{timestamp}_{submission_id}.jpg"
        image_path = os.path.join("uploaded_images", image_filename)
        if isinstance(input_img, str):
            shutil.copy(input_img, image_path)
        else:
            # Convert to RGB so the JPEG save also works for RGBA/palette images
            input_img.convert("RGB").save(image_path)
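    # Metadata record stored alongside the image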
submission_data = {
"id": submission_id,
"timestamp": timestamp,
"image_filename": os.path.basename(image_path) if image_path else None,
"cultural_relevance": language,
"country": country,
"state": state,
"city": city,
"se_asia_relevance": se_asia_relevance,
"cultural_knowledge_source": culture_knowledge,
"native_caption": native_caption,
"english_caption": english_caption,
"email": email
}
    json_filename = f"{timestamp}_{submission_id}.json"
json_path = os.path.join("submissions", json_filename)
with open(json_path, "w") as f:
json.dump(submission_data, f, indent=2)
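    # Mirror the submission to the Hugging Face dataset repo when credentials are available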
if HF_TOKEN and DATASET_CREATED:
try:
api = HfApi()
api.upload_file(
path_or_fileobj=json_path,
path_in_repo=f"submissions/{json_filename}",
repo_id=DATASET_NAME,
repo_type="dataset",
token=HF_TOKEN
)
if image_path and os.path.exists(image_path):
api.upload_file(
path_or_fileobj=image_path,
path_in_repo=f"images/{os.path.basename(image_path)}",
repo_id=DATASET_NAME,
repo_type="dataset",
token=HF_TOKEN
)
print(f"Submission {submission_id} uploaded to Hugging Face Dataset")
except Exception as e:
print(f"Error uploading to dataset: {e}")
location_info = f"Location: {city}, {state}, {country}" if state else f"Location: {city}, {country}"
return input_img, f"Your text response: {language}", f"Selected location: {location_info}", f"SE Asia relevance: {se_asia_relevance}", f"Cultural knowledge source: {culture_knowledge}", f"Native caption: {native_caption}", f"English caption: {english_caption}"
def clear_inputs():
return None, "", None, None, "", None, None, "", "", ""
setup_hf_dataset()
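# Build the Gradio UI: image and location inputs on the left, cultural-relevance questions on the right, echoed outputs below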
with gr.Blocks() as gradio_app:
gr.Markdown("# South Asian Image Data Collection")
gr.Markdown("Upload an image and answer questions about its cultural significance.")
with gr.Row():
with gr.Column(scale=1):
input_img = gr.Image(label="Upload an image", sources=['upload', 'webcam'], type="pil")
language = gr.Dropdown(
choices=south_asian_languages,
label="Language:",
info="Select the native language relevant to the image",
interactive=True
)
country_dropdown = gr.Dropdown(
choices=["None","India", "Pakistan", "Bangladesh", "Afghanistan", "Bhutan", "Nepal", "Sri Lanka"],
label="Country where the image was taken:",
interactive=True
)
state_dropdown = gr.Dropdown(
choices=[],
label="State/Province:",
interactive=True
)
city_textbox = gr.Textbox(label="City where the image was taken:", placeholder="Enter city name")
email_input = gr.Textbox(label="Your Email:", placeholder="Enter your email address", info="Used as unique contributor ID")
with gr.Column(scale=1):
se_asia_relevance = gr.Radio(
choices=[
"Yes. Unique to South Asia",
"Yes, people will likely think of South Asia when seeing the picture, but it may have low degree of similarity to other cultures.",
"Maybe, this culture did not originate from South Asia, but it's quite dominant in South Asia",
"Not really. It has some affiliation to South Asia, but actually does not represent South Asia or has stronger affiliation to cultures outside South Asia",
"No. Totally unrelated to South Asia"
],
label="Is the image culturally relevant in South Asia?"
)
culture_knowledge = gr.Radio(
choices=[
"I'm from this country/culture",
"I checked online resources (e.g., Wikipedia, articles, blogs)"
],
label="How do you know about this culture?",
info="Please do not consult LLMs (e.g., GPT-4o, Claude, Command-R, etc.)"
)
native_caption = gr.Textbox(label="Caption in Native Language:", placeholder="Enter caption in the native language of the culture depicted")
english_caption = gr.Textbox(label="English Caption:", placeholder="Enter caption in English")
with gr.Row():
clear_btn = gr.Button("Clear")
submit_btn = gr.Button("Submit")
with gr.Row():
with gr.Column(scale=1):
output_img = gr.Image(label="Submitted Image")
output_text = gr.Text(label="Text Response")
output_location = gr.Text(label="Location Information")
with gr.Column(scale=1):
output_relevance = gr.Text(label="South Asia Cultural Relevance")
output_knowledge = gr.Text(label="Cultural Knowledge Source")
output_native = gr.Text(label="Native Language Caption")
output_english = gr.Text(label="English Caption")
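    # Wire up events: dependent State/Province dropdown, submit, and clear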
country_dropdown.change(
fn=update_state_dropdown,
inputs=country_dropdown,
outputs=state_dropdown
)
submit_btn.click(
fn=process_submission,
inputs=[
input_img,
language,
country_dropdown,
state_dropdown,
city_textbox,
se_asia_relevance,
culture_knowledge,
native_caption,
english_caption,
email_input
],
outputs=[
output_img,
output_text,
output_location,
output_relevance,
output_knowledge,
output_native,
output_english
]
)
clear_btn.click(
fn=clear_inputs,
inputs=[],
outputs=[
input_img,
language,
country_dropdown,
state_dropdown,
city_textbox,
se_asia_relevance,
culture_knowledge,
native_caption,
english_caption,
email_input
]
)
if __name__ == "__main__":
    gradio_app.launch()