Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -15,6 +15,44 @@ HF_TOKEN = os.environ.get("HF_TOKEN") # You'll need to set this as a secret in
|
|
15 |
DATASET_NAME = "srishtiy/se-culture-dataset-results" # Change to your username/dataset-name
|
16 |
DATASET_CREATED = False
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
def setup_hf_dataset():
|
19 |
"""Initialize the Hugging Face dataset if it doesn't exist"""
|
20 |
global DATASET_CREATED
|
@@ -34,7 +72,14 @@ def setup_hf_dataset():
|
|
34 |
elif not HF_TOKEN:
|
35 |
print("Warning: HF_TOKEN not set. Data will be stored locally only.")
|
36 |
|
37 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
# Generate unique ID for this submission
|
39 |
submission_id = str(uuid.uuid4())
|
40 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
@@ -58,9 +103,9 @@ def process_submission(input_img, text_answer, multiple_choice, city, country, s
|
|
58 |
"timestamp": timestamp,
|
59 |
"image_filename": os.path.basename(image_path) if image_path else None,
|
60 |
"cultural_relevance": text_answer,
|
61 |
-
"continent": multiple_choice,
|
62 |
-
"city": city,
|
63 |
"country": country,
|
|
|
|
|
64 |
"se_asia_relevance": se_asia_relevance,
|
65 |
"cultural_knowledge_source": culture_knowledge,
|
66 |
"native_caption": native_caption,
|
@@ -99,102 +144,104 @@ def process_submission(input_img, text_answer, multiple_choice, city, country, s
|
|
99 |
print(f"Error uploading to dataset: {e}")
|
100 |
|
101 |
# Return values to display in the interface
|
102 |
-
|
|
|
103 |
|
104 |
# Initialize the dataset
|
105 |
setup_hf_dataset()
|
106 |
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
selected_country = gr.Dropdown(
|
111 |
-
choices=list(country_to_states.keys()),
|
112 |
-
label="Country where the image was taken:",
|
113 |
-
interactive=True
|
114 |
-
)
|
115 |
-
selected_state = gr.Dropdown(
|
116 |
-
choices=[],
|
117 |
-
label="State / Province / District (updates based on country)",
|
118 |
-
interactive=True
|
119 |
-
)
|
120 |
-
city_field = gr.Textbox(label="City (optional)")
|
121 |
|
122 |
-
|
123 |
-
|
|
|
124 |
|
125 |
-
|
126 |
-
|
127 |
-
"
|
128 |
-
"
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
"
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
gr.
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
|
163 |
-
|
|
|
|
|
|
|
|
|
|
|
164 |
|
165 |
-
|
166 |
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
knowledge_source,
|
177 |
-
native_caption,
|
178 |
-
english_caption
|
179 |
-
],
|
180 |
-
outputs=output_components
|
181 |
-
)
|
182 |
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
],
|
195 |
-
title="South Asian Image Data Collection",
|
196 |
-
description="Upload an image and answer questions about its cultural significance."
|
197 |
-
)
|
198 |
|
199 |
if __name__ == "__main__":
|
200 |
gradio_app.launch()
|
|
|
15 |
DATASET_NAME = "srishtiy/se-culture-dataset-results" # Change to your username/dataset-name
|
16 |
DATASET_CREATED = False
|
17 |
|
18 |
+
# States/provinces offered for each selectable country.
# The key order is the order shown in the country dropdown, and each list is
# handed to the state dropdown unchanged, so ordering here is user-visible.
states_by_country = {
    "India": [
        "Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar",
        "Chhattisgarh", "Goa", "Gujarat", "Haryana",
        "Himachal Pradesh", "Jharkhand", "Karnataka", "Kerala",
        "Madhya Pradesh", "Maharashtra", "Manipur", "Meghalaya",
        "Mizoram", "Nagaland", "Odisha", "Punjab",
        "Rajasthan", "Sikkim", "Tamil Nadu", "Telangana",
        "Tripura", "Uttar Pradesh", "Uttarakhand", "West Bengal",
        "Andaman and Nicobar Islands", "Chandigarh",
        "Dadra and Nagar Haveli and Daman and Diu", "Delhi",
        "Jammu and Kashmir", "Ladakh", "Lakshadweep", "Puducherry",
    ],
    "Pakistan": [
        "Balochistan", "Khyber Pakhtunkhwa", "Punjab", "Sindh",
        "Islamabad Capital Territory", "Azad Jammu and Kashmir",
        "Gilgit-Baltistan",
    ],
    "Bangladesh": [
        "Barisal", "Chittagong", "Dhaka", "Khulna",
        "Mymensingh", "Rajshahi", "Rangpur", "Sylhet",
    ],
    "Afghanistan": [
        "Badakhshan", "Badghis", "Baghlan", "Balkh",
        "Bamyan", "Daykundi", "Farah", "Faryab",
        "Ghazni", "Ghor", "Helmand", "Herat",
        "Jowzjan", "Kabul", "Kandahar", "Kapisa",
        "Khost", "Kunar", "Kunduz", "Laghman",
        "Logar", "Nangarhar", "Nimruz", "Nuristan",
        "Paktia", "Paktika", "Panjshir", "Parwan",
        "Samangan", "Sar-e Pol", "Takhar", "Uruzgan",
        "Wardak", "Zabul",
    ],
    "Bhutan": [
        "Bumthang", "Chukha", "Dagana", "Gasa",
        "Haa", "Lhuentse", "Mongar", "Paro",
        "Pemagatshel", "Punakha", "Samdrup Jongkhar", "Samtse",
        "Sarpang", "Thimphu", "Trashigang", "Trashiyangtse",
        "Trongsa", "Tsirang", "Wangdue Phodrang", "Zhemgang",
    ],
    "Nepal": [
        "Bagmati", "Gandaki", "Karnali", "Koshi",
        "Lumbini", "Madhesh", "Sudurpashchim",
    ],
    "Sri Lanka": [
        "Central", "Eastern", "North Central", "Northern",
        "North Western", "Sabaragamuwa", "Southern", "Uva",
        "Western",
    ],
}
|
55 |
+
|
56 |
def setup_hf_dataset():
|
57 |
"""Initialize the Hugging Face dataset if it doesn't exist"""
|
58 |
global DATASET_CREATED
|
|
|
72 |
elif not HF_TOKEN:
|
73 |
print("Warning: HF_TOKEN not set. Data will be stored locally only.")
|
74 |
|
75 |
+
def update_states(country):
    """Update the states dropdown based on the selected country.

    Args:
        country: The value currently selected in the country dropdown. May be
            None or a name that has no entry in ``states_by_country``.

    Returns:
        A Gradio update for the state dropdown. For a known country the
        dropdown is shown with that country's states/provinces; otherwise it
        is emptied and hidden. In both cases the selected value is cleared.
    """
    if country in states_by_country:
        # gr.update() is used instead of gr.Dropdown.update(): the per-component
        # update() classmethods were removed in Gradio 4, which this file
        # targets (it uses the Gradio 4 `sources=` argument on gr.Image).
        # value=None drops a state picked for a previously selected country so
        # a stale state cannot be submitted with the new country.
        return gr.update(choices=states_by_country[country], value=None, visible=True)
    return gr.update(choices=[], value=None, visible=False)
|
81 |
+
|
82 |
+
def process_submission(input_img, text_answer, country, state, city, se_asia_relevance, culture_knowledge, native_caption, english_caption):
|
83 |
# Generate unique ID for this submission
|
84 |
submission_id = str(uuid.uuid4())
|
85 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
|
103 |
"timestamp": timestamp,
|
104 |
"image_filename": os.path.basename(image_path) if image_path else None,
|
105 |
"cultural_relevance": text_answer,
|
|
|
|
|
106 |
"country": country,
|
107 |
+
"state_province": state,
|
108 |
+
"city": city,
|
109 |
"se_asia_relevance": se_asia_relevance,
|
110 |
"cultural_knowledge_source": culture_knowledge,
|
111 |
"native_caption": native_caption,
|
|
|
144 |
print(f"Error uploading to dataset: {e}")
|
145 |
|
146 |
# Return values to display in the interface
|
147 |
+
location_info = f"Location: {city}, {state}, {country}" if state else f"Location: {city}, {country}"
|
148 |
+
return input_img, f"Your text response: {text_answer}", location_info, f"SE Asia relevance: {se_asia_relevance}", f"Cultural knowledge source: {culture_knowledge}", f"Native caption: {native_caption}", f"English caption: {english_caption}"
|
149 |
|
150 |
# Initialize the dataset
|
151 |
setup_hf_dataset()
|
152 |
|
153 |
+
with gr.Blocks(title="South Asian Image Data Collection") as gradio_app:
    # NOTE(review): the nesting below was reconstructed from a flattened diff
    # view; confirm the Row/Column structure against the deployed app.py.

    # Page heading and short instructions rendered above the form.
    gr.Markdown("# South Asian Image Data Collection")
    gr.Markdown("Upload an image and answer questions about its cultural significance.")

    with gr.Row():
        # First column: the image to submit (file upload or webcam capture).
        with gr.Column(scale=1):
            input_img = gr.Image(label="Upload an image", sources=['upload', 'webcam'], type="pil")

        # Second column: the questionnaire about the image.
        with gr.Column(scale=1):
            text_answer = gr.Textbox(
                label="The image portrays culturally-relevant information in:",
                placeholder="What culture does this image represent?"
            )

            # Location information with dynamic dropdowns
            country_dropdown = gr.Dropdown(
                choices=list(states_by_country.keys()),
                label="Country where the image was taken:",
                interactive=True
            )

            # Created without choices; update_states supplies them through the
            # country_dropdown.change handler wired up below.
            state_dropdown = gr.Dropdown(
                label="State/Province/District:",
                interactive=True,
                visible=False  # Will be made visible when country is selected
            )

            city_textbox = gr.Textbox(
                label="City (optional):",
                placeholder="Enter city name"
            )

            se_asia_relevance = gr.Radio(
                choices=[
                    "Yes. Unique to South Asia",
                    "Yes, people will likely think of South Asia when seeing the picture, but it may have low degree of similarity to other cultures.",
                    "Maybe, this culture did not originate from South Asia, but it's quite dominant in South Asia",
                    "Not really. It has some affiliation to South Asia, but actually does not represent South Asia or has stronger affiliation to cultures outside South Asia",
                    "No. Totally unrelated to South Asia"
                ],
                label="Is the image culturally relevant in South Asia?"
            )

            culture_knowledge = gr.Radio(
                choices=[
                    "I'm from this country/culture",
                    "I checked online resources (e.g., Wikipedia, articles, blogs)"
                ],
                label="How do you know about this culture?",
                info="Please do not consult LLMs (e.g., GPT-4o, Claude, Command-R, etc.)"
            )

            native_caption = gr.Textbox(
                label="Caption in Native Language:",
                placeholder="Enter caption in the native language of the culture depicted"
            )

            english_caption = gr.Textbox(
                label="English Caption:",
                placeholder="Enter caption in English"
            )

    # Connect the country dropdown to update the state dropdown
    country_dropdown.change(
        fn=update_states,
        inputs=country_dropdown,
        outputs=state_dropdown
    )

    submit_btn = gr.Button("Submit")

    # Read-back area: echoes what process_submission returns for the submission.
    with gr.Row():
        with gr.Column():
            output_img = gr.Image(label="Submitted Image")
            text_response = gr.Text(label="Text Response")
            location_info = gr.Text(label="Location Information")
            relevance_output = gr.Text(label="South Asia Cultural Relevance")
            knowledge_source = gr.Text(label="Cultural Knowledge Source")
            native_caption_output = gr.Text(label="Native Language Caption")
            english_caption_output = gr.Text(label="English Caption")

    # The inputs are passed positionally, so their order must match the
    # parameter order of process_submission; the outputs must match the order
    # of the seven values it returns.
    submit_btn.click(
        fn=process_submission,
        inputs=[
            input_img, text_answer, country_dropdown, state_dropdown, city_textbox,
            se_asia_relevance, culture_knowledge, native_caption, english_caption
        ],
        outputs=[
            output_img, text_response, location_info, relevance_output,
            knowledge_source, native_caption_output, english_caption_output
        ]
    )
|
|
|
|
|
|
|
|
|
245 |
|
246 |
if __name__ == "__main__":
|
247 |
gradio_app.launch()
|