Files changed (1) hide show
  1. app.py +20 -212
app.py CHANGED
@@ -5,277 +5,85 @@ import uuid
5
  from datetime import datetime
6
  import shutil
7
  from huggingface_hub import HfApi, create_repo, upload_file, upload_folder
8
-
9
- # Create directories for data storage
10
  os.makedirs("uploaded_images", exist_ok=True)
11
  os.makedirs("submissions", exist_ok=True)
12
-
13
- # Hugging Face Dataset configuration
14
  HF_TOKEN = os.environ.get("HF_TOKEN")
15
  DATASET_NAME = "minemaster01/se-culture-dataset-results"
16
  DATASET_CREATED = False
17
-
18
- # States by country dictionary
19
- states_by_country = {
20
- "India": [
21
- "Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar", "Chhattisgarh", "Goa", "Gujarat", "Haryana",
22
- "Himachal Pradesh", "Jharkhand", "Karnataka", "Kerala", "Madhya Pradesh", "Maharashtra", "Manipur",
23
- "Meghalaya", "Mizoram", "Nagaland", "Odisha", "Punjab", "Rajasthan", "Sikkim", "Tamil Nadu", "Telangana",
24
- "Tripura", "Uttar Pradesh", "Uttarakhand", "West Bengal", "Andaman and Nicobar Islands", "Chandigarh",
25
- "Dadra and Nagar Haveli and Daman and Diu", "Delhi", "Jammu and Kashmir", "Ladakh", "Lakshadweep", "Puducherry"
26
- ],
27
- "Pakistan": [
28
- "Balochistan", "Khyber Pakhtunkhwa", "Punjab", "Sindh", "Islamabad Capital Territory", "Gilgit-Baltistan"
29
- ],
30
- "Bangladesh": [
31
- "Barisal", "Chittagong", "Dhaka", "Khulna", "Mymensingh", "Rajshahi", "Rangpur", "Sylhet"
32
- ],
33
- "Afghanistan": [
34
- "Badakhshan", "Badghis", "Baghlan", "Balkh", "Bamyan", "Daykundi", "Farah", "Faryab", "Ghazni", "Ghor",
35
- "Helmand", "Herat", "Jowzjan", "Kabul", "Kandahar", "Kapisa", "Khost", "Kunar", "Kunduz", "Laghman",
36
- "Logar", "Nangarhar", "Nimruz", "Nuristan", "Paktia", "Paktika", "Panjshir", "Parwan", "Samangan",
37
- "Sar-e Pol", "Takhar", "Uruzgan", "Wardak", "Zabul"
38
- ],
39
- "Bhutan": [
40
- "Bumthang", "Chukha", "Dagana", "Gasa", "Haa", "Lhuentse", "Mongar", "Paro", "Pemagatshel", "Punakha",
41
- "Samdrup Jongkhar", "Samtse", "Sarpang", "Thimphu", "Trashigang", "Trashiyangtse", "Trongsa", "Tsirang",
42
- "Wangdue Phodrang", "Zhemgang"
43
- ],
44
- "Nepal": [
45
- "Bagmati", "Gandaki", "Karnali", "Koshi", "Lumbini", "Madhesh", "Sudurpashchim"
46
- ],
47
- "Sri Lanka": [
48
- "Central", "Eastern", "North Central", "Northern", "North Western", "Sabaragamuwa", "Southern", "Uva", "Western"
49
- ]
50
- }
51
- south_asian_languages = [
52
- "Hindi", "Bengali", "Urdu", "Punjabi", "Tamil", "Telugu",
53
- "Marathi", "Gujarati", "Kannada", "Malayalam", "Odia",
54
- "Sindhi", "Nepali", "Sinhala", "Pashto", "Dari",
55
- "Dzongkha", "Assamese", "Kashmiri", "Sanskrit", "Other"
56
- ]
57
  def setup_hf_dataset():
58
- """Initialize the Hugging Face dataset if it doesn't exist"""
59
  global DATASET_CREATED
60
  if not DATASET_CREATED and HF_TOKEN:
61
  try:
62
  api = HfApi()
63
- create_repo(
64
- DATASET_NAME,
65
- repo_type="dataset",
66
- token=HF_TOKEN,
67
- exist_ok=True
68
- )
69
  DATASET_CREATED = True
70
  print(f"Dataset {DATASET_NAME} is ready")
71
- except Exception as e:
72
- print(f"Error setting up dataset: {e}")
73
  elif not HF_TOKEN:
74
  print("Warning: HF_TOKEN not set. Data will be stored locally only.")
75
-
76
  def update_state_dropdown(country):
77
- """Update state dropdown based on selected country"""
78
  if country in states_by_country:
79
  return gr.Dropdown(choices=states_by_country[country], label=f"State/Province in {country}:", interactive=True)
80
  return gr.Dropdown(choices=[], label="State/Province:", interactive=True)
81
-
82
  def process_submission(input_img, language, country, state, city, se_asia_relevance, culture_knowledge, native_caption, english_caption,email):
83
- # Generate unique ID for this submission
84
  submission_id = str(uuid.uuid4())
85
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
86
-
87
- # Save the image if provided
88
  image_path = None
89
  if input_img is not None:
90
- # Create filename with submission ID
91
  image_filename = f"{timestamp}.jpg"
92
  image_path = os.path.join("uploaded_images", image_filename)
93
-
94
- # Save the image
95
- if isinstance(input_img, str): # If it's a file path
96
- shutil.copy(input_img, image_path)
97
- else: # If it's a PIL Image
98
- input_img.save(image_path)
99
-
100
- # Create a data structure for the submission
101
- submission_data = {
102
- "id": submission_id,
103
- "timestamp": timestamp,
104
- "image_filename": os.path.basename(image_path) if image_path else None,
105
- "cultural_relevance": language,
106
- "country": country,
107
- "state": state,
108
- "city": city,
109
- "se_asia_relevance": se_asia_relevance,
110
- "cultural_knowledge_source": culture_knowledge,
111
- "native_caption": native_caption,
112
- "english_caption": english_caption,
113
- "email": email
114
- }
115
-
116
- # Save the data as JSON
117
  json_filename = f"{timestamp}.json"
118
  json_path = os.path.join("submissions", json_filename)
119
- with open(json_path, "w") as f:
120
- json.dump(submission_data, f, indent=2)
121
-
122
- # Upload to Hugging Face Dataset if token is available
123
  if HF_TOKEN and DATASET_CREATED:
124
  try:
125
  api = HfApi()
126
- # Upload the JSON data
127
- api.upload_file(
128
- path_or_fileobj=json_path,
129
- path_in_repo=f"submissions/{json_filename}",
130
- repo_id=DATASET_NAME,
131
- repo_type="dataset",
132
- token=HF_TOKEN
133
- )
134
- # Upload the image if it exists
135
  if image_path and os.path.exists(image_path):
136
- api.upload_file(
137
- path_or_fileobj=image_path,
138
- path_in_repo=f"images/{os.path.basename(image_path)}",
139
- repo_id=DATASET_NAME,
140
- repo_type="dataset",
141
- token=HF_TOKEN
142
- )
143
  print(f"Submission {submission_id} uploaded to Hugging Face Dataset")
144
- except Exception as e:
145
- print(f"Error uploading to dataset: {e}")
146
-
147
- # Return values to display in the interface
148
  location_info = f"Location: {city}, {state}, {country}" if state else f"Location: {city}, {country}"
149
  return input_img, f"Your text response: {language}", f"Selected location: {location_info}", f"SE Asia relevance: {se_asia_relevance}", f"Cultural knowledge source: {culture_knowledge}", f"Native caption: {native_caption}", f"English caption: {english_caption}"
150
-
151
  def clear_inputs():
152
  return None, "", None, None, "", None, None, "", "", ""
153
-
154
- # Initialize the dataset
155
  setup_hf_dataset()
156
-
157
- with gr.Blocks() as gradio_app:
158
  gr.Markdown("# South Asian Image Data Collection")
159
  gr.Markdown("Upload an image and answer questions about its cultural significance.")
160
-
161
- # Split the interface into two columns
162
  with gr.Row():
163
- # Left column for image upload and basic information
164
  with gr.Column(scale=1):
165
  input_img = gr.Image(label="Upload an image", sources=['upload', 'webcam'], type="pil")
166
- language = gr.Dropdown(
167
- choices=south_asian_languages,
168
- label="Language:",
169
- info="Select the native language relevant to the image",
170
- interactive=True
171
- )
172
-
173
- # Location information in the left column
174
- country_dropdown = gr.Dropdown(
175
- choices=["None","India", "Pakistan", "Bangladesh", "Afghanistan", "Bhutan", "Nepal", "Sri Lanka"],
176
- label="Country where the image was taken:",
177
- interactive=True
178
- )
179
- state_dropdown = gr.Dropdown(
180
- choices=[],
181
- label="State/Province:",
182
- interactive=True
183
- )
184
  city_textbox = gr.Textbox(label="City where the image was taken:", placeholder="Enter city name")
185
  email_input = gr.Textbox(label="Your Email:", placeholder="Enter your email address", info="Used as unique contributor ID")
186
-
187
- # Right column for additional information
188
  with gr.Column(scale=1):
189
- se_asia_relevance = gr.Radio(
190
- choices=[
191
- "Yes. Unique to South Asia",
192
- "Yes, people will likely think of South Asia when seeing the picture, but it may have low degree of similarity to other cultures.",
193
- "Maybe, this culture did not originate from South Asia, but it's quite dominant in South Asia",
194
- "Not really. It has some affiliation to South Asia, but actually does not represent South Asia or has stronger affiliation to cultures outside South Asia",
195
- "No. Totally unrelated to South Asia"
196
- ],
197
- label="Is the image culturally relevant in South Asia?"
198
- )
199
-
200
- culture_knowledge = gr.Radio(
201
- choices=[
202
- "I'm from this country/culture",
203
- "I checked online resources (e.g., Wikipedia, articles, blogs)"
204
- ],
205
- label="How do you know about this culture?",
206
- info="Please do not consult LLMs (e.g., GPT-4o, Claude, Command-R, etc.)"
207
- )
208
-
209
  native_caption = gr.Textbox(label="Caption in Native Language:", placeholder="Enter caption in the native language of the culture depicted")
210
  english_caption = gr.Textbox(label="English Caption:", placeholder="Enter caption in English")
211
-
212
- # Buttons row
213
  with gr.Row():
214
  clear_btn = gr.Button("Clear")
215
  submit_btn = gr.Button("Submit")
216
-
217
- # Output display section - also split into two columns
218
  with gr.Row():
219
  with gr.Column(scale=1):
220
  output_img = gr.Image(label="Submitted Image")
221
  output_text = gr.Text(label="Text Response")
222
  output_location = gr.Text(label="Location Information")
223
-
224
  with gr.Column(scale=1):
225
  output_relevance = gr.Text(label="South Asia Cultural Relevance")
226
  output_knowledge = gr.Text(label="Cultural Knowledge Source")
227
  output_native = gr.Text(label="Native Language Caption")
228
  output_english = gr.Text(label="English Caption")
229
-
230
- # Set up event handlers
231
- country_dropdown.change(
232
- fn=update_state_dropdown,
233
- inputs=country_dropdown,
234
- outputs=state_dropdown
235
- )
236
-
237
- submit_btn.click(
238
- fn=process_submission,
239
- inputs=[
240
- input_img,
241
- language,
242
- country_dropdown,
243
- state_dropdown,
244
- city_textbox,
245
- se_asia_relevance,
246
- culture_knowledge,
247
- native_caption,
248
- english_caption,
249
- email_input
250
- ],
251
- outputs=[
252
- output_img,
253
- output_text,
254
- output_location,
255
- output_relevance,
256
- output_knowledge,
257
- output_native,
258
- output_english
259
- ]
260
- )
261
-
262
- clear_btn.click(
263
- fn=clear_inputs,
264
- inputs=[],
265
- outputs=[
266
- input_img,
267
- language,
268
- country_dropdown,
269
- state_dropdown,
270
- city_textbox,
271
- se_asia_relevance,
272
- culture_knowledge,
273
- native_caption,
274
- english_caption,
275
- email_input
276
- ]
277
- )
278
-
279
-
280
  if __name__ == "__main__":
281
  gradio_app.launch()
 
5
  from datetime import datetime
6
  import shutil
7
  from huggingface_hub import HfApi, create_repo, upload_file, upload_folder
 
 
8
  os.makedirs("uploaded_images", exist_ok=True)
9
  os.makedirs("submissions", exist_ok=True)
 
 
10
  HF_TOKEN = os.environ.get("HF_TOKEN")
11
  DATASET_NAME = "minemaster01/se-culture-dataset-results"
12
  DATASET_CREATED = False
13
+ states_by_country = {"India": ["Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar", "Chhattisgarh", "Goa", "Gujarat", "Haryana", "Himachal Pradesh", "Jharkhand", "Karnataka", "Kerala", "Madhya Pradesh", "Maharashtra", "Manipur", "Meghalaya", "Mizoram", "Nagaland", "Odisha", "Punjab", "Rajasthan", "Sikkim", "Tamil Nadu", "Telangana", "Tripura", "Uttar Pradesh", "Uttarakhand", "West Bengal", "Andaman and Nicobar Islands", "Chandigarh", "Dadra and Nagar Haveli and Daman and Diu", "Delhi", "Jammu and Kashmir", "Ladakh", "Lakshadweep", "Puducherry"], "Pakistan": ["Balochistan", "Khyber Pakhtunkhwa", "Punjab", "Sindh", "Islamabad Capital Territory", "Other"], "Bangladesh": ["Barisal", "Chittagong", "Dhaka", "Khulna", "Mymensingh", "Rajshahi", "Rangpur", "Sylhet"], "Afghanistan": ["Badakhshan", "Badghis", "Baghlan", "Balkh", "Bamyan", "Daykundi", "Farah", "Faryab", "Ghazni", "Ghor", "Helmand", "Herat", "Jowzjan", "Kabul", "Kandahar", "Kapisa", "Khost", "Kunar", "Kunduz", "Laghman", "Logar", "Nangarhar", "Nimruz", "Nuristan", "Paktia", "Paktika", "Panjshir", "Parwan", "Samangan", "Sar-e Pol", "Takhar", "Uruzgan", "Wardak", "Zabul"], "Bhutan": ["Bumthang", "Chukha", "Dagana", "Gasa", "Haa", "Lhuentse", "Mongar", "Paro", "Pemagatshel", "Punakha", "Samdrup Jongkhar", "Samtse", "Sarpang", "Thimphu", "Trashigang", "Trashiyangtse", "Trongsa", "Tsirang", "Wangdue Phodrang", "Zhemgang"], "Nepal": ["Bagmati", "Gandaki", "Karnali", "Koshi", "Lumbini", "Madhesh", "Sudurpashchim"], "Sri Lanka": ["Central", "Eastern", "North Central", "Northern", "North Western", "Sabaragamuwa", "Southern", "Uva", "Western"]}
14
+ south_asian_languages = ["Assamese", "Bengali", "Bhojpuri", "Bodo", "Dari", "Dzongkha", "Dogri", "Gujarati", "Hindi", "Kannada", "Kashmiri", "Konkani", "Maithili", "Malayalam", "Marathi", "Meitei", "Nepali", "Odia", "Pashto", "Punjabi", "Sanskrit", "Santali", "Sindhi", "Sinhala", "Tamil", "Telugu", "Tulu", "Urdu", "OTHER"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  def setup_hf_dataset():
 
16
  global DATASET_CREATED
17
  if not DATASET_CREATED and HF_TOKEN:
18
  try:
19
  api = HfApi()
20
+ create_repo(DATASET_NAME, repo_type="dataset", token=HF_TOKEN, exist_ok=True)
 
 
 
 
 
21
  DATASET_CREATED = True
22
  print(f"Dataset {DATASET_NAME} is ready")
23
+ except Exception as e: print(f"Error setting up dataset: {e}")
 
24
  elif not HF_TOKEN:
25
  print("Warning: HF_TOKEN not set. Data will be stored locally only.")
 
26
  def update_state_dropdown(country):
 
27
  if country in states_by_country:
28
  return gr.Dropdown(choices=states_by_country[country], label=f"State/Province in {country}:", interactive=True)
29
  return gr.Dropdown(choices=[], label="State/Province:", interactive=True)
 
30
  def process_submission(input_img, language, country, state, city, se_asia_relevance, culture_knowledge, native_caption, english_caption,email):
 
31
  submission_id = str(uuid.uuid4())
32
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
 
 
33
  image_path = None
34
  if input_img is not None:
 
35
  image_filename = f"{timestamp}.jpg"
36
  image_path = os.path.join("uploaded_images", image_filename)
37
+ if isinstance(input_img, str): shutil.copy(input_img, image_path)
38
+ else: input_img.save(image_path)
39
+ submission_data = {"id": submission_id, "timestamp": timestamp, "image_filename": os.path.basename(image_path) if image_path else None, "cultural_relevance": language, "country": country, "state": state, "city": city, "se_asia_relevance": se_asia_relevance, "cultural_knowledge_source": culture_knowledge, "native_caption": native_caption, "english_caption": english_caption, "email": email}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  json_filename = f"{timestamp}.json"
41
  json_path = os.path.join("submissions", json_filename)
42
+ with open(json_path, "w") as f: json.dump(submission_data, f, indent=2)
 
 
 
43
  if HF_TOKEN and DATASET_CREATED:
44
  try:
45
  api = HfApi()
46
+ api.upload_file(path_or_fileobj=json_path, path_in_repo=f"submissions/{json_filename}", repo_id=DATASET_NAME, repo_type="dataset", token=HF_TOKEN)
 
 
 
 
 
 
 
 
47
  if image_path and os.path.exists(image_path):
48
+ api.upload_file(path_or_fileobj=image_path, path_in_repo=f"images/{os.path.basename(image_path)}", repo_id=DATASET_NAME, repo_type="dataset", token=HF_TOKEN)
 
 
 
 
 
 
49
  print(f"Submission {submission_id} uploaded to Hugging Face Dataset")
50
+ except Exception as e: print(f"Error uploading to dataset: {e}")
 
 
 
51
  location_info = f"Location: {city}, {state}, {country}" if state else f"Location: {city}, {country}"
52
  return input_img, f"Your text response: {language}", f"Selected location: {location_info}", f"SE Asia relevance: {se_asia_relevance}", f"Cultural knowledge source: {culture_knowledge}", f"Native caption: {native_caption}", f"English caption: {english_caption}"
 
53
  def clear_inputs():
54
  return None, "", None, None, "", None, None, "", "", ""
 
 
55
  setup_hf_dataset()
56
+ with gr.Blocks(theme='1024m/1024m-1') as gradio_app:
 
57
  gr.Markdown("# South Asian Image Data Collection")
58
  gr.Markdown("Upload an image and answer questions about its cultural significance.")
 
 
59
  with gr.Row():
 
60
  with gr.Column(scale=1):
61
  input_img = gr.Image(label="Upload an image", sources=['upload', 'webcam'], type="pil")
62
+ language = gr.Dropdown(choices=south_asian_languages, label="Language:", info="Select the native language relevant to the image", interactive=True)
63
+ country_dropdown = gr.Dropdown(choices=["None","India", "Pakistan", "Bangladesh", "Afghanistan", "Bhutan", "Nepal", "Sri Lanka"], label="Country where the image was taken:", interactive=True)
64
+ state_dropdown = gr.Dropdown(choices=[], label="State/Province:", interactive=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  city_textbox = gr.Textbox(label="City where the image was taken:", placeholder="Enter city name")
66
  email_input = gr.Textbox(label="Your Email:", placeholder="Enter your email address", info="Used as unique contributor ID")
 
 
67
  with gr.Column(scale=1):
68
+ se_asia_relevance = gr.Radio(choices=["Yes. Unique to South Asia", "Yes, people will likely think of South Asia when seeing the picture, but it may have low degree of similarity to other cultures.", "Maybe, this culture did not originate from South Asia, but it's quite dominant in South Asia", "Not really. It has some affiliation to South Asia, but actually does not represent South Asia or has stronger affiliation to cultures outside South Asia", "No. Totally unrelated to South Asia"], label="Is the image culturally relevant in South Asia?")
69
+ culture_knowledge = gr.Radio(choices=["I'm from this country/culture", "I checked online resources (e.g., Wikipedia, articles, blogs)"], label="How do you know about this culture?", info="Please do not consult LLMs (e.g., GPT-4o, Claude, Command-R, etc.)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  native_caption = gr.Textbox(label="Caption in Native Language:", placeholder="Enter caption in the native language of the culture depicted")
71
  english_caption = gr.Textbox(label="English Caption:", placeholder="Enter caption in English")
 
 
72
  with gr.Row():
73
  clear_btn = gr.Button("Clear")
74
  submit_btn = gr.Button("Submit")
 
 
75
  with gr.Row():
76
  with gr.Column(scale=1):
77
  output_img = gr.Image(label="Submitted Image")
78
  output_text = gr.Text(label="Text Response")
79
  output_location = gr.Text(label="Location Information")
 
80
  with gr.Column(scale=1):
81
  output_relevance = gr.Text(label="South Asia Cultural Relevance")
82
  output_knowledge = gr.Text(label="Cultural Knowledge Source")
83
  output_native = gr.Text(label="Native Language Caption")
84
  output_english = gr.Text(label="English Caption")
85
+ country_dropdown.change(fn=update_state_dropdown, inputs=country_dropdown, outputs=state_dropdown)
86
+ submit_btn.click(fn=process_submission, inputs=[input_img, language, country_dropdown, state_dropdown, city_textbox, se_asia_relevance, culture_knowledge, native_caption, english_caption, email_input], outputs=[output_img, output_text, output_location, output_relevance, output_knowledge, output_native, output_english])
87
+ clear_btn.click(fn=clear_inputs, inputs=[], outputs=[input_img, language, country_dropdown, state_dropdown, city_textbox, se_asia_relevance, culture_knowledge, native_caption, english_caption, email_input])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  if __name__ == "__main__":
89
  gradio_app.launch()