1024m committed on
Commit
f539781
·
verified ·
1 Parent(s): 628342c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -164
app.py CHANGED
@@ -10,64 +10,20 @@ os.makedirs("submissions", exist_ok=True)
10
  HF_TOKEN = os.environ.get("HF_TOKEN")
11
  DATASET_NAME = "minemaster01/se-culture-dataset-results"
12
  DATASET_CREATED = False
13
- states_by_country = {
14
- "India": [
15
- "Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar", "Chhattisgarh", "Goa", "Gujarat", "Haryana",
16
- "Himachal Pradesh", "Jharkhand", "Karnataka", "Kerala", "Madhya Pradesh", "Maharashtra", "Manipur",
17
- "Meghalaya", "Mizoram", "Nagaland", "Odisha", "Punjab", "Rajasthan", "Sikkim", "Tamil Nadu", "Telangana",
18
- "Tripura", "Uttar Pradesh", "Uttarakhand", "West Bengal", "Andaman and Nicobar Islands", "Chandigarh",
19
- "Dadra and Nagar Haveli and Daman and Diu", "Delhi", "Jammu and Kashmir", "Ladakh", "Lakshadweep", "Puducherry"
20
- ],
21
- "Pakistan": [
22
- "Balochistan", "Khyber Pakhtunkhwa", "Punjab", "Sindh", "Islamabad Capital Territory", "Gilgit-Baltistan"
23
- ],
24
- "Bangladesh": [
25
- "Barisal", "Chittagong", "Dhaka", "Khulna", "Mymensingh", "Rajshahi", "Rangpur", "Sylhet"
26
- ],
27
- "Afghanistan": [
28
- "Badakhshan", "Badghis", "Baghlan", "Balkh", "Bamyan", "Daykundi", "Farah", "Faryab", "Ghazni", "Ghor",
29
- "Helmand", "Herat", "Jowzjan", "Kabul", "Kandahar", "Kapisa", "Khost", "Kunar", "Kunduz", "Laghman",
30
- "Logar", "Nangarhar", "Nimruz", "Nuristan", "Paktia", "Paktika", "Panjshir", "Parwan", "Samangan",
31
- "Sar-e Pol", "Takhar", "Uruzgan", "Wardak", "Zabul"
32
- ],
33
- "Bhutan": [
34
- "Bumthang", "Chukha", "Dagana", "Gasa", "Haa", "Lhuentse", "Mongar", "Paro", "Pemagatshel", "Punakha",
35
- "Samdrup Jongkhar", "Samtse", "Sarpang", "Thimphu", "Trashigang", "Trashiyangtse", "Trongsa", "Tsirang",
36
- "Wangdue Phodrang", "Zhemgang"
37
- ],
38
- "Nepal": [
39
- "Bagmati", "Gandaki", "Karnali", "Koshi", "Lumbini", "Madhesh", "Sudurpashchim"
40
- ],
41
- "Sri Lanka": [
42
- "Central", "Eastern", "North Central", "Northern", "North Western", "Sabaragamuwa", "Southern", "Uva", "Western"
43
- ]
44
- }
45
- south_asian_languages = [
46
- "Hindi", "Bengali", "Urdu", "Punjabi", "Tamil", "Telugu",
47
- "Marathi", "Gujarati", "Kannada", "Malayalam", "Odia",
48
- "Sindhi", "Nepali", "Sinhala", "Pashto", "Dari",
49
- "Dzongkha", "Assamese", "Kashmiri", "Sanskrit", "Other"
50
- ]
51
  def setup_hf_dataset():
52
- """Initialize the Hugging Face dataset if it doesn't exist"""
53
  global DATASET_CREATED
54
  if not DATASET_CREATED and HF_TOKEN:
55
  try:
56
  api = HfApi()
57
- create_repo(
58
- DATASET_NAME,
59
- repo_type="dataset",
60
- token=HF_TOKEN,
61
- exist_ok=True
62
- )
63
  DATASET_CREATED = True
64
  print(f"Dataset {DATASET_NAME} is ready")
65
- except Exception as e:
66
- print(f"Error setting up dataset: {e}")
67
  elif not HF_TOKEN:
68
  print("Warning: HF_TOKEN not set. Data will be stored locally only.")
69
  def update_state_dropdown(country):
70
- """Update state dropdown based on selected country"""
71
  if country in states_by_country:
72
  return gr.Dropdown(choices=states_by_country[country], label=f"State/Province in {country}:", interactive=True)
73
  return gr.Dropdown(choices=[], label="State/Province:", interactive=True)
@@ -78,97 +34,39 @@ def process_submission(input_img, language, country, state, city, se_asia_releva
78
  if input_img is not None:
79
  image_filename = f"{timestamp}.jpg"
80
  image_path = os.path.join("uploaded_images", image_filename)
81
- if isinstance(input_img, str):
82
- shutil.copy(input_img, image_path)
83
- else:
84
- input_img.save(image_path)
85
- submission_data = {
86
- "id": submission_id,
87
- "timestamp": timestamp,
88
- "image_filename": os.path.basename(image_path) if image_path else None,
89
- "cultural_relevance": language,
90
- "country": country,
91
- "state": state,
92
- "city": city,
93
- "se_asia_relevance": se_asia_relevance,
94
- "cultural_knowledge_source": culture_knowledge,
95
- "native_caption": native_caption,
96
- "english_caption": english_caption,
97
- "email": email
98
- }
99
  json_filename = f"{timestamp}.json"
100
  json_path = os.path.join("submissions", json_filename)
101
- with open(json_path, "w") as f:
102
- json.dump(submission_data, f, indent=2)
103
  if HF_TOKEN and DATASET_CREATED:
104
  try:
105
  api = HfApi()
106
- api.upload_file(
107
- path_or_fileobj=json_path,
108
- path_in_repo=f"submissions/{json_filename}",
109
- repo_id=DATASET_NAME,
110
- repo_type="dataset",
111
- token=HF_TOKEN
112
- )
113
  if image_path and os.path.exists(image_path):
114
- api.upload_file(
115
- path_or_fileobj=image_path,
116
- path_in_repo=f"images/{os.path.basename(image_path)}",
117
- repo_id=DATASET_NAME,
118
- repo_type="dataset",
119
- token=HF_TOKEN
120
- )
121
  print(f"Submission {submission_id} uploaded to Hugging Face Dataset")
122
- except Exception as e:
123
- print(f"Error uploading to dataset: {e}")
124
  location_info = f"Location: {city}, {state}, {country}" if state else f"Location: {city}, {country}"
125
  return input_img, f"Your text response: {language}", f"Selected location: {location_info}", f"SE Asia relevance: {se_asia_relevance}", f"Cultural knowledge source: {culture_knowledge}", f"Native caption: {native_caption}", f"English caption: {english_caption}"
126
  def clear_inputs():
127
  return None, "", None, None, "", None, None, "", "", ""
128
  setup_hf_dataset()
129
- with gr.Blocks() as gradio_app:
130
  gr.Markdown("# South Asian Image Data Collection")
131
  gr.Markdown("Upload an image and answer questions about its cultural significance.")
132
  with gr.Row():
133
  with gr.Column(scale=1):
134
  input_img = gr.Image(label="Upload an image", sources=['upload', 'webcam'], type="pil")
135
- language = gr.Dropdown(
136
- choices=south_asian_languages,
137
- label="Language:",
138
- info="Select the native language relevant to the image",
139
- interactive=True
140
- )
141
- country_dropdown = gr.Dropdown(
142
- choices=["None","India", "Pakistan", "Bangladesh", "Afghanistan", "Bhutan", "Nepal", "Sri Lanka"],
143
- label="Country where the image was taken:",
144
- interactive=True
145
- )
146
- state_dropdown = gr.Dropdown(
147
- choices=[],
148
- label="State/Province:",
149
- interactive=True
150
- )
151
  city_textbox = gr.Textbox(label="City where the image was taken:", placeholder="Enter city name")
152
  email_input = gr.Textbox(label="Your Email:", placeholder="Enter your email address", info="Used as unique contributor ID")
153
  with gr.Column(scale=1):
154
- se_asia_relevance = gr.Radio(
155
- choices=[
156
- "Yes. Unique to South Asia",
157
- "Yes, people will likely think of South Asia when seeing the picture, but it may have low degree of similarity to other cultures.",
158
- "Maybe, this culture did not originate from South Asia, but it's quite dominant in South Asia",
159
- "Not really. It has some affiliation to South Asia, but actually does not represent South Asia or has stronger affiliation to cultures outside South Asia",
160
- "No. Totally unrelated to South Asia"
161
- ],
162
- label="Is the image culturally relevant in South Asia?"
163
- )
164
- culture_knowledge = gr.Radio(
165
- choices=[
166
- "I'm from this country/culture",
167
- "I checked online resources (e.g., Wikipedia, articles, blogs)"
168
- ],
169
- label="How do you know about this culture?",
170
- info="Please do not consult LLMs (e.g., GPT-4o, Claude, Command-R, etc.)"
171
- )
172
  native_caption = gr.Textbox(label="Caption in Native Language:", placeholder="Enter caption in the native language of the culture depicted")
173
  english_caption = gr.Textbox(label="English Caption:", placeholder="Enter caption in English")
174
  with gr.Row():
@@ -184,50 +82,8 @@ with gr.Blocks() as gradio_app:
184
  output_knowledge = gr.Text(label="Cultural Knowledge Source")
185
  output_native = gr.Text(label="Native Language Caption")
186
  output_english = gr.Text(label="English Caption")
187
- country_dropdown.change(
188
- fn=update_state_dropdown,
189
- inputs=country_dropdown,
190
- outputs=state_dropdown
191
- )
192
- submit_btn.click(
193
- fn=process_submission,
194
- inputs=[
195
- input_img,
196
- language,
197
- country_dropdown,
198
- state_dropdown,
199
- city_textbox,
200
- se_asia_relevance,
201
- culture_knowledge,
202
- native_caption,
203
- english_caption,
204
- email_input
205
- ],
206
- outputs=[
207
- output_img,
208
- output_text,
209
- output_location,
210
- output_relevance,
211
- output_knowledge,
212
- output_native,
213
- output_english
214
- ]
215
- )
216
- clear_btn.click(
217
- fn=clear_inputs,
218
- inputs=[],
219
- outputs=[
220
- input_img,
221
- language,
222
- country_dropdown,
223
- state_dropdown,
224
- city_textbox,
225
- se_asia_relevance,
226
- culture_knowledge,
227
- native_caption,
228
- english_caption,
229
- email_input
230
- ]
231
- )
232
  if __name__ == "__main__":
233
  gradio_app.launch()
 
10
  HF_TOKEN = os.environ.get("HF_TOKEN")
11
  DATASET_NAME = "minemaster01/se-culture-dataset-results"
12
  DATASET_CREATED = False
13
+ states_by_country = {"India": ["Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar", "Chhattisgarh", "Goa", "Gujarat", "Haryana", "Himachal Pradesh", "Jharkhand", "Karnataka", "Kerala", "Madhya Pradesh", "Maharashtra", "Manipur", "Meghalaya", "Mizoram", "Nagaland", "Odisha", "Punjab", "Rajasthan", "Sikkim", "Tamil Nadu", "Telangana", "Tripura", "Uttar Pradesh", "Uttarakhand", "West Bengal", "Andaman and Nicobar Islands", "Chandigarh", "Dadra and Nagar Haveli and Daman and Diu", "Delhi", "Jammu and Kashmir", "Ladakh", "Lakshadweep", "Puducherry"], "Pakistan": ["Balochistan", "Khyber Pakhtunkhwa", "Punjab", "Sindh", "Islamabad Capital Territory", "Other"], "Bangladesh": ["Barisal", "Chittagong", "Dhaka", "Khulna", "Mymensingh", "Rajshahi", "Rangpur", "Sylhet"], "Afghanistan": ["Badakhshan", "Badghis", "Baghlan", "Balkh", "Bamyan", "Daykundi", "Farah", "Faryab", "Ghazni", "Ghor", "Helmand", "Herat", "Jowzjan", "Kabul", "Kandahar", "Kapisa", "Khost", "Kunar", "Kunduz", "Laghman", "Logar", "Nangarhar", "Nimruz", "Nuristan", "Paktia", "Paktika", "Panjshir", "Parwan", "Samangan", "Sar-e Pol", "Takhar", "Uruzgan", "Wardak", "Zabul"], "Bhutan": ["Bumthang", "Chukha", "Dagana", "Gasa", "Haa", "Lhuentse", "Mongar", "Paro", "Pemagatshel", "Punakha", "Samdrup Jongkhar", "Samtse", "Sarpang", "Thimphu", "Trashigang", "Trashiyangtse", "Trongsa", "Tsirang", "Wangdue Phodrang", "Zhemgang"], "Nepal": ["Bagmati", "Gandaki", "Karnali", "Koshi", "Lumbini", "Madhesh", "Sudurpashchim"], "Sri Lanka": ["Central", "Eastern", "North Central", "Northern", "North Western", "Sabaragamuwa", "Southern", "Uva", "Western"]}
14
+ south_asian_languages = ["Assamese", "Bengali", "Bhojpuri", "Bodo", "Dari", "Dzongkha", "Dogri", "Gujarati", "Hindi", "Kannada", "Kashmiri", "Konkani", "Maithili", "Malayalam", "Marathi", "Meitei", "Nepali", "Odia", "Pashto", "Punjabi", "Sanskrit", "Santali", "Sindhi", "Sinhala", "Tamil", "Telugu", "Tulu", "Urdu", "OTHER"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  def setup_hf_dataset():
 
16
  global DATASET_CREATED
17
  if not DATASET_CREATED and HF_TOKEN:
18
  try:
19
  api = HfApi()
20
+ create_repo(DATASET_NAME, repo_type="dataset", token=HF_TOKEN, exist_ok=True)
 
 
 
 
 
21
  DATASET_CREATED = True
22
  print(f"Dataset {DATASET_NAME} is ready")
23
+ except Exception as e: print(f"Error setting up dataset: {e}")
 
24
  elif not HF_TOKEN:
25
  print("Warning: HF_TOKEN not set. Data will be stored locally only.")
26
  def update_state_dropdown(country):
 
27
  if country in states_by_country:
28
  return gr.Dropdown(choices=states_by_country[country], label=f"State/Province in {country}:", interactive=True)
29
  return gr.Dropdown(choices=[], label="State/Province:", interactive=True)
 
34
  if input_img is not None:
35
  image_filename = f"{timestamp}.jpg"
36
  image_path = os.path.join("uploaded_images", image_filename)
37
+ if isinstance(input_img, str): shutil.copy(input_img, image_path)
38
+ else: input_img.save(image_path)
39
+ submission_data = {"id": submission_id, "timestamp": timestamp, "image_filename": os.path.basename(image_path) if image_path else None, "cultural_relevance": language, "country": country, "state": state, "city": city, "se_asia_relevance": se_asia_relevance, "cultural_knowledge_source": culture_knowledge, "native_caption": native_caption, "english_caption": english_caption, "email": email}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  json_filename = f"{timestamp}.json"
41
  json_path = os.path.join("submissions", json_filename)
42
+ with open(json_path, "w") as f: json.dump(submission_data, f, indent=2)
 
43
  if HF_TOKEN and DATASET_CREATED:
44
  try:
45
  api = HfApi()
46
+ api.upload_file(path_or_fileobj=json_path, path_in_repo=f"submissions/{json_filename}", repo_id=DATASET_NAME, repo_type="dataset", token=HF_TOKEN)
 
 
 
 
 
 
47
  if image_path and os.path.exists(image_path):
48
+ api.upload_file(path_or_fileobj=image_path, path_in_repo=f"images/{os.path.basename(image_path)}", repo_id=DATASET_NAME, repo_type="dataset", token=HF_TOKEN)
 
 
 
 
 
 
49
  print(f"Submission {submission_id} uploaded to Hugging Face Dataset")
50
+ except Exception as e: print(f"Error uploading to dataset: {e}")
 
51
  location_info = f"Location: {city}, {state}, {country}" if state else f"Location: {city}, {country}"
52
  return input_img, f"Your text response: {language}", f"Selected location: {location_info}", f"SE Asia relevance: {se_asia_relevance}", f"Cultural knowledge source: {culture_knowledge}", f"Native caption: {native_caption}", f"English caption: {english_caption}"
53
  def clear_inputs():
54
  return None, "", None, None, "", None, None, "", "", ""
55
  setup_hf_dataset()
56
+ with gr.Blocks(theme='1024m/1024m-1') as gradio_app:
57
  gr.Markdown("# South Asian Image Data Collection")
58
  gr.Markdown("Upload an image and answer questions about its cultural significance.")
59
  with gr.Row():
60
  with gr.Column(scale=1):
61
  input_img = gr.Image(label="Upload an image", sources=['upload', 'webcam'], type="pil")
62
+ language = gr.Dropdown(choices=south_asian_languages, label="Language:", info="Select the native language relevant to the image", interactive=True)
63
+ country_dropdown = gr.Dropdown(choices=["None","India", "Pakistan", "Bangladesh", "Afghanistan", "Bhutan", "Nepal", "Sri Lanka"], label="Country where the image was taken:", interactive=True)
64
+ state_dropdown = gr.Dropdown(choices=[], label="State/Province:", interactive=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  city_textbox = gr.Textbox(label="City where the image was taken:", placeholder="Enter city name")
66
  email_input = gr.Textbox(label="Your Email:", placeholder="Enter your email address", info="Used as unique contributor ID")
67
  with gr.Column(scale=1):
68
+ se_asia_relevance = gr.Radio(choices=["Yes. Unique to South Asia", "Yes, people will likely think of South Asia when seeing the picture, but it may have low degree of similarity to other cultures.", "Maybe, this culture did not originate from South Asia, but it's quite dominant in South Asia", "Not really. It has some affiliation to South Asia, but actually does not represent South Asia or has stronger affiliation to cultures outside South Asia", "No. Totally unrelated to South Asia"], label="Is the image culturally relevant in South Asia?")
69
+ culture_knowledge = gr.Radio(choices=["I'm from this country/culture", "I checked online resources (e.g., Wikipedia, articles, blogs)"], label="How do you know about this culture?", info="Please do not consult LLMs (e.g., GPT-4o, Claude, Command-R, etc.)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  native_caption = gr.Textbox(label="Caption in Native Language:", placeholder="Enter caption in the native language of the culture depicted")
71
  english_caption = gr.Textbox(label="English Caption:", placeholder="Enter caption in English")
72
  with gr.Row():
 
82
  output_knowledge = gr.Text(label="Cultural Knowledge Source")
83
  output_native = gr.Text(label="Native Language Caption")
84
  output_english = gr.Text(label="English Caption")
85
+ country_dropdown.change(fn=update_state_dropdown, inputs=country_dropdown, outputs=state_dropdown)
86
+ submit_btn.click(fn=process_submission, inputs=[input_img, language, country_dropdown, state_dropdown, city_textbox, se_asia_relevance, culture_knowledge, native_caption, english_caption, email_input], outputs=[output_img, output_text, output_location, output_relevance, output_knowledge, output_native, output_english])
87
+ clear_btn.click(fn=clear_inputs, inputs=[], outputs=[input_img, language, country_dropdown, state_dropdown, city_textbox, se_asia_relevance, culture_knowledge, native_caption, english_caption, email_input])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  if __name__ == "__main__":
89
  gradio_app.launch()