Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -5,17 +5,11 @@ import uuid
|
|
5 |
from datetime import datetime
|
6 |
import shutil
|
7 |
from huggingface_hub import HfApi, create_repo, upload_file, upload_folder
|
8 |
-
|
9 |
-
# Create directories for data storage
|
10 |
os.makedirs("uploaded_images", exist_ok=True)
|
11 |
os.makedirs("submissions", exist_ok=True)
|
12 |
-
|
13 |
-
# Hugging Face Dataset configuration
|
14 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
15 |
DATASET_NAME = "minemaster01/se-culture-dataset-results"
|
16 |
DATASET_CREATED = False
|
17 |
-
|
18 |
-
# States by country dictionary
|
19 |
states_by_country = {
|
20 |
"India": [
|
21 |
"Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar", "Chhattisgarh", "Goa", "Gujarat", "Haryana",
|
@@ -72,32 +66,22 @@ def setup_hf_dataset():
|
|
72 |
print(f"Error setting up dataset: {e}")
|
73 |
elif not HF_TOKEN:
|
74 |
print("Warning: HF_TOKEN not set. Data will be stored locally only.")
|
75 |
-
|
76 |
def update_state_dropdown(country):
    """Rebuild the state/province dropdown for the selected country.

    Args:
        country: Current value of the country dropdown. Any value that is
            not a key of ``states_by_country`` (for example ``None`` or the
            literal ``"None"`` choice) yields an empty dropdown.

    Returns:
        A ``gr.Dropdown`` whose choices are the states of ``country`` (or an
        empty list), with its selection cleared.
    """
    states = states_by_country.get(country)
    if states is None:
        # Unknown / unselected country: nothing to choose from.
        return gr.Dropdown(choices=[], value=None, label="State/Province:", interactive=True)
    # value=None is passed explicitly so a state picked for the previously
    # selected country does not linger once it is no longer a valid choice.
    return gr.Dropdown(
        choices=states,
        value=None,
        label=f"State/Province in {country}:",
        interactive=True,
    )
|
81 |
-
|
82 |
def process_submission(input_img, language, country, state, city, se_asia_relevance, culture_knowledge, native_caption, english_caption,email):
|
83 |
-
# Generate unique ID for this submission
|
84 |
submission_id = str(uuid.uuid4())
|
85 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
86 |
-
|
87 |
-
# Save the image if provided
|
88 |
image_path = None
|
89 |
if input_img is not None:
|
90 |
-
# Create filename with submission ID
|
91 |
image_filename = f"{timestamp}.jpg"
|
92 |
image_path = os.path.join("uploaded_images", image_filename)
|
93 |
-
|
94 |
-
# Save the image
|
95 |
-
if isinstance(input_img, str): # If it's a file path
|
96 |
shutil.copy(input_img, image_path)
|
97 |
-
else:
|
98 |
input_img.save(image_path)
|
99 |
-
|
100 |
-
# Create a data structure for the submission
|
101 |
submission_data = {
|
102 |
"id": submission_id,
|
103 |
"timestamp": timestamp,
|
@@ -112,18 +96,13 @@ def process_submission(input_img, language, country, state, city, se_asia_releva
|
|
112 |
"english_caption": english_caption,
|
113 |
"email": email
|
114 |
}
|
115 |
-
|
116 |
-
# Save the data as JSON
|
117 |
json_filename = f"{timestamp}.json"
|
118 |
json_path = os.path.join("submissions", json_filename)
|
119 |
with open(json_path, "w") as f:
|
120 |
json.dump(submission_data, f, indent=2)
|
121 |
-
|
122 |
-
# Upload to Hugging Face Dataset if token is available
|
123 |
if HF_TOKEN and DATASET_CREATED:
|
124 |
try:
|
125 |
api = HfApi()
|
126 |
-
# Upload the JSON data
|
127 |
api.upload_file(
|
128 |
path_or_fileobj=json_path,
|
129 |
path_in_repo=f"submissions/{json_filename}",
|
@@ -131,7 +110,6 @@ def process_submission(input_img, language, country, state, city, se_asia_releva
|
|
131 |
repo_type="dataset",
|
132 |
token=HF_TOKEN
|
133 |
)
|
134 |
-
# Upload the image if it exists
|
135 |
if image_path and os.path.exists(image_path):
|
136 |
api.upload_file(
|
137 |
path_or_fileobj=image_path,
|
@@ -143,24 +121,15 @@ def process_submission(input_img, language, country, state, city, se_asia_releva
|
|
143 |
print(f"Submission {submission_id} uploaded to Hugging Face Dataset")
|
144 |
except Exception as e:
|
145 |
print(f"Error uploading to dataset: {e}")
|
146 |
-
|
147 |
-
# Return values to display in the interface
|
148 |
location_info = f"Location: {city}, {state}, {country}" if state else f"Location: {city}, {country}"
|
149 |
return input_img, f"Your text response: {language}", f"Selected location: {location_info}", f"SE Asia relevance: {se_asia_relevance}", f"Cultural knowledge source: {culture_knowledge}", f"Native caption: {native_caption}", f"English caption: {english_caption}"
|
150 |
-
|
151 |
def clear_inputs():
    """Produce the reset values handed back when the form is cleared.

    Returns:
        A 10-tuple mixing ``None`` and empty strings, in the positional order
        expected by the handler's outputs.
    """
    unset = None
    blank = ""
    return (unset, blank, unset, unset, blank, unset, unset, blank, blank, blank)
|
153 |
-
|
154 |
-
# Initialize the dataset
|
155 |
setup_hf_dataset()
|
156 |
-
|
157 |
with gr.Blocks() as gradio_app:
|
158 |
gr.Markdown("# South Asian Image Data Collection")
|
159 |
gr.Markdown("Upload an image and answer questions about its cultural significance.")
|
160 |
-
|
161 |
-
# Split the interface into two columns
|
162 |
with gr.Row():
|
163 |
-
# Left column for image upload and basic information
|
164 |
with gr.Column(scale=1):
|
165 |
input_img = gr.Image(label="Upload an image", sources=['upload', 'webcam'], type="pil")
|
166 |
language = gr.Dropdown(
|
@@ -169,8 +138,6 @@ with gr.Blocks() as gradio_app:
|
|
169 |
info="Select the native language relevant to the image",
|
170 |
interactive=True
|
171 |
)
|
172 |
-
|
173 |
-
# Location information in the left column
|
174 |
country_dropdown = gr.Dropdown(
|
175 |
choices=["None","India", "Pakistan", "Bangladesh", "Afghanistan", "Bhutan", "Nepal", "Sri Lanka"],
|
176 |
label="Country where the image was taken:",
|
@@ -183,8 +150,6 @@ with gr.Blocks() as gradio_app:
|
|
183 |
)
|
184 |
city_textbox = gr.Textbox(label="City where the image was taken:", placeholder="Enter city name")
|
185 |
email_input = gr.Textbox(label="Your Email:", placeholder="Enter your email address", info="Used as unique contributor ID")
|
186 |
-
|
187 |
-
# Right column for additional information
|
188 |
with gr.Column(scale=1):
|
189 |
se_asia_relevance = gr.Radio(
|
190 |
choices=[
|
@@ -196,7 +161,6 @@ with gr.Blocks() as gradio_app:
|
|
196 |
],
|
197 |
label="Is the image culturally relevant in South Asia?"
|
198 |
)
|
199 |
-
|
200 |
culture_knowledge = gr.Radio(
|
201 |
choices=[
|
202 |
"I'm from this country/culture",
|
@@ -205,35 +169,26 @@ with gr.Blocks() as gradio_app:
|
|
205 |
label="How do you know about this culture?",
|
206 |
info="Please do not consult LLMs (e.g., GPT-4o, Claude, Command-R, etc.)"
|
207 |
)
|
208 |
-
|
209 |
native_caption = gr.Textbox(label="Caption in Native Language:", placeholder="Enter caption in the native language of the culture depicted")
|
210 |
english_caption = gr.Textbox(label="English Caption:", placeholder="Enter caption in English")
|
211 |
-
|
212 |
-
# Buttons row
|
213 |
with gr.Row():
|
214 |
clear_btn = gr.Button("Clear")
|
215 |
submit_btn = gr.Button("Submit")
|
216 |
-
|
217 |
-
# Output display section - also split into two columns
|
218 |
with gr.Row():
|
219 |
with gr.Column(scale=1):
|
220 |
output_img = gr.Image(label="Submitted Image")
|
221 |
output_text = gr.Text(label="Text Response")
|
222 |
output_location = gr.Text(label="Location Information")
|
223 |
-
|
224 |
with gr.Column(scale=1):
|
225 |
output_relevance = gr.Text(label="South Asia Cultural Relevance")
|
226 |
output_knowledge = gr.Text(label="Cultural Knowledge Source")
|
227 |
output_native = gr.Text(label="Native Language Caption")
|
228 |
output_english = gr.Text(label="English Caption")
|
229 |
-
|
230 |
-
# Set up event handlers
|
231 |
country_dropdown.change(
|
232 |
fn=update_state_dropdown,
|
233 |
inputs=country_dropdown,
|
234 |
outputs=state_dropdown
|
235 |
)
|
236 |
-
|
237 |
submit_btn.click(
|
238 |
fn=process_submission,
|
239 |
inputs=[
|
@@ -258,7 +213,6 @@ with gr.Blocks() as gradio_app:
|
|
258 |
output_english
|
259 |
]
|
260 |
)
|
261 |
-
|
262 |
clear_btn.click(
|
263 |
fn=clear_inputs,
|
264 |
inputs=[],
|
@@ -275,7 +229,5 @@ with gr.Blocks() as gradio_app:
|
|
275 |
email_input
|
276 |
]
|
277 |
)
|
278 |
-
|
279 |
-
|
280 |
if __name__ == "__main__":
|
281 |
gradio_app.launch()
|
|
|
5 |
from datetime import datetime
|
6 |
import shutil
|
7 |
from huggingface_hub import HfApi, create_repo, upload_file, upload_folder
|
|
|
|
|
8 |
os.makedirs("uploaded_images", exist_ok=True)
|
9 |
os.makedirs("submissions", exist_ok=True)
|
|
|
|
|
10 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
11 |
DATASET_NAME = "minemaster01/se-culture-dataset-results"
|
12 |
DATASET_CREATED = False
|
|
|
|
|
13 |
states_by_country = {
|
14 |
"India": [
|
15 |
"Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar", "Chhattisgarh", "Goa", "Gujarat", "Haryana",
|
|
|
66 |
print(f"Error setting up dataset: {e}")
|
67 |
elif not HF_TOKEN:
|
68 |
print("Warning: HF_TOKEN not set. Data will be stored locally only.")
|
|
|
69 |
def update_state_dropdown(country):
    """Rebuild the state/province dropdown for the selected country.

    Args:
        country: Current value of the country dropdown. Any value that is
            not a key of ``states_by_country`` (for example ``None`` or the
            literal ``"None"`` choice) yields an empty dropdown.

    Returns:
        A ``gr.Dropdown`` whose choices are the states of ``country`` (or an
        empty list), with its selection cleared.
    """
    states = states_by_country.get(country)
    if states is None:
        # Unknown / unselected country: nothing to choose from.
        return gr.Dropdown(choices=[], value=None, label="State/Province:", interactive=True)
    # value=None is passed explicitly so a state picked for the previously
    # selected country does not linger once it is no longer a valid choice.
    return gr.Dropdown(
        choices=states,
        value=None,
        label=f"State/Province in {country}:",
        interactive=True,
    )
|
|
|
74 |
def process_submission(input_img, language, country, state, city, se_asia_relevance, culture_knowledge, native_caption, english_caption,email):
|
|
|
75 |
submission_id = str(uuid.uuid4())
|
76 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
|
|
|
77 |
image_path = None
|
78 |
if input_img is not None:
|
|
|
79 |
image_filename = f"{timestamp}.jpg"
|
80 |
image_path = os.path.join("uploaded_images", image_filename)
|
81 |
+
if isinstance(input_img, str):
|
|
|
|
|
82 |
shutil.copy(input_img, image_path)
|
83 |
+
else:
|
84 |
input_img.save(image_path)
|
|
|
|
|
85 |
submission_data = {
|
86 |
"id": submission_id,
|
87 |
"timestamp": timestamp,
|
|
|
96 |
"english_caption": english_caption,
|
97 |
"email": email
|
98 |
}
|
|
|
|
|
99 |
json_filename = f"{timestamp}.json"
|
100 |
json_path = os.path.join("submissions", json_filename)
|
101 |
with open(json_path, "w") as f:
|
102 |
json.dump(submission_data, f, indent=2)
|
|
|
|
|
103 |
if HF_TOKEN and DATASET_CREATED:
|
104 |
try:
|
105 |
api = HfApi()
|
|
|
106 |
api.upload_file(
|
107 |
path_or_fileobj=json_path,
|
108 |
path_in_repo=f"submissions/{json_filename}",
|
|
|
110 |
repo_type="dataset",
|
111 |
token=HF_TOKEN
|
112 |
)
|
|
|
113 |
if image_path and os.path.exists(image_path):
|
114 |
api.upload_file(
|
115 |
path_or_fileobj=image_path,
|
|
|
121 |
print(f"Submission {submission_id} uploaded to Hugging Face Dataset")
|
122 |
except Exception as e:
|
123 |
print(f"Error uploading to dataset: {e}")
|
|
|
|
|
124 |
location_info = f"Location: {city}, {state}, {country}" if state else f"Location: {city}, {country}"
|
125 |
return input_img, f"Your text response: {language}", f"Selected location: {location_info}", f"SE Asia relevance: {se_asia_relevance}", f"Cultural knowledge source: {culture_knowledge}", f"Native caption: {native_caption}", f"English caption: {english_caption}"
|
|
|
126 |
def clear_inputs():
    """Produce the reset values handed back when the form is cleared.

    Returns:
        A 10-tuple mixing ``None`` and empty strings, in the positional order
        expected by the handler's outputs.
    """
    unset = None
    blank = ""
    return (unset, blank, unset, unset, blank, unset, unset, blank, blank, blank)
|
|
|
|
|
128 |
setup_hf_dataset()
|
|
|
129 |
with gr.Blocks() as gradio_app:
|
130 |
gr.Markdown("# South Asian Image Data Collection")
|
131 |
gr.Markdown("Upload an image and answer questions about its cultural significance.")
|
|
|
|
|
132 |
with gr.Row():
|
|
|
133 |
with gr.Column(scale=1):
|
134 |
input_img = gr.Image(label="Upload an image", sources=['upload', 'webcam'], type="pil")
|
135 |
language = gr.Dropdown(
|
|
|
138 |
info="Select the native language relevant to the image",
|
139 |
interactive=True
|
140 |
)
|
|
|
|
|
141 |
country_dropdown = gr.Dropdown(
|
142 |
choices=["None","India", "Pakistan", "Bangladesh", "Afghanistan", "Bhutan", "Nepal", "Sri Lanka"],
|
143 |
label="Country where the image was taken:",
|
|
|
150 |
)
|
151 |
city_textbox = gr.Textbox(label="City where the image was taken:", placeholder="Enter city name")
|
152 |
email_input = gr.Textbox(label="Your Email:", placeholder="Enter your email address", info="Used as unique contributor ID")
|
|
|
|
|
153 |
with gr.Column(scale=1):
|
154 |
se_asia_relevance = gr.Radio(
|
155 |
choices=[
|
|
|
161 |
],
|
162 |
label="Is the image culturally relevant in South Asia?"
|
163 |
)
|
|
|
164 |
culture_knowledge = gr.Radio(
|
165 |
choices=[
|
166 |
"I'm from this country/culture",
|
|
|
169 |
label="How do you know about this culture?",
|
170 |
info="Please do not consult LLMs (e.g., GPT-4o, Claude, Command-R, etc.)"
|
171 |
)
|
|
|
172 |
native_caption = gr.Textbox(label="Caption in Native Language:", placeholder="Enter caption in the native language of the culture depicted")
|
173 |
english_caption = gr.Textbox(label="English Caption:", placeholder="Enter caption in English")
|
|
|
|
|
174 |
with gr.Row():
|
175 |
clear_btn = gr.Button("Clear")
|
176 |
submit_btn = gr.Button("Submit")
|
|
|
|
|
177 |
with gr.Row():
|
178 |
with gr.Column(scale=1):
|
179 |
output_img = gr.Image(label="Submitted Image")
|
180 |
output_text = gr.Text(label="Text Response")
|
181 |
output_location = gr.Text(label="Location Information")
|
|
|
182 |
with gr.Column(scale=1):
|
183 |
output_relevance = gr.Text(label="South Asia Cultural Relevance")
|
184 |
output_knowledge = gr.Text(label="Cultural Knowledge Source")
|
185 |
output_native = gr.Text(label="Native Language Caption")
|
186 |
output_english = gr.Text(label="English Caption")
|
|
|
|
|
187 |
country_dropdown.change(
|
188 |
fn=update_state_dropdown,
|
189 |
inputs=country_dropdown,
|
190 |
outputs=state_dropdown
|
191 |
)
|
|
|
192 |
submit_btn.click(
|
193 |
fn=process_submission,
|
194 |
inputs=[
|
|
|
213 |
output_english
|
214 |
]
|
215 |
)
|
|
|
216 |
clear_btn.click(
|
217 |
fn=clear_inputs,
|
218 |
inputs=[],
|
|
|
229 |
email_input
|
230 |
]
|
231 |
)
|
|
|
|
|
232 |
if __name__ == "__main__":
|
233 |
gradio_app.launch()
|