File size: 11,112 Bytes
bddfb4b
96e76ac
 
 
 
 
654128b
96e76ac
 
 
 
bddfb4b
654128b
 
 
 
 
76c5b42
c4adefd
 
76c5b42
 
 
 
 
c4adefd
 
76c5b42
c4adefd
 
 
 
 
 
76c5b42
 
 
 
c4adefd
 
76c5b42
 
 
c4adefd
 
 
 
 
76c5b42
c4adefd
 
 
654128b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76c5b42
 
c4adefd
76c5b42
 
c4adefd
1e081e6
96e76ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
654128b
96e76ac
 
76c5b42
c4adefd
96e76ac
 
1e081e6
 
96e76ac
 
 
 
 
 
 
 
654128b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96e76ac
c4adefd
1e081e6
bddfb4b
266c914
 
 
654128b
 
 
76c5b42
c4adefd
 
0bd155b
c4adefd
76c5b42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266c914
76c5b42
266c914
76c5b42
 
266c914
 
 
1e081e6
 
76c5b42
1e081e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76c5b42
 
c4adefd
76c5b42
c4adefd
 
 
0bd155b
c4adefd
1e081e6
c4adefd
76c5b42
 
 
 
 
 
 
 
1e081e6
c4adefd
 
76c5b42
 
 
 
 
 
1e081e6
c4adefd
 
266c914
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bddfb4b
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
import gradio as gr
import os
import json
import uuid
from datetime import datetime
import shutil
from huggingface_hub import HfApi, create_repo, upload_file, upload_folder

# Local storage: make sure both working directories exist before any handler runs.
for _storage_dir in ("uploaded_images", "submissions"):
    os.makedirs(_storage_dir, exist_ok=True)

# Hugging Face Dataset configuration.
# HF_TOKEN is read from the environment (set it as a secret in your Space);
# it is None when the variable is absent.
HF_TOKEN = os.getenv("HF_TOKEN")
# Target dataset repository in "username/dataset-name" form -- change to your own.
DATASET_NAME = "srishtiy/se-culture-dataset-results"
# Module-level flag; starts False and gates the upload step of a submission.
DATASET_CREATED = False

# Lookup table behind the State/Province dropdown: country name -> list of
# region names offered once that country has been selected.
states_by_country = {
    "India": [
        "Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar",
        "Chhattisgarh", "Goa", "Gujarat", "Haryana",
        "Himachal Pradesh", "Jharkhand", "Karnataka", "Kerala",
        "Madhya Pradesh", "Maharashtra", "Manipur", "Meghalaya",
        "Mizoram", "Nagaland", "Odisha", "Punjab",
        "Rajasthan", "Sikkim", "Tamil Nadu", "Telangana",
        "Tripura", "Uttar Pradesh", "Uttarakhand", "West Bengal",
        "Andaman and Nicobar Islands", "Chandigarh",
        "Dadra and Nagar Haveli and Daman and Diu", "Delhi",
        "Jammu and Kashmir", "Ladakh", "Lakshadweep", "Puducherry",
    ],
    "Pakistan": [
        "Balochistan", "Khyber Pakhtunkhwa", "Punjab", "Sindh",
        "Islamabad Capital Territory", "Azad Jammu and Kashmir",
        "Gilgit-Baltistan",
    ],
    "Bangladesh": [
        "Barisal", "Chittagong", "Dhaka", "Khulna",
        "Mymensingh", "Rajshahi", "Rangpur", "Sylhet",
    ],
    "Afghanistan": [
        "Badakhshan", "Badghis", "Baghlan", "Balkh", "Bamyan", "Daykundi",
        "Farah", "Faryab", "Ghazni", "Ghor", "Helmand", "Herat",
        "Jowzjan", "Kabul", "Kandahar", "Kapisa", "Khost", "Kunar",
        "Kunduz", "Laghman", "Logar", "Nangarhar", "Nimruz", "Nuristan",
        "Paktia", "Paktika", "Panjshir", "Parwan", "Samangan", "Sar-e Pol",
        "Takhar", "Uruzgan", "Wardak", "Zabul",
    ],
    "Bhutan": [
        "Bumthang", "Chukha", "Dagana", "Gasa", "Haa",
        "Lhuentse", "Mongar", "Paro", "Pemagatshel", "Punakha",
        "Samdrup Jongkhar", "Samtse", "Sarpang", "Thimphu", "Trashigang",
        "Trashiyangtse", "Trongsa", "Tsirang", "Wangdue Phodrang", "Zhemgang",
    ],
    "Nepal": [
        "Bagmati", "Gandaki", "Karnali", "Koshi",
        "Lumbini", "Madhesh", "Sudurpashchim",
    ],
    "Sri Lanka": [
        "Central", "Eastern", "North Central",
        "Northern", "North Western", "Sabaragamuwa",
        "Southern", "Uva", "Western",
    ],
}

def setup_hf_dataset():
    """Make sure the Hugging Face dataset repository exists.

    When ``HF_TOKEN`` is not set, only a warning is printed and nothing is
    contacted. Otherwise, unless ``DATASET_CREATED`` is already true,
    ``create_repo`` is called for ``DATASET_NAME`` with ``exist_ok=True`` so
    an existing repository is not an error. On success the module-level
    ``DATASET_CREATED`` flag is set to ``True``.

    Failures are printed, not raised, which leaves ``DATASET_CREATED``
    unchanged.
    """
    global DATASET_CREATED

    if not HF_TOKEN:
        print("Warning: HF_TOKEN not set. Data will be stored locally only.")
        return
    if DATASET_CREATED:
        # Already confirmed earlier in this process; nothing to do.
        return

    try:
        create_repo(
            DATASET_NAME,
            repo_type="dataset",
            token=HF_TOKEN,
            exist_ok=True
        )
    except Exception as e:
        # Best-effort: report the problem and leave the flag untouched.
        print(f"Error setting up dataset: {e}")
    else:
        DATASET_CREATED = True
        print(f"Dataset {DATASET_NAME} is ready")

def update_state_dropdown(country):
    """Build the State/Province dropdown for the selected country.

    Args:
        country: The value currently selected in the country dropdown
            (``None`` when nothing is selected).

    Returns:
        A ``gr.Dropdown`` whose choices are the regions listed for
        ``country`` in ``states_by_country``, or an empty dropdown with a
        generic label when the country is not in that table. The selected
        value is always reset to ``None`` so that a region picked for a
        previously selected country cannot linger after the choices change.
    """
    if country in states_by_country:
        return gr.Dropdown(
            choices=states_by_country[country],
            value=None,  # drop any selection made for the previous country
            label=f"State/Province in {country}:",
            interactive=True,
        )
    return gr.Dropdown(choices=[], value=None, label="State/Province:", interactive=True)

def _save_submission_image(input_img, image_path):
    """Write the submitted image to ``image_path`` (a ``.jpg`` file)."""
    if isinstance(input_img, str):  # If it's a file path
        shutil.copy(input_img, image_path)
        return

    # Otherwise it is treated as a PIL Image. JPEG cannot store alpha or
    # palette data, so saving e.g. an RGBA/P/LA image (typical for PNG
    # uploads) to a .jpg path raises; convert such images to RGB first.
    if input_img.mode not in ("L", "RGB", "CMYK"):
        input_img = input_img.convert("RGB")
    input_img.save(image_path)


def process_submission(input_img, text_answer, country, state, city, se_asia_relevance, culture_knowledge, native_caption, english_caption):
    """Store one form submission and return the values echoed back to the UI.

    The image (if any) is written to ``uploaded_images/`` and the answers to a
    JSON file in ``submissions/``; both file names are built from a timestamp
    and a fresh UUID. When ``HF_TOKEN`` and ``DATASET_CREATED`` are both
    truthy, the JSON file and the image are also uploaded to ``DATASET_NAME``;
    upload errors are printed and do not fail the submission.

    Args:
        input_img: Uploaded image as a PIL Image or a file path, or ``None``.
        text_answer: Free-text answer about the culture the image portrays.
        country: Selected country.
        state: Selected state/province (may be empty).
        city: City entered by the user (may be empty).
        se_asia_relevance: Selected cultural-relevance option.
        culture_knowledge: Selected cultural-knowledge source option.
        native_caption: Caption in the native language.
        english_caption: Caption in English.

    Returns:
        A 7-tuple: the original ``input_img`` followed by six display strings
        (text response, location, relevance, knowledge source, native
        caption, English caption).
    """
    # Generate unique ID for this submission
    submission_id = str(uuid.uuid4())
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Save the image if provided
    image_filename = None
    image_path = None
    if input_img is not None:
        image_filename = f"{timestamp}_{submission_id}.jpg"
        image_path = os.path.join("uploaded_images", image_filename)
        _save_submission_image(input_img, image_path)

    # Create a data structure for the submission
    submission_data = {
        "id": submission_id,
        "timestamp": timestamp,
        "image_filename": image_filename,
        "cultural_relevance": text_answer,
        "country": country,
        "state": state,
        "city": city,
        "se_asia_relevance": se_asia_relevance,
        "cultural_knowledge_source": culture_knowledge,
        "native_caption": native_caption,
        "english_caption": english_caption
    }

    # Save the data as JSON. UTF-8 is forced (instead of the platform default
    # encoding) and non-ASCII text is written verbatim so native-language
    # captions stay readable in the stored file.
    json_filename = f"{timestamp}_{submission_id}.json"
    json_path = os.path.join("submissions", json_filename)
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(submission_data, f, indent=2, ensure_ascii=False)

    # Upload to Hugging Face Dataset if token is available
    if HF_TOKEN and DATASET_CREATED:
        try:
            api = HfApi()
            # Upload the JSON data
            api.upload_file(
                path_or_fileobj=json_path,
                path_in_repo=f"submissions/{json_filename}",
                repo_id=DATASET_NAME,
                repo_type="dataset",
                token=HF_TOKEN
            )
            # Upload the image if it exists
            if image_path and os.path.exists(image_path):
                api.upload_file(
                    path_or_fileobj=image_path,
                    path_in_repo=f"images/{image_filename}",
                    repo_id=DATASET_NAME,
                    repo_type="dataset",
                    token=HF_TOKEN
                )
            print(f"Submission {submission_id} uploaded to Hugging Face Dataset")
        except Exception as e:
            # Best-effort upload: the local copies above are already written.
            print(f"Error uploading to dataset: {e}")

    # Return values to display in the interface. Only the location parts that
    # were actually filled in are joined, which avoids stray separators and
    # the doubled "Selected location: Location: ..." prefix.
    location_info = ", ".join(part for part in (city, state, country) if part)
    return (
        input_img,
        f"Your text response: {text_answer}",
        f"Selected location: {location_info}",
        f"SE Asia relevance: {se_asia_relevance}",
        f"Cultural knowledge source: {culture_knowledge}",
        f"Native caption: {native_caption}",
        f"English caption: {english_caption}",
    )

def clear_inputs():
    """Return the blank values that reset the nine form inputs.

    Returns:
        A 9-tuple: ``None`` for the image, dropdown and radio slots and an
        empty string for the text boxes, in the order of the outputs list
        wired to the Clear button.
    """
    unset = None
    blank = ""
    return (
        unset,   # input_img
        blank,   # text_answer
        unset,   # country_dropdown
        unset,   # state_dropdown
        blank,   # city_textbox
        unset,   # se_asia_relevance
        unset,   # culture_knowledge
        blank,   # native_caption
        blank,   # english_caption
    )

# Initialize the dataset.
# This runs at import time, before the UI is built; it either marks the
# dataset as ready or prints why uploads will be skipped.
setup_hf_dataset()

# Build the form. Components are rendered in the order they are created,
# each in its own row: inputs first, then the buttons, then read-only
# outputs that echo the submission back to the user.
with gr.Blocks() as gradio_app:
    gr.Markdown("# South Asian Image Data Collection")
    gr.Markdown("Upload an image and answer questions about its cultural significance.")
    
    # --- Inputs ---
    with gr.Row():
        # type="pil": the submit handler saves this value via its PIL branch.
        input_img = gr.Image(label="Upload an image", sources=['upload', 'webcam'], type="pil")
    
    with gr.Row():
        text_answer = gr.Textbox(label="The image portrays culturally-relevant information in:", placeholder="what culture does this image represent?")
    
    with gr.Row():
        # These choices must stay in sync with the keys of states_by_country.
        country_dropdown = gr.Dropdown(
            choices=["India", "Pakistan", "Bangladesh", "Afghanistan", "Bhutan", "Nepal", "Sri Lanka"],
            label="Country where the image was taken:",
            interactive=True
        )
    
    with gr.Row():
        # Starts empty; filled by update_state_dropdown when a country is picked.
        state_dropdown = gr.Dropdown(
            choices=[],
            label="State/Province:",
            interactive=True
        )
    
    with gr.Row():
        city_textbox = gr.Textbox(label="City where the image was taken:", placeholder="Enter city name")
    
    with gr.Row():
        # Five-level relevance scale, from "unique to South Asia" to "unrelated".
        se_asia_relevance = gr.Radio(
            choices=[
                "Yes. Unique to South Asia",
                "Yes, people will likely think of South Asia when seeing the picture, but it may have low degree of similarity to other cultures.",
                "Maybe, this culture did not originate from South Asia, but it's quite dominant in South Asia",
                "Not really. It has some affiliation to South Asia, but actually does not represent South Asia or has stronger affiliation to cultures outside South Asia",
                "No. Totally unrelated to South Asia"
            ],
            label="Is the image culturally relevant in South Asia?"
        )
    
    with gr.Row():
        culture_knowledge = gr.Radio(
            choices=[
                "I'm from this country/culture",
                "I checked online resources (e.g., Wikipedia, articles, blogs)"
            ],
            label="How do you know about this culture?",
            info="Please do not consult LLMs (e.g., GPT-4o, Claude, Command-R, etc.)"
        )
    
    with gr.Row():
        native_caption = gr.Textbox(label="Caption in Native Language:", placeholder="Enter caption in the native language of the culture depicted")
    
    with gr.Row():
        english_caption = gr.Textbox(label="English Caption:", placeholder="Enter caption in English")
    
    # --- Buttons: Clear/Submit on one row, the share button on its own row ---
    with gr.Row():
        clear_btn = gr.Button("Clear")
        submit_btn = gr.Button("Submit")
    
    with gr.Row():
        # NOTE(review): no click handler is attached to share_btn in the code
        # shown here, so the button appears to do nothing -- confirm intent.
        share_btn = gr.Button("Share via Link", variant="secondary", elem_classes=["share-button"])
    
    # --- Outputs: one component per value returned by process_submission ---
    with gr.Row():
        output_img = gr.Image(label="Submitted Image")
    
    with gr.Row():
        output_text = gr.Text(label="Text Response")
    
    with gr.Row():
        output_location = gr.Text(label="Location Information")
    
    with gr.Row():
        output_relevance = gr.Text(label="South Asia Cultural Relevance")
    
    with gr.Row():
        output_knowledge = gr.Text(label="Cultural Knowledge Source")
    
    with gr.Row():
        output_native = gr.Text(label="Native Language Caption")
    
    with gr.Row():
        output_english = gr.Text(label="English Caption")
    
    # --- Event handlers ---
    # Refresh the state/province choices whenever the country changes.
    country_dropdown.change(
        fn=update_state_dropdown,
        inputs=country_dropdown,
        outputs=state_dropdown
    )
    
    # Submit: the inputs list follows process_submission's parameter order and
    # the outputs list follows the order of the 7 values it returns.
    submit_btn.click(
        fn=process_submission,
        inputs=[
            input_img,
            text_answer,
            country_dropdown,
            state_dropdown,
            city_textbox,
            se_asia_relevance,
            culture_knowledge,
            native_caption,
            english_caption
        ],
        outputs=[
            output_img,
            output_text,
            output_location,
            output_relevance,
            output_knowledge,
            output_native,
            output_english
        ]
    )
    
    # Clear: resets the nine inputs; the order here must match the tuple
    # returned by clear_inputs. The output components are not cleared.
    clear_btn.click(
        fn=clear_inputs,
        inputs=[],
        outputs=[
            input_img,
            text_answer,
            country_dropdown,
            state_dropdown,
            city_textbox,
            se_asia_relevance,
            culture_knowledge,
            native_caption,
            english_caption
        ]
    )

    # Inline stylesheet for the "share-button" class assigned to share_btn.
    gr.HTML("""
    <style>
    .share-button {
        background-color: #f0f0f0 !important;
        color: #000 !important;
        border: 1px solid #ccc !important;
    }
    </style>
    """)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    gradio_app.launch()