File size: 7,940 Bytes
bddfb4b
96e76ac
 
 
 
 
654128b
96e76ac
 
 
 
bddfb4b
654128b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bddfb4b
96e76ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
654128b
96e76ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
654128b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96e76ac
bddfb4b
 
654128b
 
 
bddfb4b
 
 
0bd155b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bddfb4b
1cf3975
 
 
 
 
bddfb4b
1cf3975
0bd155b
 
 
bddfb4b
 
 
 
 
0bd155b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bddfb4b
 
 
 
 
 
1cf3975
bddfb4b
 
 
 
1cf3975
bddfb4b
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
import gradio as gr
import os
import json
import uuid
from datetime import datetime
import shutil
from huggingface_hub import HfApi, create_repo, upload_file, upload_folder

# Local folders that hold saved images and submission JSON files; they are
# created at import time when missing.
for _storage_dir in ("uploaded_images", "submissions"):
    os.makedirs(_storage_dir, exist_ok=True)

# Hugging Face Dataset settings.
# HF_TOKEN comes from the environment (set it as a secret in your Space) and is
# None when the variable is absent.
HF_TOKEN = os.getenv("HF_TOKEN")
# Target dataset repo in "username/dataset-name" form; change to your own.
DATASET_NAME = "srishtiy/se-culture-dataset-results"
# Set to True by setup_hf_dataset() once the dataset repo has been ensured.
DATASET_CREATED = False

def setup_hf_dataset():
    """Ensure the Hugging Face dataset repo exists and record that in DATASET_CREATED.

    Does nothing beyond printing a warning when HF_TOKEN is unset, and does
    nothing at all when the repo was already ensured by an earlier call.
    Any error raised while creating the repo is caught and printed, leaving
    DATASET_CREATED unchanged so that uploads stay disabled.
    """
    global DATASET_CREATED

    if not HF_TOKEN:
        print("Warning: HF_TOKEN not set. Data will be stored locally only.")
        return
    if DATASET_CREATED:
        return

    try:
        # exist_ok=True makes this safe to run against an already existing repo.
        create_repo(
            DATASET_NAME,
            repo_type="dataset",
            token=HF_TOKEN,
            exist_ok=True
        )
    except Exception as e:
        # Best effort: the app keeps running with local-only storage.
        print(f"Error setting up dataset: {e}")
    else:
        DATASET_CREATED = True
        print(f"Dataset {DATASET_NAME} is ready")

def process_submission(input_img, text_answer, multiple_choice, city, country, se_asia_relevance, culture_knowledge, native_caption, english_caption):
    """Store one form submission locally, mirror it to the Hub, and echo it back.

    The image (if any) is written to ``uploaded_images/`` and the answers to a
    JSON file in ``submissions/``; both share a ``<timestamp>_<uuid>`` stem.
    When HF_TOKEN is set and DATASET_CREATED is true, the two files are also
    uploaded to the DATASET_NAME dataset repo.

    Args:
        input_img: None, a file path (str), or an object with a PIL-style
            ``save`` method.
        text_answer: Stored under the "cultural_relevance" key.
        multiple_choice: Stored under the "continent" key.
        city: Stored under the "city" key.
        country: Stored under the "country" key.
        se_asia_relevance: Stored under the "se_asia_relevance" key.
        culture_knowledge: Stored under the "cultural_knowledge_source" key.
        native_caption: Stored under the "native_caption" key.
        english_caption: Stored under the "english_caption" key.

    Returns:
        An 8-tuple for the output components: the original ``input_img``
        followed by seven display strings describing the submitted answers.

    Raises:
        Errors from saving the image or JSON locally propagate to the caller.
        Upload errors are caught and printed instead.
    """
    submission_id = str(uuid.uuid4())
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    file_stem = f"{timestamp}_{submission_id}"

    # Save the image if provided
    image_path = None
    if input_img is not None:
        image_path = os.path.join("uploaded_images", f"{file_stem}.jpg")
        if isinstance(input_img, str):  # If it's a file path
            shutil.copy(input_img, image_path)
        else:  # If it's a PIL Image
            # The JPEG writer rejects modes such as RGBA, LA or P, so convert a
            # copy for saving; the caller still gets the untouched input back.
            jpeg_ready = input_img
            if getattr(input_img, "mode", "RGB") not in ("L", "RGB", "CMYK"):
                jpeg_ready = input_img.convert("RGB")
            jpeg_ready.save(image_path)

    # Create a data structure for the submission
    submission_data = {
        "id": submission_id,
        "timestamp": timestamp,
        "image_filename": os.path.basename(image_path) if image_path else None,
        "cultural_relevance": text_answer,
        "continent": multiple_choice,
        "city": city,
        "country": country,
        "se_asia_relevance": se_asia_relevance,
        "cultural_knowledge_source": culture_knowledge,
        "native_caption": native_caption,
        "english_caption": english_caption
    }

    # Save the data as JSON. UTF-8 with ensure_ascii=False keeps captions in
    # non-Latin scripts readable in the file instead of \uXXXX escapes.
    json_filename = f"{file_stem}.json"
    json_path = os.path.join("submissions", json_filename)
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(submission_data, f, indent=2, ensure_ascii=False)

    # Upload to Hugging Face Dataset if token is available
    if HF_TOKEN and DATASET_CREATED:
        try:
            api = HfApi()
            # Upload the JSON data
            api.upload_file(
                path_or_fileobj=json_path,
                path_in_repo=f"submissions/{json_filename}",
                repo_id=DATASET_NAME,
                repo_type="dataset",
                token=HF_TOKEN
            )
            # Upload the image if it exists
            if image_path and os.path.exists(image_path):
                api.upload_file(
                    path_or_fileobj=image_path,
                    path_in_repo=f"images/{os.path.basename(image_path)}",
                    repo_id=DATASET_NAME,
                    repo_type="dataset",
                    token=HF_TOKEN
                )
            print(f"Submission {submission_id} uploaded to Hugging Face Dataset")
        except Exception as e:
            # Best effort: the local copies written above remain available.
            print(f"Error uploading to dataset: {e}")

    # Return values to display in the interface
    return (
        input_img,
        f"Your text response: {text_answer}",
        f"Your selected option: {multiple_choice}",
        f"Location: {city}, {country}",
        f"SE Asia relevance: {se_asia_relevance}",
        f"Cultural knowledge source: {culture_knowledge}",
        f"Native caption: {native_caption}",
        f"English caption: {english_caption}",
    )

# Initialize the dataset
setup_hf_dataset()

# Country -> states/provinces offered by the two linked dropdowns.
# NOTE(review): the form reads `country_to_states`, but this file never defines
# it. The country names below are placeholders chosen so the app can start;
# TODO replace them with the real country/state lists before deploying.
country_to_states = {
    "Afghanistan": [],
    "Bangladesh": [],
    "Bhutan": [],
    "India": [],
    "Maldives": [],
    "Nepal": [],
    "Pakistan": [],
    "Sri Lanka": [],
}


def update_states(selected_country):
    """Return a dropdown update listing the states of ``selected_country``.

    Unknown or unselected countries yield an empty choice list, and the
    current state selection is always cleared.
    """
    # gr.Dropdown.update() was removed in Gradio 4; gr.update() is the
    # version-independent way to patch a component from an event handler.
    return gr.update(choices=country_to_states.get(selected_country, []), value=None)


# The previous revision nested this `with gr.Blocks()` form inside the
# `inputs=[...]` list of a `gr.Interface(...)` call, which is not valid Python.
# The app is now a single Blocks layout wired to process_submission.
with gr.Blocks(title="South Asian Image Data Collection") as gradio_app:
    gr.Markdown("## South Asian Image Data Collection")
    gr.Markdown("Upload an image and answer questions about its cultural significance.")

    selected_country = gr.Dropdown(
        choices=list(country_to_states.keys()),
        label="Country where the image was taken:",
        interactive=True
    )
    selected_state = gr.Dropdown(
        choices=[],
        label="State / Province",
        interactive=True,
        # Lets users type a value while the state lists above are still empty.
        allow_custom_value=True
    )
    city_field = gr.Textbox(label="City (optional)")

    image_input = gr.Image(label="Upload an image", sources=['upload', 'webcam'], type="pil")
    cultural_text = gr.Textbox(label="The image portrays culturally-relevant information in:")

    sa_relevance = gr.Radio(
        choices=[
            "Yes. Unique to South Asia",
            "Yes, people will likely think of South Asia when seeing the picture, but it may have low degree of similarity to other cultures.",
            "Maybe, this culture did not originate from South Asia, but it's quite dominant in South Asia",
            "Not really. It has some affiliation to South Asia, but actually does not represent South Asia or has stronger affiliation to cultures outside South Asia",
            "No. Totally unrelated to South Asia"
        ],
        label="Is the image culturally relevant in South Asia?"
    )

    knowledge_source = gr.Radio(
        choices=[
            "I'm from this country/culture",
            "I checked online resources (e.g., Wikipedia, articles, blogs)"
        ],
        label="How do you know about this culture?",
        info="Please do not consult LLMs"
    )

    native_caption = gr.Textbox(label="Caption in Native Language:")
    english_caption = gr.Textbox(label="English Caption:")

    # Link dynamic dropdown update
    selected_country.change(fn=update_states, inputs=selected_country, outputs=selected_state)

    submit_btn = gr.Button("Submit")

    # One component per value returned by process_submission, in return order.
    output_components = [
        gr.Image(label="Submitted Image"),
        gr.Text(label="Text Response"),
        gr.Text(label="State / Province"),
        gr.Text(label="Location Information"),
        gr.Text(label="South Asia Cultural Relevance"),
        gr.Text(label="Cultural Knowledge Source"),
        gr.Text(label="Native Language Caption"),
        gr.Text(label="English Caption")
    ]

    # Inputs are passed positionally, so their order must follow the signature
    # process_submission(input_img, text_answer, multiple_choice, city, country, ...).
    # The city textbox and country dropdown are placed on the `city` and
    # `country` slots; the state dropdown takes the remaining `multiple_choice`
    # slot.
    # NOTE(review): process_submission stores `multiple_choice` under the
    # "continent" key, so the state ends up there — confirm the intended schema.
    submit_btn.click(
        process_submission,
        inputs=[
            image_input,
            cultural_text,
            selected_state,
            city_field,
            selected_country,
            sa_relevance,
            knowledge_source,
            native_caption,
            english_caption
        ],
        outputs=output_components
    )

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    gradio_app.launch()