File size: 6,426 Bytes
bddfb4b
96e76ac
 
 
 
 
654128b
96e76ac
 
 
 
bddfb4b
654128b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bddfb4b
96e76ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
654128b
96e76ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
654128b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96e76ac
bddfb4b
 
654128b
 
 
bddfb4b
 
 
 
 
1cf3975
bddfb4b
 
 
 
1cf3975
 
 
 
 
bddfb4b
1cf3975
bddfb4b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1cf3975
bddfb4b
 
 
 
1cf3975
bddfb4b
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import gradio as gr
import os
import json
import uuid
from datetime import datetime
import shutil
from huggingface_hub import HfApi, create_repo, upload_file, upload_folder

# Local storage: uploaded images and per-submission JSON records each get
# their own directory, created up front if missing.
for _storage_dir in ("uploaded_images", "submissions"):
    os.makedirs(_storage_dir, exist_ok=True)
del _storage_dir  # keep the loop variable out of the module namespace

# Hugging Face Dataset configuration
# HF_TOKEN must be provided as a secret in the Space; without it nothing is
# uploaded and submissions stay on local disk.
HF_TOKEN = os.getenv("HF_TOKEN")
# Target dataset repo, in "username/dataset-name" form.
DATASET_NAME = "srishtiy/se-culture-dataset-results"
# Set to True by setup_hf_dataset() once the dataset repo is known to exist.
DATASET_CREATED = False

def setup_hf_dataset():
    """Initialize the Hugging Face dataset if it doesn't exist.

    Calls ``create_repo`` with ``exist_ok=True`` for ``DATASET_NAME`` and, on
    success, sets the module-level ``DATASET_CREATED`` flag to True so later
    calls do nothing. Without ``HF_TOKEN`` only a warning is printed.
    Any exception raised while creating the repo is printed rather than
    propagated, leaving ``DATASET_CREATED`` unchanged.
    """
    global DATASET_CREATED

    if not HF_TOKEN:
        print("Warning: HF_TOKEN not set. Data will be stored locally only.")
        return
    if DATASET_CREATED:
        # Repo was already created/verified earlier in this process.
        return

    try:
        create_repo(
            DATASET_NAME,
            repo_type="dataset",
            token=HF_TOKEN,
            exist_ok=True
        )
    except Exception as e:
        # Best effort: the app keeps running and stores submissions locally.
        print(f"Error setting up dataset: {e}")
    else:
        DATASET_CREATED = True
        print(f"Dataset {DATASET_NAME} is ready")

def process_submission(input_img, text_answer, multiple_choice, city, country, se_asia_relevance, culture_knowledge, native_caption, english_caption):
    """Store one form submission locally and, when configured, on the Hub.

    The image (if any) is written to ``uploaded_images/`` and the answers to a
    JSON file in ``submissions/``; both share a ``<timestamp>_<uuid>`` stem.
    When ``HF_TOKEN`` and ``DATASET_CREATED`` are both truthy, the two files
    are also uploaded to the ``DATASET_NAME`` dataset repo. Upload errors are
    printed, not raised, so the local copy is kept either way.

    Args:
        input_img: A file path (str), an image object exposing ``save()``
            (e.g. a PIL image), or None when no image was provided.
        text_answer: Stored under ``"cultural_relevance"``.
        multiple_choice: Stored under ``"continent"``.
        city: Stored under ``"city"``.
        country: Stored under ``"country"``.
        se_asia_relevance: Stored under ``"se_asia_relevance"``.
        culture_knowledge: Stored under ``"cultural_knowledge_source"``.
        native_caption: Stored under ``"native_caption"``.
        english_caption: Stored under ``"english_caption"``.

    Returns:
        An 8-tuple: the original ``input_img`` followed by seven display
        strings echoing the submitted answers.
    """
    # Generate unique ID for this submission
    submission_id = str(uuid.uuid4())
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    file_stem = f"{timestamp}_{submission_id}"

    # The directories are normally created at import time; re-creating them
    # here keeps a submission from failing if they were removed meanwhile.
    os.makedirs("uploaded_images", exist_ok=True)
    os.makedirs("submissions", exist_ok=True)

    # Save the image if provided
    image_path = None
    if input_img is not None:
        image_path = os.path.join("uploaded_images", f"{file_stem}.jpg")

        if isinstance(input_img, str):  # If it's a file path
            shutil.copy(input_img, image_path)
        else:  # If it's a PIL Image
            # JPEG cannot encode alpha/palette modes (RGBA, LA, P, ...);
            # saving such an image to a .jpg path raises OSError, so convert
            # it to RGB first. Modes JPEG can write are left untouched.
            jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
            image_to_save = input_img
            mode = getattr(input_img, "mode", None)
            if mode is not None and mode not in jpeg_modes:
                image_to_save = input_img.convert("RGB")
            image_to_save.save(image_path)

    # Create a data structure for the submission
    submission_data = {
        "id": submission_id,
        "timestamp": timestamp,
        "image_filename": os.path.basename(image_path) if image_path else None,
        "cultural_relevance": text_answer,
        "continent": multiple_choice,
        "city": city,
        "country": country,
        "se_asia_relevance": se_asia_relevance,
        "cultural_knowledge_source": culture_knowledge,
        "native_caption": native_caption,
        "english_caption": english_caption
    }

    # Save the data as JSON. Written as UTF-8 without ASCII escaping so
    # native-language captions stay readable in the stored file.
    json_filename = f"{file_stem}.json"
    json_path = os.path.join("submissions", json_filename)
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(submission_data, f, indent=2, ensure_ascii=False)

    # Upload to Hugging Face Dataset if token is available
    if HF_TOKEN and DATASET_CREATED:
        try:
            api = HfApi()
            # Upload the JSON data
            api.upload_file(
                path_or_fileobj=json_path,
                path_in_repo=f"submissions/{json_filename}",
                repo_id=DATASET_NAME,
                repo_type="dataset",
                token=HF_TOKEN
            )
            # Upload the image if it exists
            if image_path and os.path.exists(image_path):
                api.upload_file(
                    path_or_fileobj=image_path,
                    path_in_repo=f"images/{os.path.basename(image_path)}",
                    repo_id=DATASET_NAME,
                    repo_type="dataset",
                    token=HF_TOKEN
                )
            print(f"Submission {submission_id} uploaded to Hugging Face Dataset")
        except Exception as e:
            # Best effort: the local files written above remain the record.
            print(f"Error uploading to dataset: {e}")

    # Return values to display in the interface
    return (
        input_img,
        f"Your text response: {text_answer}",
        f"Your selected option: {multiple_choice}",
        f"Location: {city}, {country}",
        f"SE Asia relevance: {se_asia_relevance}",
        f"Cultural knowledge source: {culture_knowledge}",
        f"Native caption: {native_caption}",
        f"English caption: {english_caption}",
    )

# Initialize the dataset once at import time: process_submission() only
# uploads when DATASET_CREATED has been set by this call.
setup_hf_dataset()

# Form definition. The input components are passed positionally to
# process_submission, so their order and count must match its nine parameters;
# the eight outputs match the 8-tuple it returns.
gradio_app = gr.Interface(
    process_submission,
    inputs=[
        gr.Image(label="Upload an image", sources=['upload', 'webcam'], type="pil"),
        gr.Textbox(label="The image portrays culturally-relevant information in:", placeholder="what culture does this image represent?"),
        # Feeds the `multiple_choice` parameter (saved under the "continent"
        # key and echoed as "Multiple Choice Response"). Without this input
        # only eight values were supplied for nine parameters.
        # NOTE(review): the choices and label are assumed - confirm the
        # intended wording.
        gr.Radio(
            choices=[
                "Africa",
                "Antarctica",
                "Asia",
                "Europe",
                "North America",
                "Oceania",
                "South America"
            ],
            label="Continent where the image was taken:"
        ),
        gr.Textbox(label="City where the image was taken:", placeholder="Enter city name"),
        gr.Textbox(label="Country where the image was taken:", placeholder="Enter country name"),
        gr.Radio(
            choices=[
                "Yes. Unique to South Asia",
                "Yes, people will likely think of South Asia when seeing the picture, but it may have low degree of similarity to other cultures.",
                "Maybe, this culture did not originate from South Asia, but it's quite dominant in South Asia",
                "Not really. It has some affiliation to South Asia, but actually does not represent South Asia or has stronger affiliation to cultures outside South Asia",
                "No. Totally unrelated to South Asia"
            ],
            label="Is the image culturally relevant in South Asia?"
        ),
        gr.Radio(
            choices=[
                "I'm from this country/culture",
                "I checked online resources (e.g., Wikipedia, articles, blogs)"
            ],
            label="How do you know about this culture?",
            info="Please do not consult LLMs (e.g., GPT-4o, Claude, Command-R, etc.)"
        ),
        gr.Textbox(label="Caption in Native Language:", placeholder="Enter caption in the native language of the culture depicted"),
        gr.Textbox(label="English Caption:", placeholder="Enter caption in English")
    ],
    outputs=[
        gr.Image(label="Submitted Image"),
        gr.Text(label="Text Response"),
        gr.Text(label="Multiple Choice Response"),
        gr.Text(label="Location Information"),
        gr.Text(label="South Asia Cultural Relevance"),
        gr.Text(label="Cultural Knowledge Source"),
        gr.Text(label="Native Language Caption"),
        gr.Text(label="English Caption")
    ],
    title="South Asian Image Data Collection",
    description="Upload an image and answer questions about its cultural significance."
)

# Launch the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    gradio_app.launch()