crowdsource

Running

App Files Files Community

crowdsource / app.py

minemaster01

updated city and state

0bd155b verified 12 days ago

raw

history blame

7.94 kB

	import gradio as gr
	import os
	import json
	import uuid
	from datetime import datetime
	import shutil
	from huggingface_hub import HfApi, create_repo, upload_file, upload_folder

	# Local storage: one folder for the uploaded images, one for the JSON records.
	for _storage_dir in ("uploaded_images", "submissions"):
	    os.makedirs(_storage_dir, exist_ok=True)

	# Hugging Face Dataset settings.
	# HF_TOKEN must be provided as a secret of the Space; without it nothing is uploaded.
	HF_TOKEN = os.getenv("HF_TOKEN")
	# Target dataset repository, in "username/dataset-name" form.
	DATASET_NAME = "srishtiy/se-culture-dataset-results"
	# Set to True by setup_hf_dataset() once the dataset repo is known to exist.
	DATASET_CREATED = False

	def setup_hf_dataset():
	    """Create the Hugging Face dataset repo if it does not exist yet.

	    Reads the module-level HF_TOKEN and DATASET_NAME. On success the
	    module-level flag DATASET_CREATED is set to True; process_submission
	    checks that flag before uploading. Without a token only a warning is
	    printed. Errors from the Hub are printed rather than raised, so a
	    failure here leaves DATASET_CREATED False instead of stopping startup.
	    """
	    global DATASET_CREATED

	    if not HF_TOKEN:
	        print("Warning: HF_TOKEN not set. Data will be stored locally only.")
	        return

	    if DATASET_CREATED:
	        # Already initialised earlier in this process; nothing to do.
	        return

	    try:
	        # exist_ok=True makes this a no-op when the repo is already there.
	        create_repo(
	            DATASET_NAME,
	            repo_type="dataset",
	            token=HF_TOKEN,
	            exist_ok=True,
	        )
	    except Exception as e:
	        # Deliberate best-effort: report the problem and keep running
	        # with uploads disabled.
	        print(f"Error setting up dataset: {e}")
	    else:
	        DATASET_CREATED = True
	        print(f"Dataset {DATASET_NAME} is ready")

	def _save_submission_image(input_img, image_path):
	    """Store the submitted image at image_path (a .jpg file name)."""
	    if isinstance(input_img, str):
	        # A string is treated as a path to an existing file: copy it as-is.
	        shutil.copy(input_img, image_path)
	        return

	    # Anything else is treated as a PIL image. JPEG cannot hold alpha or
	    # palette data, and Pillow raises OSError when asked to write e.g. an
	    # RGBA or P image under a .jpg name, so such images are converted first.
	    # convert() returns a new image; the caller's object is left untouched.
	    if getattr(input_img, "mode", "RGB") not in ("1", "L", "RGB", "CMYK", "YCbCr"):
	        input_img = input_img.convert("RGB")
	    input_img.save(image_path)


	def process_submission(input_img, text_answer, multiple_choice, city, country, se_asia_relevance, culture_knowledge, native_caption, english_caption):
	    """Persist one submission locally, upload it if possible, and echo it back.

	    The image (if any) is written to "uploaded_images/" and the answers to a
	    JSON file in "submissions/"; both file names share the same
	    "<timestamp>_<uuid>" stem. When HF_TOKEN is set and DATASET_CREATED is
	    True, both files are also uploaded to the DATASET_NAME dataset repo.
	    Upload errors are printed, not raised.

	    Args:
	        input_img: A PIL image, a file path string, or None.
	        text_answer: Stored under the "cultural_relevance" key.
	        multiple_choice: Stored under the "continent" key.
	        city: Stored under the "city" key.
	        country: Stored under the "country" key.
	        se_asia_relevance: Stored under the "se_asia_relevance" key.
	        culture_knowledge: Stored under the "cultural_knowledge_source" key.
	        native_caption: Stored under the "native_caption" key.
	        english_caption: Stored under the "english_caption" key.

	    Returns:
	        An 8-tuple: the original image followed by seven display strings,
	        one per output component of the UI.

	    Note:
	        The UI passes these values positionally, so the order of the
	        ``inputs`` list of the click handler must match this signature.
	    """
	    # Unique, sortable stem shared by the image and the JSON record.
	    submission_id = str(uuid.uuid4())
	    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

	    # Save the image if one was provided.
	    image_path = None
	    if input_img is not None:
	        image_filename = f"{timestamp}_{submission_id}.jpg"
	        image_path = os.path.join("uploaded_images", image_filename)
	        _save_submission_image(input_img, image_path)

	    submission_data = {
	        "id": submission_id,
	        "timestamp": timestamp,
	        "image_filename": os.path.basename(image_path) if image_path else None,
	        "cultural_relevance": text_answer,
	        "continent": multiple_choice,
	        "city": city,
	        "country": country,
	        "se_asia_relevance": se_asia_relevance,
	        "cultural_knowledge_source": culture_knowledge,
	        "native_caption": native_caption,
	        "english_caption": english_caption,
	    }

	    # Write the record as UTF-8 without \u escapes so native-language
	    # captions stay human-readable in the stored file.
	    json_filename = f"{timestamp}_{submission_id}.json"
	    json_path = os.path.join("submissions", json_filename)
	    with open(json_path, "w", encoding="utf-8") as f:
	        json.dump(submission_data, f, indent=2, ensure_ascii=False)

	    # Mirror the files to the Hugging Face dataset when it is available.
	    if HF_TOKEN and DATASET_CREATED:
	        try:
	            api = HfApi()
	            api.upload_file(
	                path_or_fileobj=json_path,
	                path_in_repo=f"submissions/{json_filename}",
	                repo_id=DATASET_NAME,
	                repo_type="dataset",
	                token=HF_TOKEN,
	            )
	            if image_path and os.path.exists(image_path):
	                api.upload_file(
	                    path_or_fileobj=image_path,
	                    path_in_repo=f"images/{os.path.basename(image_path)}",
	                    repo_id=DATASET_NAME,
	                    repo_type="dataset",
	                    token=HF_TOKEN,
	                )
	            print(f"Submission {submission_id} uploaded to Hugging Face Dataset")
	        except Exception as e:
	            # Best-effort upload: the local copies already exist, so a Hub
	            # failure must not fail the user's submission.
	            print(f"Error uploading to dataset: {e}")

	    # Values shown back to the user, one per output component.
	    return (
	        input_img,
	        f"Your text response: {text_answer}",
	        f"Your selected option: {multiple_choice}",
	        f"Location: {city}, {country}",
	        f"SE Asia relevance: {se_asia_relevance}",
	        f"Cultural knowledge source: {culture_knowledge}",
	        f"Native caption: {native_caption}",
	        f"English caption: {english_caption}",
	    )

	# Make sure the Hugging Face dataset repo exists before the UI is built.
	# This is a top-level call, so it runs once when the module is loaded.
	setup_hf_dataset()

	# `country_to_states` (country name -> list of states/provinces) is used by
	# the dropdowns below. Fall back to an empty mapping, with a warning, when it
	# has not been defined earlier in this module, so the app can still start.
	if "country_to_states" not in globals():
	    print("Warning: country_to_states is not defined; country/state dropdowns will be empty.")
	    country_to_states = {}

	# Single Blocks-based UI. Components are rendered in the order they are
	# created inside the `with` block.
	with gr.Blocks(title="South Asian Image Data Collection") as gradio_app:
	    gr.Markdown("## South Asian Image Data Collection")
	    gr.Markdown("Upload an image and answer questions about its cultural significance.")

	    selected_country = gr.Dropdown(
	        choices=list(country_to_states.keys()),
	        label="Country where the image was taken:",
	        interactive=True
	    )
	    selected_state = gr.Dropdown(
	        choices=[],
	        label="State / Province",
	        interactive=True
	    )
	    city_field = gr.Textbox(label="City (optional)")

	    image_input = gr.Image(label="Upload an image", sources=['upload', 'webcam'], type="pil")
	    cultural_text = gr.Textbox(label="The image portrays culturally-relevant information in:")

	    sa_relevance = gr.Radio(
	        choices=[
	            "Yes. Unique to South Asia",
	            "Yes, people will likely think of South Asia when seeing the picture, but it may have low degree of similarity to other cultures.",
	            "Maybe, this culture did not originate from South Asia, but it's quite dominant in South Asia",
	            "Not really. It has some affiliation to South Asia, but actually does not represent South Asia or has stronger affiliation to cultures outside South Asia",
	            "No. Totally unrelated to South Asia"
	        ],
	        label="Is the image culturally relevant in South Asia?"
	    )

	    knowledge_source = gr.Radio(
	        choices=[
	            "I'm from this country/culture",
	            "I checked online resources (e.g., Wikipedia, articles, blogs)"
	        ],
	        label="How do you know about this culture?",
	        info="Please do not consult LLMs"
	    )

	    native_caption = gr.Textbox(label="Caption in Native Language:")
	    english_caption = gr.Textbox(label="English Caption:")

	    # One output component per element of the tuple returned by
	    # process_submission, in the same order.
	    output_components = [
	        gr.Image(label="Submitted Image"),
	        gr.Text(label="Text Response"),
	        gr.Text(label="State / Province"),
	        gr.Text(label="Location Information"),
	        gr.Text(label="South Asia Cultural Relevance"),
	        gr.Text(label="Cultural Knowledge Source"),
	        gr.Text(label="Native Language Caption"),
	        gr.Text(label="English Caption")
	    ]

	    def update_states(selected_country):
	        """Return a dropdown update listing the states of the chosen country."""
	        # gr.update() is used instead of gr.Dropdown.update(), which is not
	        # available in Gradio 4.x. Unknown countries yield an empty list.
	        return gr.update(choices=country_to_states.get(selected_country, []), value=None)

	    # Refresh the state dropdown whenever a different country is selected.
	    selected_country.change(fn=update_states, inputs=selected_country, outputs=selected_state)

	    submit_btn = gr.Button("Submit")

	    # Inputs are passed positionally, so they follow process_submission's
	    # parameter order: (input_img, text_answer, multiple_choice, city,
	    # country, se_asia_relevance, culture_knowledge, native_caption,
	    # english_caption).
	    # NOTE(review): process_submission has no dedicated state parameter, so
	    # the state is sent in the `multiple_choice` slot - confirm this mapping.
	    submit_btn.click(
	        process_submission,
	        inputs=[
	            image_input,
	            cultural_text,
	            selected_state,
	            city_field,
	            selected_country,
	            sa_relevance,
	            knowledge_source,
	            native_caption,
	            english_caption
	        ],
	        outputs=output_components
	    )

	# Start the Gradio server only when this file is run as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    gradio_app.launch()