Spaces:

leadingbridge
/

data-clean

Running

App Files Files Community

data-clean / app.py

leadingbridge

Update app.py

f440bdc verified 6 days ago

raw

history blame contribute delete

2.9 kB

	import gradio as gr
	import pandas as pd
	from datetime import datetime
	import pytz
	import os

	def process_file(uploaded_file):
	# 1. Read Excel file (first sheet automatically)
	fname = uploaded_file.name.lower()
	if not fname.endswith(('.xls', '.xlsx', '.xlsm')):
	return "❌ Unsupported format. Please upload .xls/.xlsx/.xlsm", None

	try:
	df = pd.read_excel(uploaded_file.name)
	except Exception as e:
	return f"❌ Error reading file: {e}", None

	# 2. Define the output headers in order
	output_headers = [
	"Usage", "District", "Address", "Longitude", "Latitude",
	"Floor", "Unit", "Area", "PriceInMillion", "PricePerSquareFeet",
	"InstrumentDate", "Year", "WeekNumber", "DeliveryDate", "MemoNo."
	]

	# 3. Prepare an empty DataFrame
	output_df = pd.DataFrame(index=df.index, columns=output_headers)

	# 4. Map the first 7 columns from the input to the relevant fields:
	# Col 1 → Address
	# Col 2 → Floor
	# Col 3 → Unit
	# Col 4 → Area
	# Col 5 → PriceInMillion
	# Col 6 → PricePerSquareFeet
	# Col 7 → InstrumentDate
	output_df["Address"] = df.iloc[:, 0]
	output_df["Floor"] = df.iloc[:, 1]
	output_df["Unit"] = df.iloc[:, 2]
	output_df["Area"] = df.iloc[:, 3]
	output_df["PriceInMillion"] = df.iloc[:, 4]
	output_df["PricePerSquareFeet"] = df.iloc[:, 5]
	output_df["InstrumentDate"] = pd.to_datetime(df.iloc[:, 6], errors="coerce")

	# 5. Derive Year and ISO Week Number from InstrumentDate
	# Any invalid dates become NaT and yield NaN year/week
	output_df["Year"] = output_df["InstrumentDate"].dt.year
	output_df["WeekNumber"] = output_df["InstrumentDate"].dt.isocalendar().week

	# 6. Leave Usage, District, Longitude, Latitude, DeliveryDate, MemoNo. empty
	# (or populate them here if you have logic to do so)

	# 7. Generate filename based on Hong Kong date
	hk_tz = pytz.timezone("Asia/Hong_Kong")
	today_hk = datetime.now(hk_tz).strftime("%y%m%d")
	out_fname = f"data-clean-{today_hk}.xlsx"

	# 8. Save to Excel (in the current working directory)
	output_df.to_excel(out_fname, index=False)

	# Return the DataFrame for preview and the path to download
	return output_df, out_fname

	with gr.Blocks(title="Data Cleaner") as demo:
	gr.Markdown("## 🗂️ Excel → Cleaned Data Mapping")
	with gr.Row():
	file_input = gr.File(label="Upload .xls/.xlsx/.xlsm")
	run_btn = gr.Button("Process")
	with gr.Row():
	df_out = gr.DataFrame(label="Mapped Data Preview")
	download_btn = gr.File(label="Download Cleaned File")
	run_btn.click(
	fn=process_file,
	inputs=[file_input],
	outputs=[df_out, download_btn]
	)

	if __name__ == "__main__":
	demo.launch()