Spaces:
Running
Running
import gradio as gr | |
import pandas as pd | |
from datetime import datetime | |
import pytz | |
import os | |
def process_file(uploaded_file): | |
# 1. Read Excel file (first sheet automatically) | |
fname = uploaded_file.name.lower() | |
if not fname.endswith(('.xls', '.xlsx', '.xlsm')): | |
return "β Unsupported format. Please upload .xls/.xlsx/.xlsm", None | |
try: | |
df = pd.read_excel(uploaded_file.name) | |
except Exception as e: | |
return f"β Error reading file: {e}", None | |
# 2. Define the output headers in order | |
output_headers = [ | |
"Usage", "District", "Address", "Longitude", "Latitude", | |
"Floor", "Unit", "Area", "PriceInMillion", "PricePerSquareFeet", | |
"InstrumentDate", "Year", "WeekNumber", "DeliveryDate", "MemoNo." | |
] | |
# 3. Prepare an empty DataFrame | |
output_df = pd.DataFrame(index=df.index, columns=output_headers) | |
# 4. Map the first 7 columns from the input to the relevant fields: | |
# Col 1 β Address | |
# Col 2 β Floor | |
# Col 3 β Unit | |
# Col 4 β Area | |
# Col 5 β PriceInMillion | |
# Col 6 β PricePerSquareFeet | |
# Col 7 β InstrumentDate | |
output_df["Address"] = df.iloc[:, 0] | |
output_df["Floor"] = df.iloc[:, 1] | |
output_df["Unit"] = df.iloc[:, 2] | |
output_df["Area"] = df.iloc[:, 3] | |
output_df["PriceInMillion"] = df.iloc[:, 4] | |
output_df["PricePerSquareFeet"] = df.iloc[:, 5] | |
output_df["InstrumentDate"] = pd.to_datetime(df.iloc[:, 6], errors="coerce") | |
# 5. Derive Year and ISO Week Number from InstrumentDate | |
# Any invalid dates become NaT and yield NaN year/week | |
output_df["Year"] = output_df["InstrumentDate"].dt.year | |
output_df["WeekNumber"] = output_df["InstrumentDate"].dt.isocalendar().week | |
# 6. Leave Usage, District, Longitude, Latitude, DeliveryDate, MemoNo. empty | |
# (or populate them here if you have logic to do so) | |
# 7. Generate filename based on Hong Kong date | |
hk_tz = pytz.timezone("Asia/Hong_Kong") | |
today_hk = datetime.now(hk_tz).strftime("%y%m%d") | |
out_fname = f"data-clean-{today_hk}.xlsx" | |
# 8. Save to Excel (in the current working directory) | |
output_df.to_excel(out_fname, index=False) | |
# Return the DataFrame for preview and the path to download | |
return output_df, out_fname | |
with gr.Blocks(title="Data Cleaner") as demo: | |
gr.Markdown("## ποΈ Excel β Cleaned Data Mapping") | |
with gr.Row(): | |
file_input = gr.File(label="Upload .xls/.xlsx/.xlsm") | |
run_btn = gr.Button("Process") | |
with gr.Row(): | |
df_out = gr.DataFrame(label="Mapped Data Preview") | |
download_btn = gr.File(label="Download Cleaned File") | |
run_btn.click( | |
fn=process_file, | |
inputs=[file_input], | |
outputs=[df_out, download_btn] | |
) | |
if __name__ == "__main__": | |
demo.launch() | |