import gradio as gr import pandas as pd from datetime import datetime import pytz import os def process_file(uploaded_file): # 1. Read Excel file (first sheet automatically) fname = uploaded_file.name.lower() if not fname.endswith(('.xls', '.xlsx', '.xlsm')): return "❌ Unsupported format. Please upload .xls/.xlsx/.xlsm", None try: df = pd.read_excel(uploaded_file.name) except Exception as e: return f"❌ Error reading file: {e}", None # 2. Define the output headers in order output_headers = [ "Usage", "District", "Address", "Longitude", "Latitude", "Floor", "Unit", "Area", "PriceInMillion", "PricePerSquareFeet", "InstrumentDate", "Year", "WeekNumber", "DeliveryDate", "MemoNo." ] # 3. Prepare an empty DataFrame output_df = pd.DataFrame(index=df.index, columns=output_headers) # 4. Map the first 7 columns from the input to the relevant fields: # Col 1 → Address # Col 2 → Floor # Col 3 → Unit # Col 4 → Area # Col 5 → PriceInMillion # Col 6 → PricePerSquareFeet # Col 7 → InstrumentDate output_df["Address"] = df.iloc[:, 0] output_df["Floor"] = df.iloc[:, 1] output_df["Unit"] = df.iloc[:, 2] output_df["Area"] = df.iloc[:, 3] output_df["PriceInMillion"] = df.iloc[:, 4] output_df["PricePerSquareFeet"] = df.iloc[:, 5] output_df["InstrumentDate"] = pd.to_datetime(df.iloc[:, 6], errors="coerce") # 5. Derive Year and ISO Week Number from InstrumentDate # Any invalid dates become NaT and yield NaN year/week output_df["Year"] = output_df["InstrumentDate"].dt.year output_df["WeekNumber"] = output_df["InstrumentDate"].dt.isocalendar().week # 6. Leave Usage, District, Longitude, Latitude, DeliveryDate, MemoNo. empty # (or populate them here if you have logic to do so) # 7. Generate filename based on Hong Kong date hk_tz = pytz.timezone("Asia/Hong_Kong") today_hk = datetime.now(hk_tz).strftime("%y%m%d") out_fname = f"data-clean-{today_hk}.xlsx" # 8. Save to Excel (in the current working directory) output_df.to_excel(out_fname, index=False) # Return the DataFrame for preview and the path to download return output_df, out_fname with gr.Blocks(title="Data Cleaner") as demo: gr.Markdown("## 🗂️ Excel → Cleaned Data Mapping") with gr.Row(): file_input = gr.File(label="Upload .xls/.xlsx/.xlsm") run_btn = gr.Button("Process") with gr.Row(): df_out = gr.DataFrame(label="Mapped Data Preview") download_btn = gr.File(label="Download Cleaned File") run_btn.click( fn=process_file, inputs=[file_input], outputs=[df_out, download_btn] ) if __name__ == "__main__": demo.launch()