Spaces:
Running
Running
File size: 2,901 Bytes
86bed75 14d7d35 f440bdc 0a5207e f440bdc 0a5207e f440bdc 0a5207e f440bdc 07f4651 bd7b118 f440bdc bd7b118 0a5207e f440bdc 0a5207e f440bdc 0a5207e f440bdc 86bed75 f440bdc 0a5207e f440bdc 0a5207e f440bdc 86bed75 f440bdc 86bed75 f440bdc 0a5207e f440bdc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import gradio as gr
import pandas as pd
from datetime import datetime
import pytz
import os
def process_file(uploaded_file):
# 1. Read Excel file (first sheet automatically)
fname = uploaded_file.name.lower()
if not fname.endswith(('.xls', '.xlsx', '.xlsm')):
return "β Unsupported format. Please upload .xls/.xlsx/.xlsm", None
try:
df = pd.read_excel(uploaded_file.name)
except Exception as e:
return f"β Error reading file: {e}", None
# 2. Define the output headers in order
output_headers = [
"Usage", "District", "Address", "Longitude", "Latitude",
"Floor", "Unit", "Area", "PriceInMillion", "PricePerSquareFeet",
"InstrumentDate", "Year", "WeekNumber", "DeliveryDate", "MemoNo."
]
# 3. Prepare an empty DataFrame
output_df = pd.DataFrame(index=df.index, columns=output_headers)
# 4. Map the first 7 columns from the input to the relevant fields:
# Col 1 β Address
# Col 2 β Floor
# Col 3 β Unit
# Col 4 β Area
# Col 5 β PriceInMillion
# Col 6 β PricePerSquareFeet
# Col 7 β InstrumentDate
output_df["Address"] = df.iloc[:, 0]
output_df["Floor"] = df.iloc[:, 1]
output_df["Unit"] = df.iloc[:, 2]
output_df["Area"] = df.iloc[:, 3]
output_df["PriceInMillion"] = df.iloc[:, 4]
output_df["PricePerSquareFeet"] = df.iloc[:, 5]
output_df["InstrumentDate"] = pd.to_datetime(df.iloc[:, 6], errors="coerce")
# 5. Derive Year and ISO Week Number from InstrumentDate
# Any invalid dates become NaT and yield NaN year/week
output_df["Year"] = output_df["InstrumentDate"].dt.year
output_df["WeekNumber"] = output_df["InstrumentDate"].dt.isocalendar().week
# 6. Leave Usage, District, Longitude, Latitude, DeliveryDate, MemoNo. empty
# (or populate them here if you have logic to do so)
# 7. Generate filename based on Hong Kong date
hk_tz = pytz.timezone("Asia/Hong_Kong")
today_hk = datetime.now(hk_tz).strftime("%y%m%d")
out_fname = f"data-clean-{today_hk}.xlsx"
# 8. Save to Excel (in the current working directory)
output_df.to_excel(out_fname, index=False)
# Return the DataFrame for preview and the path to download
return output_df, out_fname
with gr.Blocks(title="Data Cleaner") as demo:
gr.Markdown("## ποΈ Excel β Cleaned Data Mapping")
with gr.Row():
file_input = gr.File(label="Upload .xls/.xlsx/.xlsm")
run_btn = gr.Button("Process")
with gr.Row():
df_out = gr.DataFrame(label="Mapped Data Preview")
download_btn = gr.File(label="Download Cleaned File")
run_btn.click(
fn=process_file,
inputs=[file_input],
outputs=[df_out, download_btn]
)
if __name__ == "__main__":
demo.launch()
|