Spaces:

leadingbridge
/

data-clean

Running

App Files Community

leadingbridge committed on Apr 20

Commit

86bed75

verified ·

1 Parent(s): 0a5207e

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -41

app.py CHANGED Viewed

@@ -1,68 +1,67 @@
 import pandas as pd
 from datetime import datetime
-import os
-def map_excel(input_path):
     # 1. Validate extension
-    ext = os.path.splitext(input_path)[1].lower()
-    if ext not in ('.xls', '.xlsx', '.xlsm'):
-        raise ValueError("Unsupported file format: please use .xls, .xlsx, or .xlsm")
-    # 2. Read the first sheet by position (0)
-    df = pd.read_excel(input_path, sheet_name=0)
-    # 3. Define output headers
-    headers = [
         "Usage", "District", "Address", "Longitude", "Latitude",
         "Floor", "Unit", "Area", "PriceInMillion",
         "InstrumentDate", "Year", "WeekNumber",
         "DeliveryDate", "MemoNo."
     ]
-    # 4. Prepare an empty DataFrame
-    output_df = pd.DataFrame(index=range(len(df)), columns=headers)
-    # 5. Map input columns by positional index:
-    #    Column 1 → AddressPricePerSquareFeet  (we’ll assume this fills "Usage" or adjust as needed)
     output_df["Usage"] = df.iloc[:, 0]
-    #    Column 2 → Floor
     output_df["Floor"] = df.iloc[:, 1]
-    #    Column 3 → Unit
     output_df["Unit"] = df.iloc[:, 2]
-    #    Column 4 → Area
     output_df["Area"] = df.iloc[:, 3]
-    #    Column 5 → PriceInMillion
     output_df["PriceInMillion"] = df.iloc[:, 4]
-    #    Column 6 → PricePerSquareFeet (maps into "District" if that’s your intended field)
     output_df["District"] = df.iloc[:, 5]
-    #    Column 7 → InstrumentDate
-    output_df["InstrumentDate"] = pd.to_datetime(df.iloc[:, 6], errors='coerce')
-    # 6. Derive Year and WeekNumber from InstrumentDate
     output_df["Year"] = output_df["InstrumentDate"].dt.year
     output_df["WeekNumber"] = output_df["InstrumentDate"].dt.isocalendar().week
-    # 7. Optionally compute DeliveryDate or MemoNo. if you have rules
-    #    Here we leave them blank or copy from another column:
-    # output_df["DeliveryDate"] = df.get("DeliveryDate", pd.NA)
-    # output_df["MemoNo."] = df.get("MemoNo.", pd.NA)
-    # 8. Generate output filename with current date suffix
     date_suffix = datetime.now().strftime("%y%m%d")
-    output_filename = f"data-clean-{date_suffix}.xlsx"
-    # 9. Save to Excel
-    output_df.to_excel(output_filename, index=False)
-    return output_filename
-if __name__ == "__main__":
-    input_file = "your-input-file.xlsx"  # replace with your path
-    out_file = map_excel(input_file)
-    print(f"Mapped data saved to: {out_file}")

+import gradio as gr
 import pandas as pd
 from datetime import datetime
+def process_file(file):
     # 1. Validate extension
+    name = file.name.lower()
+    if not name.endswith(('.xls', '.xlsx', '.xlsm')):
+        return "Error: Please upload a .xls, .xlsx or .xlsm file.", None
+    # 2. Read first sheet
+    df = pd.read_excel(file.name, sheet_name=0)
+    # 3. Prepare output headers
+    output_headers = [
         "Usage", "District", "Address", "Longitude", "Latitude",
         "Floor", "Unit", "Area", "PriceInMillion",
         "InstrumentDate", "Year", "WeekNumber",
         "DeliveryDate", "MemoNo."
     ]
+    output_df = pd.DataFrame("", index=range(len(df)), columns=output_headers)
+    # 4. Column‑by‑column mapping
+    #  Column 1 → Usage
     output_df["Usage"] = df.iloc[:, 0]
+    #  Column 2 → Floor
     output_df["Floor"] = df.iloc[:, 1]
+    #  Column 3 → Unit
     output_df["Unit"] = df.iloc[:, 2]
+    #  Column 4 → Area
     output_df["Area"] = df.iloc[:, 3]
+    #  Column 5 → PriceInMillion
     output_df["PriceInMillion"] = df.iloc[:, 4]
+    #  Column 6 → District  (mapped from PricePerSquareFeet)
     output_df["District"] = df.iloc[:, 5]
+    #  Column 7 → InstrumentDate
+    output_df["InstrumentDate"] = pd.to_datetime(df.iloc[:, 6], errors="coerce")
+    # 5. Derive Year & WeekNumber from InstrumentDate
     output_df["Year"] = output_df["InstrumentDate"].dt.year
     output_df["WeekNumber"] = output_df["InstrumentDate"].dt.isocalendar().week
+    # 6. (Optional) leave DeliveryDate & MemoNo. blank
+    #     or map from other columns if available:
+    # output_df["DeliveryDate"] = pd.to_datetime(df.get("DeliveryDate", pd.NA))
+    # output_df["MemoNo."] = df.get("MemoNo.", "")
+    # 7. Generate output filename: data-clean-YYMMDD.xlsx
     date_suffix = datetime.now().strftime("%y%m%d")
+    out_name = f"data-clean-{date_suffix}.xlsx"
+    # 8. Save to Excel
+    output_df.to_excel(out_name, index=False)
+    return output_df, out_name
+with gr.Blocks(title="Excel → data‑clean Mapper") as demo:
+    gr.Markdown("## Upload your Excel file (.xls/.xlsx/.xlsm) for data‑clean mapping")
+    with gr.Row():
+        file_in = gr.File(label="Input File")
+        btn = gr.Button("Process")
+    with gr.Row():
+        df_out = gr.Dataframe(label="Mapped Data")
+        download = gr.File(label="Download Mapped File")
+    btn.click(fn=process_file, inputs=[file_in], outputs=[df_out, download])
+demo.launch()