leadingbridge committed on
Commit
86bed75
·
verified ·
1 Parent(s): 0a5207e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -41
app.py CHANGED
@@ -1,68 +1,67 @@
 
1
  import pandas as pd
2
  from datetime import datetime
3
- import os
4
 
5
def map_excel(input_path):
    """Map the first sheet of an Excel workbook onto the data-clean layout.

    Source columns are read by position, not by header name. The first six
    columns are copied to "Usage", "Floor", "Unit", "Area", "PriceInMillion"
    and "District" respectively; the seventh is parsed as a date into
    "InstrumentDate", from which "Year" and "WeekNumber" are derived. All
    other output columns are left empty.

    Args:
        input_path: Path to an ``.xls``, ``.xlsx`` or ``.xlsm`` workbook.

    Returns:
        The name of the workbook that was written, ``data-clean-YYMMDD.xlsx``
        (relative to the current working directory).

    Raises:
        ValueError: If the file extension is not supported, or if the first
            sheet has fewer than the seven columns the mapping needs.
    """
    # 1. Validate extension
    ext = os.path.splitext(input_path)[1].lower()
    if ext not in ('.xls', '.xlsx', '.xlsm'):
        raise ValueError("Unsupported file format: please use .xls, .xlsx, or .xlsm")

    # 2. Read the first sheet by position (0)
    df = pd.read_excel(input_path, sheet_name=0)

    # The mapping below indexes columns 0..6; fail with a clear message
    # instead of an opaque positional IndexError from pandas.
    required_columns = 7
    if df.shape[1] < required_columns:
        raise ValueError(
            f"Expected at least {required_columns} columns in the first sheet, "
            f"found {df.shape[1]}"
        )

    # 3. Define output headers
    headers = [
        "Usage", "District", "Address", "Longitude", "Latitude",
        "Floor", "Unit", "Area", "PriceInMillion",
        "InstrumentDate", "Year", "WeekNumber",
        "DeliveryDate", "MemoNo.",
    ]

    # 4. Prepare an empty DataFrame with one row per input row
    output_df = pd.DataFrame(index=range(len(df)), columns=headers)

    # 5. Map input columns by positional index (0-based source position).
    # NOTE(review): earlier comments described source column 1 as
    # "AddressPricePerSquareFeet" and source column 6 as "PricePerSquareFeet",
    # yet they are stored under "Usage" and "District" - confirm this is the
    # intended mapping.
    positional_mapping = {
        "Usage": 0,
        "Floor": 1,
        "Unit": 2,
        "Area": 3,
        "PriceInMillion": 4,
        "District": 5,
    }
    for header, position in positional_mapping.items():
        output_df[header] = df.iloc[:, position]

    # Source column 7 -> InstrumentDate; unparseable values become NaT
    output_df["InstrumentDate"] = pd.to_datetime(df.iloc[:, 6], errors='coerce')

    # 6. Derive Year and (ISO) WeekNumber from InstrumentDate
    output_df["Year"] = output_df["InstrumentDate"].dt.year
    output_df["WeekNumber"] = output_df["InstrumentDate"].dt.isocalendar().week

    # 7. "DeliveryDate" and "MemoNo." (like "Address", "Longitude" and
    # "Latitude") have no source column here and are left empty.

    # 8. Generate output filename with current date suffix (YYMMDD)
    date_suffix = datetime.now().strftime("%y%m%d")
    output_filename = f"data-clean-{date_suffix}.xlsx"

    # 9. Save to Excel
    output_df.to_excel(output_filename, index=False)

    return output_filename
 
 
 
 
 
 
 
 
64
 
65
if __name__ == "__main__":
    import sys

    # Use the workbook path given on the command line when there is one;
    # otherwise fall back to the placeholder path (replace with your path).
    input_file = sys.argv[1] if len(sys.argv) > 1 else "your-input-file.xlsx"
    out_file = map_excel(input_file)
    print(f"Mapped data saved to: {out_file}")
 
1
+ import gradio as gr
2
  import pandas as pd
3
  from datetime import datetime
 
4
 
5
def _error_result(message):
    """Build the (table, download) pair shown in the UI when processing fails."""
    # The first output is rendered as a table, so the message is wrapped in a
    # one-cell DataFrame rather than returned as a bare string.
    return pd.DataFrame({"Error": [message]}), None


def process_file(file):
    """Map an uploaded Excel workbook onto the data-clean layout.

    Source columns are read from the first sheet by position, not by header
    name. The first six columns are copied to "Usage", "Floor", "Unit",
    "Area", "PriceInMillion" and "District" respectively; the seventh is
    parsed as a date into "InstrumentDate", from which "Year" and
    "WeekNumber" are derived. All other output columns stay blank.

    The result is also written to ``data-clean-YYMMDD.xlsx`` in the current
    working directory, so a later run on the same day overwrites it.

    Args:
        file: The uploaded file: either an object exposing the path as
            ``.name`` or the path string itself. ``None`` (nothing uploaded)
            is reported as an error.

    Returns:
        A ``(dataframe, path)`` tuple. On success ``dataframe`` is the mapped
        data and ``path`` the name of the written workbook. On failure
        ``dataframe`` is a one-cell frame holding the error message and
        ``path`` is ``None``.
    """
    upload_error = "Error: Please upload a .xls, .xlsx or .xlsm file."

    # Nothing uploaded yet: report it instead of failing on `None.name`.
    if file is None:
        return _error_result(upload_error)

    # 1. Validate extension
    path = str(getattr(file, "name", file))
    if not path.lower().endswith(('.xls', '.xlsx', '.xlsm')):
        return _error_result(upload_error)

    # 2. Read first sheet
    df = pd.read_excel(path, sheet_name=0)

    # The mapping below indexes columns 0..6; report a clear error instead of
    # an opaque positional IndexError from pandas.
    required_columns = 7
    if df.shape[1] < required_columns:
        return _error_result(
            f"Error: Expected at least {required_columns} columns in the "
            f"first sheet, found {df.shape[1]}."
        )

    # 3. Prepare output headers; unmapped columns stay as empty strings
    output_headers = [
        "Usage", "District", "Address", "Longitude", "Latitude",
        "Floor", "Unit", "Area", "PriceInMillion",
        "InstrumentDate", "Year", "WeekNumber",
        "DeliveryDate", "MemoNo.",
    ]
    output_df = pd.DataFrame("", index=range(len(df)), columns=output_headers)

    # 4. Column-by-column mapping (0-based source position).
    # NOTE(review): the original comment says source column 6 holds
    # PricePerSquareFeet, yet it is stored under "District" - confirm intent.
    positional_mapping = {
        "Usage": 0,
        "Floor": 1,
        "Unit": 2,
        "Area": 3,
        "PriceInMillion": 4,
        "District": 5,
    }
    for header, position in positional_mapping.items():
        output_df[header] = df.iloc[:, position]

    # Source column 7 -> InstrumentDate; unparseable values become NaT
    output_df["InstrumentDate"] = pd.to_datetime(df.iloc[:, 6], errors="coerce")

    # 5. Derive Year & (ISO) WeekNumber from InstrumentDate
    output_df["Year"] = output_df["InstrumentDate"].dt.year
    output_df["WeekNumber"] = output_df["InstrumentDate"].dt.isocalendar().week

    # 6. DeliveryDate & MemoNo. have no source column here and stay blank.

    # 7. Generate output filename: data-clean-YYMMDD.xlsx
    date_suffix = datetime.now().strftime("%y%m%d")
    out_name = f"data-clean-{date_suffix}.xlsx"

    # 8. Save to Excel
    output_df.to_excel(out_name, index=False)

    return output_df, out_name
 
56
 
57
# Gradio UI: upload an Excel file, run process_file on it, then show the
# mapped table and offer the generated workbook for download.
with gr.Blocks(title="Excel → data‑clean Mapper") as demo:
    gr.Markdown("## Upload your Excel file (.xls/.xlsx/.xlsm) for data‑clean mapping")
    # Input row: the file picker and the button that triggers processing.
    with gr.Row():
        file_in = gr.File(label="Input File")
        btn = gr.Button("Process")
    # Output row: the mapped table and the downloadable result file.
    with gr.Row():
        df_out = gr.Dataframe(label="Mapped Data")
        download = gr.File(label="Download Mapped File")
    # process_file returns a 2-tuple; its items fill df_out and download in order.
    btn.click(fn=process_file, inputs=[file_in], outputs=[df_out, download])

# Runs at module level, so the app is launched whenever this file is executed
# or imported.
demo.launch()