Peter Yang committed on
Commit
0955ea8
·
1 Parent(s): de0f1e2

fix the missing part

Browse files
pages/15_📈_Table_Data_Cleaning.py CHANGED
@@ -7,77 +7,77 @@ import base64
7
  # st.set_page_config(layout="wide")
8
 
9
  # Function for the CSV Visualization App
10
- def app():
11
-
12
- hide_streamlit_style = """
13
- <style>
14
- #MainMenu {visibility: hidden;}
15
- footer {visibility: hidden;}
16
- </style>
17
- """
18
- st.markdown(hide_streamlit_style, unsafe_allow_html=True)
19
-
20
- st.title("CSV Data Cleaning Tool")
21
-
22
- st.markdown("Upload one or multiple CSV files to preprocess and clean your files quickly and stress free.")
23
-
24
- uploaded_files = st.file_uploader("Choose CSV files", type="csv", accept_multiple_files=True)
25
-
26
- dataframes = []
27
-
28
- if uploaded_files:
29
- for file in uploaded_files:
30
- file.seek(0)
31
- df = pd.read_csv(file)
32
- dataframes.append(df)
33
-
34
- if len(dataframes) > 1:
35
- merge = st.checkbox("Merge uploaded CSV files")
36
-
37
- if merge:
38
- # Merge options
39
- keep_first_header_only = st.selectbox("Keep only the header (first row) of the first file", ["Yes", "No"])
40
- remove_duplicate_rows = st.selectbox("Remove duplicate rows", ["No", "Yes"])
41
- remove_empty_rows = st.selectbox("Remove empty rows", ["Yes", "No"])
42
- end_line = st.selectbox("End line", ["\\n", "\\r\\n"])
43
-
44
- try:
45
- if keep_first_header_only == "Yes":
46
- for i, df in enumerate(dataframes[1:]):
47
- df.columns = dataframes[0].columns.intersection(df.columns)
48
- dataframes[i+1] = df
49
-
50
- merged_df = pd.concat(dataframes, ignore_index=True, join='outer')
51
-
52
- if remove_duplicate_rows == "Yes":
53
- merged_df.drop_duplicates(inplace=True)
54
-
55
- if remove_empty_rows == "Yes":
56
- merged_df.dropna(how="all", inplace=True)
57
-
58
- dataframes = [merged_df]
59
-
60
- except ValueError as e:
61
- st.error("Please make sure columns match in all files. If you don't want them to match, select 'No' in the first option.")
62
- st.stop()
63
-
64
- # Show or hide DataFrames
65
- show_dataframes = st.checkbox("Show DataFrames", value=True)
66
-
67
- if show_dataframes:
68
- for i, df in enumerate(dataframes):
69
- st.write(f"DataFrame {i + 1}")
70
- st.dataframe(df)
71
-
72
- if st.button("Download cleaned data"):
73
- for i, df in enumerate(dataframes):
74
- csv = df.to_csv(index=False)
75
- b64 = base64.b64encode(csv.encode()).decode()
76
- href = f'<a href="data:file/csv;base64,{b64}" download="cleaned_data_{i + 1}.csv">Download cleaned_data_{i + 1}.csv</a>'
77
- st.markdown(href, unsafe_allow_html=True)
78
- else:
79
- st.warning("Please upload CSV file(s).")
80
- st.stop()
81
- app()
82
 
83
 
 
7
  # st.set_page_config(layout="wide")
8
 
9
  # Function for the CSV Visualization App
10
+ # def app():
11
+
12
+ hide_streamlit_style = """
13
+ <style>
14
+ #MainMenu {visibility: hidden;}
15
+ footer {visibility: hidden;}
16
+ </style>
17
+ """
18
+ st.markdown(hide_streamlit_style, unsafe_allow_html=True)
19
+
20
+ st.title("CSV Data Cleaning Tool")
21
+
22
+ st.markdown("Upload one or multiple CSV files to preprocess and clean your files quickly and stress free.")
23
+
24
+ uploaded_files = st.file_uploader("Choose CSV files", type="csv", accept_multiple_files=True)
25
+
26
+ dataframes = []
27
+
28
+ if uploaded_files:
29
+ for file in uploaded_files:
30
+ file.seek(0)
31
+ df = pd.read_csv(file)
32
+ dataframes.append(df)
33
+
34
+ if len(dataframes) > 1:
35
+ merge = st.checkbox("Merge uploaded CSV files")
36
+
37
+ if merge:
38
+ # Merge options
39
+ keep_first_header_only = st.selectbox("Keep only the header (first row) of the first file", ["Yes", "No"])
40
+ remove_duplicate_rows = st.selectbox("Remove duplicate rows", ["No", "Yes"])
41
+ remove_empty_rows = st.selectbox("Remove empty rows", ["Yes", "No"])
42
+ end_line = st.selectbox("End line", ["\\n", "\\r\\n"])
43
+
44
+ try:
45
+ if keep_first_header_only == "Yes":
46
+ for i, df in enumerate(dataframes[1:]):
47
+ df.columns = dataframes[0].columns.intersection(df.columns)
48
+ dataframes[i+1] = df
49
+
50
+ merged_df = pd.concat(dataframes, ignore_index=True, join='outer')
51
+
52
+ if remove_duplicate_rows == "Yes":
53
+ merged_df.drop_duplicates(inplace=True)
54
+
55
+ if remove_empty_rows == "Yes":
56
+ merged_df.dropna(how="all", inplace=True)
57
+
58
+ dataframes = [merged_df]
59
+
60
+ except ValueError as e:
61
+ st.error("Please make sure columns match in all files. If you don't want them to match, select 'No' in the first option.")
62
+ st.stop()
63
+
64
+ # Show or hide DataFrames
65
+ show_dataframes = st.checkbox("Show DataFrames", value=True)
66
+
67
+ if show_dataframes:
68
+ for i, df in enumerate(dataframes):
69
+ st.write(f"DataFrame {i + 1}")
70
+ st.dataframe(df)
71
+
72
+ if st.button("Download cleaned data"):
73
+ for i, df in enumerate(dataframes):
74
+ csv = df.to_csv(index=False)
75
+ b64 = base64.b64encode(csv.encode()).decode()
76
+ href = f'<a href="data:file/csv;base64,{b64}" download="cleaned_data_{i + 1}.csv">Download cleaned_data_{i + 1}.csv</a>'
77
+ st.markdown(href, unsafe_allow_html=True)
78
+ else:
79
+ st.warning("Please upload CSV file(s).")
80
+ st.stop()
81
+ # app()
82
 
83