NextDrought committed on
Commit
ad49fd9
·
1 Parent(s): b4c9157

Update to have cleaning data function

Browse files
pages/14_📈_Table_Data_Visualization.py CHANGED
@@ -2,111 +2,101 @@ import streamlit as st
2
  import pandas as pd
3
  import seaborn as sns
4
  import matplotlib.pyplot as plt
 
 
 
5
 
6
  # Configure the Streamlit page to use the "wide" layout.
  st.set_page_config(layout="wide")
7
 
8
  # Function for the CSV Visualization App
9
  def app():
10
- st.title('CSV Data Visualization')
11
 
 
 
12
  # File uploader allows user to add their own CSV
13
- uploaded_file = st.file_uploader("Upload your input CSV file", type=["csv"])
14
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  # Pandas DataFrame is created from the CSV file
16
- if uploaded_file is not None:
17
- df = pd.read_csv(uploaded_file)
18
- st.write(df) # Display the dataframe on the app
19
 
20
- # Create a selectbox for user to choose the column to visualize
21
- columns = df.columns.tolist()
22
- selected_column = st.selectbox('Select a column to visualize', columns)
23
 
24
- # Using seaborn to create a count plot
25
- fig, ax = plt.subplots()
26
- sns.countplot(data=df, x=selected_column, ax=ax)
27
- plt.xticks(rotation=45) # Rotate X-axis labels to 45 degrees
28
- # Show the plot
29
- st.pyplot(fig)
30
 
31
  # Module-level call: renders the page each time this script is executed.
  app()
32
 
33
- # import io
34
- # import base64
35
-
36
- # st.set_page_config(page_title="Data Cleaning Tool")
37
-
38
- # hide_streamlit_style = """
39
- # <style>
40
- # #MainMenu {visibility: hidden;}
41
- # footer {visibility: hidden;}
42
- # </style>
43
- # """
44
- # st.markdown(hide_streamlit_style, unsafe_allow_html=True)
45
-
46
- # st.title("CSV Data Cleaning Tool")
47
-
48
- # st.markdown("Upload one or multiple CSV files to preprocess and clean your files quickly and stress free.")
49
-
50
- # uploaded_files = st.file_uploader("Choose CSV files", type="csv", accept_multiple_files=True)
51
-
52
- # dataframes = []
53
-
54
- # if uploaded_files:
55
- # for file in uploaded_files:
56
- # file.seek(0)
57
- # df = pd.read_csv(file)
58
- # dataframes.append(df)
59
-
60
- # if len(dataframes) > 1:
61
- # merge = st.checkbox("Merge uploaded CSV files")
62
-
63
- # if merge:
64
- # # Merge options
65
- # keep_first_header_only = st.selectbox("Keep only the header (first row) of the first file", ["Yes", "No"])
66
- # remove_duplicate_rows = st.selectbox("Remove duplicate rows", ["No", "Yes"])
67
- # remove_empty_rows = st.selectbox("Remove empty rows", ["Yes", "No"])
68
- # end_line = st.selectbox("End line", ["\\n", "\\r\\n"])
69
-
70
- # try:
71
- # if keep_first_header_only == "Yes":
72
- # for i, df in enumerate(dataframes[1:]):
73
- # df.columns = dataframes[0].columns.intersection(df.columns)
74
- # dataframes[i+1] = df
75
-
76
- # merged_df = pd.concat(dataframes, ignore_index=True, join='outer')
77
-
78
- # if remove_duplicate_rows == "Yes":
79
- # merged_df.drop_duplicates(inplace=True)
80
-
81
- # if remove_empty_rows == "Yes":
82
- # merged_df.dropna(how="all", inplace=True)
83
-
84
- # dataframes = [merged_df]
85
-
86
- # except ValueError as e:
87
- # st.error("Please make sure columns match in all files. If you don't want them to match, select 'No' in the first option.")
88
- # st.stop()
89
-
90
- # # Show or hide DataFrames
91
- # show_dataframes = st.checkbox("Show DataFrames", value=True)
92
-
93
- # if show_dataframes:
94
- # for i, df in enumerate(dataframes):
95
- # st.write(f"DataFrame {i + 1}")
96
- # st.dataframe(df)
97
-
98
- # if st.button("Download cleaned data"):
99
- # for i, df in enumerate(dataframes):
100
- # csv = df.to_csv(index=False)
101
- # b64 = base64.b64encode(csv.encode()).decode()
102
- # href = f'<a href="data:file/csv;base64,{b64}" download="cleaned_data_{i + 1}.csv">Download cleaned_data_{i + 1}.csv</a>'
103
- # st.markdown(href, unsafe_allow_html=True)
104
- # else:
105
- # st.warning("Please upload CSV file(s).")
106
- # st.stop()
107
-
108
- # st.markdown("")
109
- # st.markdown("---")
110
- # st.markdown("")
111
- # st.markdown("<p style='text-align: center'><a href='https://github.com/Kaludii'>Github</a> | <a href='https://huggingface.co/Kaludi'>HuggingFace</a></p>", unsafe_allow_html=True)
112
 
 
2
  import pandas as pd
3
  import seaborn as sns
4
  import matplotlib.pyplot as plt
5
+ import io
6
+ import base64
7
+
8
 
9
  # Configure the Streamlit page to use the "wide" layout.
  st.set_page_config(layout="wide")
10
 
11
  # Function for the CSV data cleaning and visualization app
12
  def app():
13
+ st.title('CSV Data Cleaning and Visualization')
14
 
15
+ st.markdown("Upload one or multiple CSV files to preprocess and clean your files quickly and stress free.")
16
+
17
  # File uploader allows user to add their own CSV
18
+
19
+ uploaded_files = st.file_uploader("Choose CSV files", type="csv", accept_multiple_files=True)
20
+
21
+ # dataframes = []
22
+
23
+ if uploaded_files:
24
+ for file in uploaded_files:
25
+ file.seek(0)
26
+ df = pd.read_csv(file)
27
+ dataframes.append(df)
28
+
29
+ if len(dataframes) > 1:
30
+ merge = st.checkbox("Merge uploaded CSV files")
31
+
32
+ if merge:
33
+ # Merge options
34
+ keep_first_header_only = st.selectbox("Keep only the header (first row) of the first file", ["Yes", "No"])
35
+ remove_duplicate_rows = st.selectbox("Remove duplicate rows", ["No", "Yes"])
36
+ remove_empty_rows = st.selectbox("Remove empty rows", ["Yes", "No"])
37
+ end_line = st.selectbox("End line", ["\\n", "\\r\\n"])
38
+
39
+ try:
40
+ if keep_first_header_only == "Yes":
41
+ for i, df in enumerate(dataframes[1:]):
42
+ df.columns = dataframes[0].columns.intersection(df.columns)
43
+ dataframes[i+1] = df
44
+
45
+ merged_df = pd.concat(dataframes, ignore_index=True, join='outer')
46
+
47
+ if remove_duplicate_rows == "Yes":
48
+ merged_df.drop_duplicates(inplace=True)
49
+
50
+ if remove_empty_rows == "Yes":
51
+ merged_df.dropna(how="all", inplace=True)
52
+
53
+ dataframes = [merged_df]
54
+
55
+ except ValueError as e:
56
+ st.error("Please make sure columns match in all files. If you don't want them to match, select 'No' in the first option.")
57
+ st.stop()
58
+
59
+ # Show or hide DataFrames
60
+ show_dataframes = st.checkbox("Show DataFrames", value=True)
61
+
62
+ if show_dataframes:
63
+ for i, df in enumerate(dataframes):
64
+ st.write(f"DataFrame {i + 1}")
65
+ st.dataframe(df)
66
+
67
+ if st.button("Download cleaned data"):
68
+ for i, df in enumerate(dataframes):
69
+ csv = df.to_csv(index=False)
70
+ b64 = base64.b64encode(csv.encode()).decode()
71
+ href = f'<a href="data:file/csv;base64,{b64}" download="cleaned_data_{i + 1}.csv">Download cleaned_data_{i + 1}.csv</a>'
72
+ st.markdown(href, unsafe_allow_html=True)
73
+ else:
74
+ st.warning("Please upload CSV file(s).")
75
+ st.stop()
76
+
77
+ st.markdown("")
78
+ st.markdown("---")
79
+ st.markdown("")
80
+ st.markdown("<p style='text-align: center'><a href='https://github.com/Kaludii'>Github</a> | <a href='https://huggingface.co/Kaludi'>HuggingFace</a></p>", unsafe_allow_html=True)
81
+
82
+
83
+ # uploaded_file = st.file_uploader("Upload your input CSV file", type=["csv"])
84
  # Pandas DataFrame is created from the CSV file
85
+ # if uploaded_file is not None:
86
+ # df = pd.read_csv(uploaded_file)
87
+ # st.write(df) # Display the dataframe on the app
88
 
89
+ # # Create a selectbox for user to choose the column to visualize
90
+ # columns = df.columns.tolist()
91
+ # selected_column = st.selectbox('Select a column to visualize', columns)
92
 
93
+ # # Using seaborn to create a count plot
94
+ # fig, ax = plt.subplots()
95
+ # sns.countplot(data=df, x=selected_column, ax=ax)
96
+ # plt.xticks(rotation=45) # Rotate X-axis labels to 45 degrees
97
+ # # Show the plot
98
+ # st.pyplot(fig)
99
 
100
  # Module-level call: renders the page each time this script is executed.
  app()
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102