Peter Yang committed on
Commit
8015fc4
·
1 Parent(s): ad49fd9

back to visualization

Browse files
pages/14_📈_Table_Data_Visualization.py CHANGED
@@ -11,91 +11,23 @@ st.set_page_config(layout="wide")
11
  # Function for the CSV Visualization App
12
  def app():
13
  st.title('CSV Data Cleaning and Visualization')
14
-
15
- st.markdown("Upload one or multiple CSV files to preprocess and clean your files quickly and stress free.")
16
-
17
- # File uploader allows user to add their own CSV
18
-
19
- uploaded_files = st.file_uploader("Choose CSV files", type="csv", accept_multiple_files=True)
20
-
21
- # dataframes = []
22
-
23
- if uploaded_files:
24
- for file in uploaded_files:
25
- file.seek(0)
26
- df = pd.read_csv(file)
27
- dataframes.append(df)
28
-
29
- if len(dataframes) > 1:
30
- merge = st.checkbox("Merge uploaded CSV files")
31
-
32
- if merge:
33
- # Merge options
34
- keep_first_header_only = st.selectbox("Keep only the header (first row) of the first file", ["Yes", "No"])
35
- remove_duplicate_rows = st.selectbox("Remove duplicate rows", ["No", "Yes"])
36
- remove_empty_rows = st.selectbox("Remove empty rows", ["Yes", "No"])
37
- end_line = st.selectbox("End line", ["\\n", "\\r\\n"])
38
-
39
- try:
40
- if keep_first_header_only == "Yes":
41
- for i, df in enumerate(dataframes[1:]):
42
- df.columns = dataframes[0].columns.intersection(df.columns)
43
- dataframes[i+1] = df
44
-
45
- merged_df = pd.concat(dataframes, ignore_index=True, join='outer')
46
-
47
- if remove_duplicate_rows == "Yes":
48
- merged_df.drop_duplicates(inplace=True)
49
-
50
- if remove_empty_rows == "Yes":
51
- merged_df.dropna(how="all", inplace=True)
52
-
53
- dataframes = [merged_df]
54
-
55
- except ValueError as e:
56
- st.error("Please make sure columns match in all files. If you don't want them to match, select 'No' in the first option.")
57
- st.stop()
58
-
59
- # Show or hide DataFrames
60
- show_dataframes = st.checkbox("Show DataFrames", value=True)
61
-
62
- if show_dataframes:
63
- for i, df in enumerate(dataframes):
64
- st.write(f"DataFrame {i + 1}")
65
- st.dataframe(df)
66
-
67
- if st.button("Download cleaned data"):
68
- for i, df in enumerate(dataframes):
69
- csv = df.to_csv(index=False)
70
- b64 = base64.b64encode(csv.encode()).decode()
71
- href = f'<a href="data:file/csv;base64,{b64}" download="cleaned_data_{i + 1}.csv">Download cleaned_data_{i + 1}.csv</a>'
72
- st.markdown(href, unsafe_allow_html=True)
73
- else:
74
- st.warning("Please upload CSV file(s).")
75
- st.stop()
76
-
77
- st.markdown("")
78
- st.markdown("---")
79
- st.markdown("")
80
- st.markdown("<p style='text-align: center'><a href='https://github.com/Kaludii'>Github</a> | <a href='https://huggingface.co/Kaludi'>HuggingFace</a></p>", unsafe_allow_html=True)
81
-
82
-
83
- # uploaded_file = st.file_uploader("Upload your input CSV file", type=["csv"])
84
  # Pandas DataFrame is created from the CSV file
85
- # if uploaded_file is not None:
86
- # df = pd.read_csv(uploaded_file)
87
- # st.write(df) # Display the dataframe on the app
88
-
89
- # # Create a selectbox for user to choose the column to visualize
90
- # columns = df.columns.tolist()
91
- # selected_column = st.selectbox('Select a column to visualize', columns)
92
-
93
- # # Using seaborn to create a count plot
94
- # fig, ax = plt.subplots()
95
- # sns.countplot(data=df, x=selected_column, ax=ax)
96
- # plt.xticks(rotation=45) # Rotate X-axis labels to 45 degrees
97
- # # Show the plot
98
- # st.pyplot(fig)
99
 
100
  app()
101
 
 
11
  # Function for the CSV Visualization App
12
  def app():
13
  st.title('CSV Data Cleaning and Visualization')
14
+
15
+ uploaded_file = st.file_uploader("Upload your input CSV file", type=["csv"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  # Pandas DataFrame is created from the CSV file
17
+ if uploaded_file is not None:
18
+ df = pd.read_csv(uploaded_file)
19
+ st.write(df) # Display the dataframe on the app
20
+
21
+ # Create a selectbox for user to choose the column to visualize
22
+ columns = df.columns.tolist()
23
+ selected_column = st.selectbox('Select a column to visualize', columns)
24
+
25
+ # Using seaborn to create a count plot
26
+ fig, ax = plt.subplots()
27
+ sns.countplot(data=df, x=selected_column, ax=ax)
28
+ plt.xticks(rotation=45) # Rotate X-axis labels to 45 degrees
29
+ # Show the plot
30
+ st.pyplot(fig)
31
 
32
  app()
33
 
pages/15_📈_Table_Data_Clean.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import matplotlib.pyplot as plt
5
+ import io
6
+ import base64
7
+
8
+
9
+ st.set_page_config(layout="wide")
10
+
11
+ # Function for the CSV Visualization App
12
+ def app():
13
+ st.title('CSV Data Cleaning and Visualization')
14
+
15
+ st.markdown("Upload one or multiple CSV files to preprocess and clean your files quickly and stress free.")
16
+
17
+ # File uploader allows user to add their own CSV
18
+
19
+ uploaded_files = st.file_uploader("Choose CSV files", type="csv", accept_multiple_files=True)
20
+
21
+ # dataframes = []
22
+
23
+ if uploaded_files:
24
+ for file in uploaded_files:
25
+ file.seek(0)
26
+ df = pd.read_csv(file)
27
+ dataframes.append(df)
28
+
29
+ if len(dataframes) > 1:
30
+ merge = st.checkbox("Merge uploaded CSV files")
31
+
32
+ if merge:
33
+ # Merge options
34
+ keep_first_header_only = st.selectbox("Keep only the header (first row) of the first file", ["Yes", "No"])
35
+ remove_duplicate_rows = st.selectbox("Remove duplicate rows", ["No", "Yes"])
36
+ remove_empty_rows = st.selectbox("Remove empty rows", ["Yes", "No"])
37
+ end_line = st.selectbox("End line", ["\\n", "\\r\\n"])
38
+
39
+ try:
40
+ if keep_first_header_only == "Yes":
41
+ for i, df in enumerate(dataframes[1:]):
42
+ df.columns = dataframes[0].columns.intersection(df.columns)
43
+ dataframes[i+1] = df
44
+
45
+ merged_df = pd.concat(dataframes, ignore_index=True, join='outer')
46
+
47
+ if remove_duplicate_rows == "Yes":
48
+ merged_df.drop_duplicates(inplace=True)
49
+
50
+ if remove_empty_rows == "Yes":
51
+ merged_df.dropna(how="all", inplace=True)
52
+
53
+ dataframes = [merged_df]
54
+
55
+ except ValueError as e:
56
+ st.error("Please make sure columns match in all files. If you don't want them to match, select 'No' in the first option.")
57
+ st.stop()
58
+
59
+ # Show or hide DataFrames
60
+ show_dataframes = st.checkbox("Show DataFrames", value=True)
61
+
62
+ if show_dataframes:
63
+ for i, df in enumerate(dataframes):
64
+ st.write(f"DataFrame {i + 1}")
65
+ st.dataframe(df)
66
+
67
+ if st.button("Download cleaned data"):
68
+ for i, df in enumerate(dataframes):
69
+ csv = df.to_csv(index=False)
70
+ b64 = base64.b64encode(csv.encode()).decode()
71
+ href = f'<a href="data:file/csv;base64,{b64}" download="cleaned_data_{i + 1}.csv">Download cleaned_data_{i + 1}.csv</a>'
72
+ st.markdown(href, unsafe_allow_html=True)
73
+ else:
74
+ st.warning("Please upload CSV file(s).")
75
+ st.stop()
76
+
77
+ st.markdown("")
78
+ st.markdown("---")
79
+ st.markdown("")
80
+ st.markdown("<p style='text-align: center'><a href='https://github.com/Kaludii'>Github</a> | <a href='https://huggingface.co/Kaludi'>HuggingFace</a></p>", unsafe_allow_html=True)
81
+
82
+
83
+ # uploaded_file = st.file_uploader("Upload your input CSV file", type=["csv"])
84
+ # Pandas DataFrame is created from the CSV file
85
+ # if uploaded_file is not None:
86
+ # df = pd.read_csv(uploaded_file)
87
+ # st.write(df) # Display the dataframe on the app
88
+
89
+ # # Create a selectbox for user to choose the column to visualize
90
+ # columns = df.columns.tolist()
91
+ # selected_column = st.selectbox('Select a column to visualize', columns)
92
+
93
+ # # Using seaborn to create a count plot
94
+ # fig, ax = plt.subplots()
95
+ # sns.countplot(data=df, x=selected_column, ax=ax)
96
+ # plt.xticks(rotation=45) # Rotate X-axis labels to 45 degrees
97
+ # # Show the plot
98
+ # st.pyplot(fig)
99
+
100
+ app()
101
+
102
+