Shreneek committed on
Commit
fe1c11d
·
verified ·
1 Parent(s): 1c11299

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -120
app.py CHANGED
@@ -1,130 +1,41 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import plotly.express as px
4
  from ydata_profiling import ProfileReport
5
- from statsmodels.stats.outliers_influence import variance_inflation_factor
6
 
7
- # 1. Set Page Configuration
8
- st.set_page_config(
9
- page_title="Enhanced Data Profiling",
10
- layout="wide",
11
- page_icon="📊"
12
- )
13
 
14
- # 2. Custom CSS for a Clean, White UI
15
- custom_css = """
16
- <style>
17
- /* Make the entire background white */
18
- body {
19
- background-color: #ffffff !important;
20
- font-family: 'Roboto', sans-serif;
21
- }
22
 
23
- /* Headers and titles */
24
- h1, h2, h3, h4 {
25
- color: #2c3e50;
26
- font-weight: 700;
27
- }
28
-
29
- /* The main Streamlit container */
30
- [data-testid="stAppViewContainer"] {
31
- background-color: #ffffff !important;
32
- }
33
-
34
- /* Individual content containers */
35
- .css-1d391kg, .css-hxt7ib {
36
- background-color: #ffffff !important;
37
- border-radius: 15px;
38
- padding: 30px;
39
- margin-bottom: 20px;
40
- box-shadow: 0 8px 16px rgba(0,0,0,0.1);
41
- }
42
-
43
- /* Sidebar styling */
44
- [data-testid="stSidebar"] {
45
- background-color: #34495e !important;
46
- color: #ecf0f1 !important;
47
- font-size: 16px;
48
- }
49
- [data-testid="stSidebar"] .css-1d391kg {
50
- background-color: #2c3e50 !important;
51
- border-radius: 10px;
52
- }
53
- </style>
54
- """
55
- st.markdown(custom_css, unsafe_allow_html=True)
56
-
57
- # 3. Title and Description
58
- st.title("Enhanced Data Profiling")
59
- st.markdown("<h4 style='text-align: center; color: #2c3e50;'>Upload your CSV and explore it thoroughly!</h4>", unsafe_allow_html=True)
60
-
61
- # 4. Sidebar for File Upload
62
- st.sidebar.header("Upload & Options")
63
- uploaded_file = st.sidebar.file_uploader("Upload a CSV file", type="csv")
64
-
65
- # Placeholder for the DataFrame
66
- df = None
67
 
68
  if uploaded_file is not None:
69
- # 4a. Read the CSV
70
- df = pd.read_csv(uploaded_file)
71
- st.success("File uploaded successfully!")
72
-
73
- # 5. KPI Metrics / Quick Summary
74
- st.subheader("Dataset Quick Summary")
75
- col1, col2, col3, col4 = st.columns(4)
76
- col1.metric("Rows", f"{df.shape[0]}")
77
- col2.metric("Columns", f"{df.shape[1]}")
78
- missing_percentage = (df.isnull().sum().sum() / df.size) * 100
79
- col3.metric("Missing %", f"{missing_percentage:.2f}%")
80
- duplicates = df.duplicated().sum()
81
- col4.metric("Duplicates", f"{duplicates}")
82
-
83
- st.write("---")
84
-
85
- # 6. Optional Data Transformation: Drop columns with > 50% missing
86
- if st.checkbox("Drop columns with > 50% missing data?"):
87
- threshold = df.shape[0] * 0.5
88
- before_cols = df.shape[1]
89
- df = df.loc[:, df.isnull().sum() < threshold]
90
- after_cols = df.shape[1]
91
- st.success(f"Dropped {before_cols - after_cols} columns. Remaining columns: {after_cols}")
92
-
93
- # 7. Optional Quick Histogram
94
- numeric_cols = df.select_dtypes(include="number").columns.tolist()
95
- if numeric_cols:
96
- st.subheader("Optional Quick Histogram")
97
- selected_col = st.selectbox("Select a numeric column", numeric_cols)
98
- if selected_col:
99
- fig_hist = px.histogram(df, x=selected_col, nbins=50, title=f"Histogram of {selected_col}")
100
- fig_hist.update_traces(opacity=0.8)
101
- st.plotly_chart(fig_hist, use_container_width=True)
102
-
103
- # 8. Generate ydata-profiling Report
104
- st.subheader("Comprehensive Profiling Report")
105
- with st.spinner("Generating profiling report..."):
106
- profile = ProfileReport(df, title="Profiling Report", explorative=True)
107
- report_html = profile.to_html()
108
-
109
- # 8a. Display the report in an iframe
110
- st.components.v1.html(report_html, height=1200, scrolling=True)
111
-
112
- # 8b. Download Button for HTML
113
- st.write("### Download the Profiling Report")
114
- st.download_button(
115
- label="Download HTML",
116
- data=report_html.encode('utf-8'),
117
- file_name="profiling_report.html",
118
- mime="text/html"
119
- )
120
  else:
121
  st.info("Awaiting CSV file upload.")
122
-
123
- # That's it!
124
- # Simply copy and paste this into your app.py on Hugging Face Spaces.
125
- # Make sure you have a requirements.txt that includes:
126
- # streamlit
127
- # pandas
128
- # ydata-profiling
129
- # plotly
130
- # statsmodels (for VIF, if you need it)
 
1
  import streamlit as st
2
  import pandas as pd
 
3
  from ydata_profiling import ProfileReport
 
4
 
5
+ st.set_page_config(page_title="Dynamic Data Profiling", layout="wide", page_icon="📊")
 
 
 
 
 
6
 
7
+ st.title("Dynamic Data Profiling with ydata-profiling")
8
+ st.write("Upload your CSV file and get a complete interactive profiling report!")
 
 
 
 
 
 
9
 
10
+ uploaded_file = st.file_uploader("Upload a CSV file", type="csv")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  if uploaded_file is not None:
13
+ try:
14
+ # Read CSV into DataFrame
15
+ df = pd.read_csv(uploaded_file)
16
+ st.success("File uploaded successfully!")
17
+
18
+ # Generate the profile report
19
+ with st.spinner("Generating profile report..."):
20
+ profile = ProfileReport(df, title="Profiling Report", explorative=True)
21
+ # Convert report to HTML
22
+ report_html = profile.to_html()
23
+
24
+ # Show the report in an iframe
25
+ st.components.v1.html(report_html, height=1200, scrolling=True)
26
+
27
+ # Provide a download button for the HTML
28
+ st.write("### Download the Profiling Report")
29
+ # Convert HTML string to bytes
30
+ report_bytes = report_html.encode('utf-8')
31
+ st.download_button(
32
+ label="Download HTML",
33
+ data=report_bytes,
34
+ file_name="profiling_report.html",
35
+ mime="text/html"
36
+ )
37
+
38
+ except Exception as e:
39
+ st.error(f"An error occurred: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  else:
41
  st.info("Awaiting CSV file upload.")