ankithpatel committed on
Commit
1b8fed2
·
verified ·
1 Parent(s): 203694b

Create page2.py

Browse files
Files changed (1) hide show
  1. pages/page2.py +135 -0
pages/page2.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
def background():
    """Inject the page-wide background styling.

    Sends a ``<style>`` element to the page through ``st.markdown`` with
    ``unsafe_allow_html=True``. The rule targets the ``.stApp`` selector and
    sets a semi-transparent green background colour along with
    size/repeat/attachment/position properties; no image URL is set here.
    """
    # A plain (non-f) string: there is nothing to interpolate, so the CSS
    # braces can be written directly instead of being doubled.
    stylesheet = """
    <style>
    /* Set the background image for the entire app */
    .stApp {
        background-color:rgba(96, 155, 124, 0.5);
        background-size: 1300px;
        background-repeat: no-repeat;
        background-attachment: fixed;
        background-position: center;
    }

    </style>
    """
    st.markdown(stylesheet, unsafe_allow_html=True)
17
def _rerun():
    """Restart the current script run with whichever rerun API is available.

    ``st.experimental_rerun`` was deprecated in favour of ``st.rerun`` and has
    been removed from recent Streamlit releases, so prefer ``st.rerun`` and
    only fall back to the legacy name on older installations.
    """
    rerun = getattr(st, "rerun", None)
    if rerun is None:
        rerun = st.experimental_rerun
    rerun()


def _show_excel_format():
    """Render the "Excel Data Format" section: overview, snippet, issues, fixes."""
    st.subheader("Excel Data Format")
    st.write("*What is it?*")
    st.write(
        "Excel files are spreadsheets used to organize and analyze data in rows and columns. "
        "They are widely used due to their user-friendly nature and support for various data types."
    )

    st.write("*How to Read Excel Files?*")
    st.code(
        """
        import pandas as pd
        # Reading an Excel file
        df = pd.read_excel('file.xlsx')
        print(df.head())
        """,
        language="python"
    )

    st.write("*Common Issues When Handling Excel Files*")
    st.write(
        """
        - Missing or corrupted files
        - Version incompatibilities
        - Incorrect file paths
        - Handling large Excel files
        """
    )

    st.write("*How to Overcome These Errors/Issues?*")
    st.write(
        """
        - Use proper error handling with try-except.
        - Convert Excel files to CSV for better compatibility.
        - Use libraries like openpyxl or xlrd for specific Excel versions.
        - Break large files into smaller chunks for processing.
        """
    )

    # The button only reveals markdown links to the notebook / PDF.
    if st.button("Open Excel Documentation"):
        st.write("Download the [documentation notebook](path/to/excel_notebook.ipynb) or [PDF](path/to/excel_documentation.pdf).")


def _show_csv_format():
    """Render the "CSV Data Format" section: overview, snippet, issues, fixes."""
    st.subheader("CSV Data Format")
    st.write("*What is it?*")
    st.write(
        "CSV (Comma-Separated Values) files store tabular data in plain text, where each line represents a record, "
        "and fields are separated by commas."
    )

    st.write("*How to Read CSV Files?*")
    st.code(
        """
        import pandas as pd
        # Reading a CSV file
        df = pd.read_csv('file.csv')
        print(df.head())
        """,
        language="python"
    )

    st.write("*Common Issues When Handling CSV Files*")
    st.write(
        """
        - Encoding issues (e.g., UTF-8, ISO-8859-1)
        - Inconsistent delimiters
        - Missing or corrupted files
        - Large file sizes causing memory errors
        """
    )

    st.write("*How to Overcome These Errors/Issues?*")
    st.write(
        """
        - Specify the correct encoding when reading files using encoding='utf-8' or similar.
        - Use libraries like csv or pandas to handle different delimiters.
        - Employ error handling to catch and manage missing/corrupted files.
        - Use chunking to read large files in smaller parts: pd.read_csv('file.csv', chunksize=1000).
        """
    )

    # The button only reveals markdown links to the notebook / PDF.
    if st.button("Open CSV Documentation"):
        st.write("Download the [documentation notebook](path/to/csv_notebook.ipynb) or [PDF](path/to/csv_documentation.pdf).")


def page2():
    """Render the "Data Collection" page.

    Applies the shared background styling, shows an introduction to data and
    a radio selector for the data type. For "Structured" data a second radio
    selects a file format ("Excel" or "CSV") and the matching section is
    rendered. A "Go to Home Page" button stores ``'Page1'`` in
    ``st.session_state.page`` and triggers a rerun.
    """
    background()
    st.title("Data Collection")
    st.header("1. What is Data?")
    st.write(
        "Data refers to raw facts and figures that are collected, stored, and analyzed to derive insights. "
        "It serves as the foundation for any machine learning model."
    )

    st.header("2. Types of Data")
    data_type = st.radio(
        "Select a type of data to learn more:",
        ("Structured", "Unstructured", "Semi-Structured")
    )

    # Only the "Structured" option has content on this page.
    if data_type == "Structured":
        st.subheader("Structured Data")
        st.write(
            "Structured data is highly organized and easily searchable within databases. "
            "It includes rows and columns, such as in relational databases."
        )

        st.write("Data Formats:")
        format_selected = st.radio(
            "Select a format to explore further:",
            ("Excel", "CSV")
        )

        if format_selected == "Excel":
            _show_excel_format()
        elif format_selected == "CSV":
            _show_csv_format()

    if st.button("Go to Home Page"):
        # NOTE(review): presumably a router elsewhere reads
        # st.session_state.page to pick the page to draw - confirm.
        st.session_state.page = 'Page1'
        _rerun()