shwetashweta05 committed on
Commit
30aabc7
·
verified ·
1 Parent(s): eb350c4

Update pages/6.Data Collection.py

Browse files
Files changed (1) hide show
  1. pages/6.Data Collection.py +102 -1
pages/6.Data Collection.py CHANGED
@@ -21,4 +21,105 @@ if data_type == "Structured":
21
  format_selected = st.radio(
22
  "Select a data format to learn more:", ["Excel", "CSV", "SQL Databases"]
23
  )
24
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  format_selected = st.radio(
22
  "Select a data format to learn more:", ["Excel", "CSV", "SQL Databases"]
23
  )
24
+
25
+ # Excel Format Section
26
+ if format_selected == "Excel":
27
+ st.write("#### Excel Format")
28
+
29
+ # Part (a) What it is
30
+ st.subheader("What is Excel?")
31
+ st.write("""
32
+ Excel is a popular file format used for storing structured data in tabular form.
33
+ It is commonly used in business and data analysis, and its file extensions include `.xlsx` and `.xls`.
34
+ """)
35
+
36
+ # Part (b) How to read these files
37
+ st.subheader("How to Read Excel Files?")
38
+ st.code("""
39
+ import pandas as pd
40
+ # Read an Excel file
41
+ df = pd.read_excel("file.xlsx")
42
+ print(df.head())
43
+ """)
44
+
45
+ # Part (c) Issues encountered
46
+ st.subheader("Common Issues Encountered When Handling Excel Files")
47
+ st.write("""
48
+ - **Missing Data**: Some cells may contain empty or null values.
49
+ - **Encoding Problems**: Files saved in non-standard formats may have encoding issues.
50
+ - **File Corruption**: The file may become unreadable if improperly saved or transferred.
51
+ - **Large Files**: Handling very large Excel files may exceed memory limits.
52
+ """)
53
+
54
+ # Part (d) How to overcome these errors/issues
55
+ st.subheader("How to Overcome These Issues?")
56
+ st.write("""
57
+ - **Missing Data**: Use data imputation techniques to fill in missing values.
58
+ - **Encoding Problems**: Specify the encoding format when reading the file, e.g., `encoding='utf-8'`.
59
+ - **File Corruption**: Use repair tools or convert to a compatible format like CSV.
60
+ - **Large Files**: Process the file in chunks using `pandas` or optimize it using external tools.
61
+ """)
62
+
63
+ # Downloadable Guide Button
64
+ st.markdown("### Download Coding Guide:")
65
+ if st.button("Download Excel Guide"):
66
+ # Provide a downloadable file
67
+ file_path = "Excel_guide.ipynb" # Ensure this file exists in the app directory
68
+ with open(file_path, "rb") as file:
69
+ st.download_button(
70
+ label="Download Excel Guide",
71
+ data=file,
72
+ file_name="Excel_guide.ipynb",
73
+ mime="application/octet-stream",
74
+ )
75
+
76
+ # CSV Format Section
77
+ elif format_selected == "CSV":
78
+ st.write("#### CSV Format")
79
+
80
+ # Part (a) What it is
81
+ st.subheader("What is CSV?")
82
+ st.write("""
83
+ CSV (Comma-Separated Values) is a lightweight text file format for structured data,
84
+ where values are separated by commas. It is widely used for data exchange between systems.
85
+ """)
86
+
87
+ # Part (b) How to read these files
88
+ st.subheader("How to Read CSV Files?")
89
+ st.code("""
90
+ import pandas as pd
91
+ # Read a CSV file
92
+ df = pd.read_csv("file.csv")
93
+ print(df.head())
94
+ """)
95
+
96
+ # Part (c) Issues encountered
97
+ st.subheader("Common Issues Encountered When Handling CSV Files")
98
+ st.write("""
99
+ - **Misaligned Rows**: Extra or missing delimiters can lead to misaligned rows.
100
+ - **Encoding Problems**: Non-standard characters may cause encoding errors.
101
+ - **Large Files**: Processing large CSV files can be resource-intensive.
102
+ """)
103
+
104
+ # Part (d) How to overcome these errors/issues
105
+ st.subheader("How to Overcome These Issues?")
106
+ st.write("""
107
+ - **Misaligned Rows**: Use a consistent delimiter and validate the file before processing.
108
+ - **Encoding Problems**: Explicitly specify the encoding format, e.g., `encoding='utf-8'`.
109
+ - **Large Files**: Process the file in chunks using `pandas` (`chunksize` parameter).
110
+ """)
111
+
112
+ # Downloadable Guide Button
113
+ st.markdown("### Download Coding Guide:")
114
+ if st.button("Download CSV Guide"):
115
+ # Provide a downloadable file
116
+ file_path = "CSV_guide.ipynb" # Ensure this file exists in the app directory
117
+ with open(file_path, "rb") as file:
118
+ st.download_button(
119
+ label="Download CSV Guide",
120
+ data=file,
121
+ file_name="CSV_guide.ipynb",
122
+ mime="application/octet-stream",
123
+ )
124
+
125
+ # Add similar sections for "Unstructured" and "Semi-Structured" data types as needed.