File size: 8,018 Bytes
a66704a
22da260
 
ed6802a
22da260
 
 
 
 
 
 
 
 
 
 
 
 
9772547
22da260
 
 
 
 
 
9772547
22da260
 
 
 
 
9772547
22da260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b05d633
22da260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8a5a2cb
 
 
851546d
2b9b7c3
51d558f
2b9b7c3
51d558f
851546d
 
a67bb36
c946560
51d558f
851546d
 
 
f01f2ba
dedab40
ad600a9
f01f2ba
 
 
 
 
 
 
 
ad600a9
 
73c8a3e
f01f2ba
ad600a9
f01f2ba
ad600a9
 
 
 
 
 
 
 
 
f01f2ba
 
 
 
ad600a9
f01f2ba
 
ad600a9
 
f01f2ba
ad600a9
f01f2ba
 
 
 
 
 
 
 
 
73c8a3e
ad600a9
f01f2ba
 
 
 
 
 
 
 
 
c9edecf
 
 
 
 
 
 
 
73c8a3e
 
c9edecf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73c8a3e
 
c9edecf
 
73c8a3e
 
c9edecf
73c8a3e
c9edecf
 
 
 
 
 
 
 
73c8a3e
c9edecf
 
 
 
 
 
 
 
 
 
 
851546d
ad600a9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
import streamlit as st
import pandas as pd
import numpy as np

# Global page styling, injected as a raw <style> tag. The rule gives the app
# container a translucent green background colour; the remaining background-*
# properties only take effect if a background image is set, and this rule
# does not set one.
_APP_BACKGROUND_CSS = (
    "\n"
    "<style>\n"
    "    /* Set the background image for the entire app */\n"
    "    .stApp {\n"
    "        background-color:rgba(96, 155, 124, 0.5);\n"
    "        background-size: 1300px;\n"
    "        background-repeat: no-repeat;\n"
    "        background-attachment: fixed;\n"
    "        background-position: center;\n"
    "    }\n"
    "    \n"
    "    </style>\n"
)
# unsafe_allow_html is needed so the <style> tag is emitted as HTML rather
# than escaped and shown as text.
st.markdown(_APP_BACKGROUND_CSS, unsafe_allow_html=True)

import streamlit as st

# Navigation
# App title; this call is unconditional, so it renders above every page.
st.title("Life Cycle of ML")
# Start on the home page the first time the script runs in a session.
# setdefault leaves an already-stored page untouched, so the current page
# survives Streamlit's script reruns.
st.session_state.setdefault('page', 'home')

# Main Navigation
def _go_to_page(page_name):
    """Store ``page_name`` as the active page in ``st.session_state``.

    Meant to be used as a button ``on_click`` callback. Streamlit runs
    callbacks before it re-executes the script, so the page check below
    already sees the new value on the rerun triggered by the click. Setting
    the value inside ``if st.button(...)`` instead would only take effect on
    the following interaction.
    """
    st.session_state['page'] = page_name


def _render_format_section(name, description, read_code, issues, remedies,
                           doc_links=None):
    """Render the tutorial section for a single data format.

    Args:
        name: Format label inserted into the headings (e.g. "Excel").
        description: Text shown under the "What is it?" heading.
        read_code: Python snippet shown under "How to Read ... Files?".
        issues: Iterable of common problems, rendered as a bullet list.
        remedies: Iterable of fixes, rendered as a bullet list.
        doc_links: Optional ``(notebook_path, pdf_path)`` pair. When given,
            a button is rendered that reveals the two download links.
    """
    st.subheader(f"{name} Data Format")
    st.write("*What is it?*")
    st.write(description)

    st.write(f"*How to Read {name} Files?*")
    st.code(read_code, language="python")

    st.write(f"*Common Issues When Handling {name} Files*")
    st.write("\n".join(f"- {issue}" for issue in issues))

    st.write("*How to Overcome These Errors/Issues?*")
    st.write("\n".join(f"- {remedy}" for remedy in remedies))

    if doc_links is not None:
        notebook_path, pdf_path = doc_links
        # Button to open Jupyter Notebook or PDF
        if st.button(f"Open {name} Documentation"):
            st.write(
                f"Download the [documentation notebook]({notebook_path}) "
                f"or [PDF]({pdf_path})."
            )


if st.session_state['page'] == 'home':
    st.subheader("Explore the Life Cycle Stages")
    # The callback switches the page before the rerun, so a single click is
    # enough to land on the Data Collection page.
    st.button("Data Collection", on_click=_go_to_page, args=("data_collection",))

elif st.session_state['page'] == 'data_collection':
    # Data Collection Page
    st.title("Data Collection")
    st.header("1. What is Data?")
    st.write(
        "Data refers to raw facts and figures that are collected, stored, and analyzed to derive insights. "
        "It serves as the foundation for any machine learning model."
    )

    st.header("2. Types of Data")
    # NOTE(review): "Semi-Structured" is offered here but no branch for it is
    # visible in this part of the file; selecting it renders nothing below.
    data_type = st.radio(
        "Select a type of data to learn more:",
        ("Structured", "Unstructured", "Semi-Structured")
    )

    if data_type == "Structured":
        st.subheader("Structured Data")
        st.write(
            "Structured data is highly organized and easily searchable within databases. "
            "It includes rows and columns, such as in relational databases."
        )

        st.write("Data Formats:")
        format_selected = st.radio(
            "Select a format to explore further:",
            ("Excel", "CSV")
        )

        if format_selected == "Excel":
            # Excel Data Format Section
            _render_format_section(
                name="Excel",
                description=(
                    "Excel files are spreadsheets used to organize and analyze data in rows and columns. "
                    "They are widely used due to their user-friendly nature and support for various data types."
                ),
                read_code=(
                    "import pandas as pd\n"
                    "# Reading an Excel file\n"
                    "df = pd.read_excel('file.xlsx')\n"
                    "print(df.head())"
                ),
                issues=(
                    "Missing or corrupted files",
                    "Version incompatibilities",
                    "Incorrect file paths",
                    "Handling large Excel files",
                ),
                remedies=(
                    "Use proper error handling with try-except.",
                    "Convert Excel files to CSV for better compatibility.",
                    "Use libraries like openpyxl or xlrd for specific Excel versions.",
                    "Break large files into smaller chunks for processing.",
                ),
                doc_links=(
                    "path/to/excel_notebook.ipynb",
                    "path/to/excel_documentation.pdf",
                ),
            )

        elif format_selected == "CSV":
            # CSV Data Format Section
            _render_format_section(
                name="CSV",
                description=(
                    "CSV (Comma-Separated Values) files store tabular data in plain text, where each line represents a record, "
                    "and fields are separated by commas."
                ),
                read_code=(
                    "import pandas as pd\n"
                    "# Reading a CSV file\n"
                    "df = pd.read_csv('file.csv')\n"
                    "print(df.head())"
                ),
                issues=(
                    "Encoding issues (e.g., UTF-8, ISO-8859-1)",
                    "Inconsistent delimiters",
                    "Missing or corrupted files",
                    "Large file sizes causing memory errors",
                ),
                remedies=(
                    "Specify the correct encoding when reading files using encoding='utf-8' or similar.",
                    "Use libraries like csv or pandas to handle different delimiters.",
                    "Employ error handling to catch and manage missing/corrupted files.",
                    "Use chunking to read large files in smaller parts: pd.read_csv('file.csv', chunksize=1000).",
                ),
                doc_links=(
                    "path/to/csv_notebook.ipynb",
                    "path/to/csv_documentation.pdf",
                ),
            )

    elif data_type == "Unstructured":
        st.subheader("Unstructured Data")
        # The trailing space after "tools." matters: adjacent string literals
        # are concatenated verbatim, with no separator added.
        st.write(
            "Unstructured data refers to information that lacks a predefined format or organization, making it challenging to analyze using traditional tools. "
            "Examples include text, images, videos, audio, and social media posts."
        )

        st.write("Data Formats:")
        # NOTE(review): "AUDIO" is offered here but no branch for it is
        # visible in this part of the file; selecting it renders nothing.
        format_selected = st.radio(
            "Select a format to explore further:",
            ("IMAGE", "VIDEO", "AUDIO")
        )

        if format_selected == "IMAGE":
            # Image Data Format Section
            _render_format_section(
                name="IMAGE",
                description="Photos, medical scans, satellite images. ",
                read_code=(
                    "from PIL import Image\n"
                    "image = Image.open('example.jpg')\n"
                    "image.show()"
                ),
                issues=(
                    "data augmentation and overfitting",
                    "image processing challenges",
                    "Data Imbalance",
                    "High Dimensionality",
                ),
                remedies=(
                    "Data Augmentation.",
                    "Consistent image processing",
                    "Handling Class Imbalance.",
                    "Dimensionality Reduction and Feature Extraction",
                ),
            )

        elif format_selected == "VIDEO":
            # Video Data Format Section
            _render_format_section(
                name="VIDEO",
                description=(
                    "Video data is a sequence of image frames, usually with audio, such as recorded lectures, "
                    "surveillance footage, and movies. Common file formats include MP4, AVI, MOV, and MKV."
                ),
                # The install command is a shell command, so it is shown as a
                # comment to keep the displayed snippet valid Python.
                read_code=(
                    "# Install first (run in a terminal): pip install opencv-python\n"
                    "import cv2\n"
                    "\n"
                    "# Open the video file\n"
                    "video_path = 'path_to_your_video.mp4'\n"
                    "cap = cv2.VideoCapture(video_path)\n"
                    "\n"
                    "# Read frames until the video ends\n"
                    "while cap.isOpened():\n"
                    "    ok, frame = cap.read()\n"
                    "    if not ok:\n"
                    "        break\n"
                    "\n"
                    "cap.release()"
                ),
                issues=(
                    "File not found or Corrupted.",
                    "Incompatible Codec or Format.",
                    "Performance Issues with Large Videos.",
                    "Frame Dropping or Skipping.",
                ),
                remedies=(
                    "Ensure Correct File Path and Handle Corrupted Files.",
                    "Install Missing Codecs or Use Supported Formats.",
                    "Optimize Performance for Large Videos",
                    "Control Frame Rate and Prevent Skipping",
                ),
            )