File size: 5,500 Bytes
a66704a
22da260
 
ed6802a
22da260
 
 
 
 
 
 
 
 
 
 
 
 
9772547
22da260
 
 
 
 
 
9772547
22da260
 
 
 
 
9772547
22da260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b05d633
22da260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8a5a2cb
 
 
851546d
 
 
 
 
a67bb36
c946560
851546d
 
 
 
dedab40
851546d
 
 
 
8a5a2cb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import streamlit as st
import pandas as pd
import numpy as np

# Inject app-wide CSS into the page.
# Only a translucent green background colour is actually applied; the
# background-size / -repeat / -attachment / -position rules have no visible
# effect until a background-image is added to the same selector.
# A plain string (not an f-string) is used because nothing is interpolated,
# so the CSS braces do not need to be doubled.
st.markdown(
    """
<style>
    /* Set the background image for the entire app */
    .stApp {
        background-color:rgba(96, 155, 124, 0.5);
        background-size: 1300px;
        background-repeat: no-repeat;
        background-attachment: fixed;
        background-position: center;
    }
    
    </style>
""",
    unsafe_allow_html=True,  # required so the <style> tag is not escaped
)

import streamlit as st

# Navigation state and page header.
# On the first run of a session no page has been chosen yet, so default to
# the landing page; later reruns keep whatever page is already stored.
if "page" not in st.session_state:
    st.session_state.page = "home"
st.title("Life Cycle of ML")

# Main Navigation
def _go_to(page: str) -> None:
    """Store ``page`` as the active page in Streamlit's session state.

    Meant to be used as a button ``on_click`` callback so the new page is
    already selected when the script re-executes after the click.
    """
    st.session_state['page'] = page


if st.session_state['page'] == 'home':
    st.subheader("Explore the Life Cycle Stages")
    # Switch pages via a callback rather than the button's return value:
    # by the time `st.button` returns True this `if` branch has already been
    # chosen, so the target page would only appear after a second interaction.
    st.button("Data Collection", on_click=_go_to, args=("data_collection",))

elif st.session_state['page'] == 'data_collection':
    # Data Collection Page
    st.title("Data Collection")
    st.header("1. What is Data?")
    st.write(
        "Data refers to raw facts and figures that are collected, stored, and analyzed to derive insights. "
        "It serves as the foundation for any machine learning model."
    )

    st.header("2. Types of Data")
    # The option strings below are compared verbatim in the branches that
    # follow, so any spelling/casing change must be made in both places.
    data_type = st.radio(
        "Select a type of data to learn more:",
        ("Structured", "Unstructured", "Semi-Structured")
    )

    if data_type == "Structured":
        st.subheader("Structured Data")
        st.write(
            "Structured data is highly organized and easily searchable within databases. "
            "It includes rows and columns, such as in relational databases."
        )

        st.write("Data Formats:")
        format_selected = st.radio(
            "Select a format to explore further:",
            ("Excel", "CSV")
        )

        if format_selected == "Excel":
            # Excel Data Format Section
            st.subheader("Excel Data Format")
            st.write("*What is it?*")
            st.write(
                "Excel files are spreadsheets used to organize and analyze data in rows and columns. "
                "They are widely used due to their user-friendly nature and support for various data types."
            )

            st.write("*How to Read Excel Files?*")
            st.code(
                """
import pandas as pd
# Reading an Excel file
df = pd.read_excel('file.xlsx')
print(df.head())
                """,
                language="python"
            )

            st.write("*Common Issues When Handling Excel Files*")
            st.write(
                """
- Missing or corrupted files
- Version incompatibilities
- Incorrect file paths
- Handling large Excel files
                """
            )

            st.write("*How to Overcome These Errors/Issues?*")
            st.write(
                """
- Use proper error handling with try-except.
- Convert Excel files to CSV for better compatibility.
- Use libraries like openpyxl or xlrd for specific Excel versions.
- Break large files into smaller chunks for processing.
                """
            )

            # Button to open Jupyter Notebook or PDF
            # NOTE(review): the link targets look like placeholder paths — confirm.
            if st.button("Open Excel Documentation"):
                st.write("Download the [documentation notebook](path/to/excel_notebook.ipynb) or [PDF](path/to/excel_documentation.pdf).")

        elif format_selected == "CSV":
            # CSV Data Format Section
            st.subheader("CSV Data Format")
            st.write("*What is it?*")
            st.write(
                "CSV (Comma-Separated Values) files store tabular data in plain text, where each line represents a record, "
                "and fields are separated by commas."
            )

            st.write("*How to Read CSV Files?*")
            st.code(
                """
import pandas as pd
# Reading a CSV file
df = pd.read_csv('file.csv')
print(df.head())
                """,
                language="python"
            )

            st.write("*Common Issues When Handling CSV Files*")
            st.write(
                """
- Encoding issues (e.g., UTF-8, ISO-8859-1)
- Inconsistent delimiters
- Missing or corrupted files
- Large file sizes causing memory errors
                """
            )

            st.write("*How to Overcome These Errors/Issues?*")
            st.write(
                """
- Specify the correct encoding when reading files using encoding='utf-8' or similar.
- Use libraries like csv or pandas to handle different delimiters.
- Employ error handling to catch and manage missing/corrupted files.
- Use chunking to read large files in smaller parts: pd.read_csv('file.csv', chunksize=1000).
                """
            )

            # Button to open Jupyter Notebook or PDF
            # NOTE(review): the link targets look like placeholder paths — confirm.
            if st.button("Open CSV Documentation"):
                st.write("Download the [documentation notebook](path/to/csv_notebook.ipynb) or [PDF](path/to/csv_documentation.pdf).")

    # Must match the radio option "Unstructured" exactly; the previous
    # comparison against "UnStructured" could never be true, so this section
    # was unreachable.
    elif data_type == "Unstructured":
        st.subheader("UnStructured Data")
        st.write(
            # Trailing space keeps the two adjacent literals from running
            # together as "tools.Examples" when they are concatenated.
            "Unstructured data refers to information that lacks a predefined format or organization, making it challenging to analyze using traditional tools. "
            "Examples include text, images, videos, audio, and social media posts."
        )
        st.write("Data Formats:")
        format_selected = st.radio(
            "Select a format to explore further:",
            ("TEXT", "IMAGE", "VIDEO", "AUDIO")
        )