File size: 4,935 Bytes
f312daa
b05d633
a66704a
 
9772547
a66704a
 
 
 
636adf8
a66704a
 
 
 
 
 
 
 
9772547
a66704a
9772547
a66704a
 
 
 
9772547
a66704a
 
 
 
 
9772547
a66704a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b05d633
a66704a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147

import streamlit as st
import pandas as pd
import numpy as np

# Global page styling, injected as raw HTML/CSS.
# A plain (non-f) string is used because nothing is interpolated; this avoids
# having to escape every CSS brace as `{{` / `}}`.
# NOTE(review): no `background-image` is declared, so the size / repeat /
# attachment / position properties below have nothing to act on; only the
# semi-transparent black `background-color` is in effect. Confirm whether an
# image was meant to be configured here.
_APP_STYLE = """
<style>
    /* Set the background image for the entire app */
    .stApp {
        background-color:rgba(0,0,0, 0.5);
        background-size: 1300px;
        background-repeat: no-repeat;
        background-attachment: fixed;
        background-position: center;
    }
    
    </style>
"""

# unsafe_allow_html is required for the <style> tag to be emitted as markup
# rather than rendered as escaped text.
st.markdown(_APP_STYLE, unsafe_allow_html=True)

import streamlit as st

# Navigation: page title plus first-run state.
st.title("Life Cycle of ML")

# The script is re-executed on every user interaction, so the current page is
# kept in the session state and only seeded when no value is stored yet.
if "page" not in st.session_state:
    st.session_state.page = "home"
# Main Navigation
def _go_to(page: str) -> None:
    """Store ``page`` as the active page in the session state.

    Meant to be used as a widget ``on_click`` callback: Streamlit runs the
    callback before re-executing the script, so the page dispatch below
    already sees the new value on the rerun triggered by the click.
    """
    st.session_state['page'] = page


# Dispatch on the page name stored in the session state.
# NOTE(review): nothing in this script sets 'page' back to 'home', so the
# home view cannot be reached again within the same session.
if st.session_state['page'] == 'home':
    st.subheader("Explore the Life Cycle Stages")
    # The state used to be assigned inside `if st.button(...)`, i.e. after
    # this branch had already been selected for the current run, so the new
    # page only showed up on the next interaction (a second click). Using a
    # callback makes the switch take effect on the first click.
    st.button("Data Collection", on_click=_go_to, args=("data_collection",))

elif st.session_state['page'] == 'data_collection':
    # Data Collection Page
    st.title("Data Collection")
    st.header("1. What is Data?")
    st.write(
        "Data refers to raw facts and figures that are collected, stored, and analyzed to derive insights. "
        "It serves as the foundation for any machine learning model."
    )

    st.header("2. Types of Data")
    data_type = st.radio(
        "Select a type of data to learn more:",
        ("Structured", "Unstructured", "Semi-Structured")
    )

    # NOTE(review): only "Structured" has content; selecting "Unstructured"
    # or "Semi-Structured" currently renders nothing below the radio.
    if data_type == "Structured":
        st.subheader("Structured Data")
        st.write(
            "Structured data is highly organized and easily searchable within databases. "
            "It includes rows and columns, such as in relational databases."
        )

        st.write("Data Formats:")
        format_selected = st.radio(
            "Select a format to explore further:",
            ("Excel", "CSV")
        )

        if format_selected == "Excel":
            # Excel Data Format Section
            st.subheader("Excel Data Format")
            st.write("*What is it?*")
            st.write(
                "Excel files are spreadsheets used to organize and analyze data in rows and columns. "
                "They are widely used due to their user-friendly nature and support for various data types."
            )

            st.write("*How to Read Excel Files?*")
            # Displayed as a snippet only; this code is not executed by the app.
            st.code(
                """
import pandas as pd
# Reading an Excel file
df = pd.read_excel('file.xlsx')
print(df.head())
                """,
                language="python"
            )

            st.write("*Common Issues When Handling Excel Files*")
            st.write(
                """
- Missing or corrupted files
- Version incompatibilities
- Incorrect file paths
- Handling large Excel files
                """
            )

            st.write("*How to Overcome These Errors/Issues?*")
            st.write(
                """
- Use proper error handling with try-except.
- Convert Excel files to CSV for better compatibility.
- Use libraries like openpyxl or xlrd for specific Excel versions.
- Break large files into smaller chunks for processing.
                """
            )

            # Button to open Jupyter Notebook or PDF
            # TODO(review): the link targets are placeholder paths
            # ("path/to/..."); replace them with real locations.
            if st.button("Open Excel Documentation"):
                st.write("Download the [documentation notebook](path/to/excel_notebook.ipynb) or [PDF](path/to/excel_documentation.pdf).")

        elif format_selected == "CSV":
            # CSV Data Format Section
            st.subheader("CSV Data Format")
            st.write("*What is it?*")
            st.write(
                "CSV (Comma-Separated Values) files store tabular data in plain text, where each line represents a record, "
                "and fields are separated by commas."
            )

            st.write("*How to Read CSV Files?*")
            # Displayed as a snippet only; this code is not executed by the app.
            st.code(
                """
import pandas as pd
# Reading a CSV file
df = pd.read_csv('file.csv')
print(df.head())
                """,
                language="python"
            )

            st.write("*Common Issues When Handling CSV Files*")
            st.write(
                """
- Encoding issues (e.g., UTF-8, ISO-8859-1)
- Inconsistent delimiters
- Missing or corrupted files
- Large file sizes causing memory errors
                """
            )

            st.write("*How to Overcome These Errors/Issues?*")
            st.write(
                """
- Specify the correct encoding when reading files using encoding='utf-8' or similar.
- Use libraries like csv or pandas to handle different delimiters.
- Employ error handling to catch and manage missing/corrupted files.
- Use chunking to read large files in smaller parts: pd.read_csv('file.csv', chunksize=1000).
                """
            )

            # Button to open Jupyter Notebook or PDF
            # TODO(review): the link targets are placeholder paths
            # ("path/to/..."); replace them with real locations.
            if st.button("Open CSV Documentation"):
                st.write("Download the [documentation notebook](path/to/csv_notebook.ipynb) or [PDF](path/to/csv_documentation.pdf).")