# Source: zero_to_hero_ML/pages/Lifecycle of Machine Learning.py
# Last commit: "Update pages/Lifecycle of Machine Learning.py" (dedab40, verified) by ankithpatel
# File size: 5.5 kB
import streamlit as st
import pandas as pd
import numpy as np
# Inject app-wide CSS. Streamlit only honours raw <style> tags when
# unsafe_allow_html=True is passed.
#
# This is a plain string rather than an f-string: nothing is interpolated,
# so the CSS braces can be written normally instead of doubled.
#
# NOTE(review): despite the CSS comment below, only a translucent background
# *colour* is set. The background-size/-repeat/-attachment/-position
# declarations only affect a background-image, and none is declared here.
# They are kept so the emitted CSS is unchanged.
_APP_BACKGROUND_CSS = """
<style>
/* Set the background image for the entire app */
.stApp {
background-color:rgba(96, 155, 124, 0.5);
background-size: 1300px;
background-repeat: no-repeat;
background-attachment: fixed;
background-position: center;
}
</style>
"""
st.markdown(_APP_BACKGROUND_CSS, unsafe_allow_html=True)
import streamlit as st
# Navigation and rendering for the "Life Cycle of ML" page.
#
# The page is a small state machine keyed on st.session_state['page']:
#   'home'            -> landing view listing the life-cycle stages
#   'data_collection' -> the Data Collection tutorial

# --- Static content ---------------------------------------------------------
# The multi-line literals are kept flush-left at module level so the text
# handed to st.code / st.write carries no leading indentation.

_EXCEL_SNIPPET = """
import pandas as pd
# Reading an Excel file
df = pd.read_excel('file.xlsx')
print(df.head())
"""

_EXCEL_ISSUES = """
- Missing or corrupted files
- Version incompatibilities
- Incorrect file paths
- Handling large Excel files
"""

_EXCEL_FIXES = """
- Use proper error handling with try-except.
- Convert Excel files to CSV for better compatibility.
- Use libraries like openpyxl or xlrd for specific Excel versions.
- Break large files into smaller chunks for processing.
"""

_CSV_SNIPPET = """
import pandas as pd
# Reading a CSV file
df = pd.read_csv('file.csv')
print(df.head())
"""

_CSV_ISSUES = """
- Encoding issues (e.g., UTF-8, ISO-8859-1)
- Inconsistent delimiters
- Missing or corrupted files
- Large file sizes causing memory errors
"""

_CSV_FIXES = """
- Specify the correct encoding when reading files using encoding='utf-8' or similar.
- Use libraries like csv or pandas to handle different delimiters.
- Employ error handling to catch and manage missing/corrupted files.
- Use chunking to read large files in smaller parts: pd.read_csv('file.csv', chunksize=1000).
"""

# Per-format content for the "Structured" section. Insertion order is the
# order in which the formats are offered in the radio selector.
_FORMAT_GUIDES = {
    "Excel": {
        "summary": (
            "Excel files are spreadsheets used to organize and analyze data in rows and columns. "
            "They are widely used due to their user-friendly nature and support for various data types."
        ),
        "snippet": _EXCEL_SNIPPET,
        "issues": _EXCEL_ISSUES,
        "fixes": _EXCEL_FIXES,
    },
    "CSV": {
        "summary": (
            "CSV (Comma-Separated Values) files store tabular data in plain text, where each line represents a record, "
            "and fields are separated by commas."
        ),
        "snippet": _CSV_SNIPPET,
        "issues": _CSV_ISSUES,
        "fixes": _CSV_FIXES,
    },
}


# --- Helpers ----------------------------------------------------------------

def _go_to(page: str) -> None:
    """Button callback: select which page the next script run renders.

    Streamlit executes widget callbacks before re-running the script, so the
    page dispatch at the bottom of this file already sees the new value. The
    previous code assigned the page inside ``if st.button(...)``, i.e. after
    the dispatch had chosen the home branch, so the new page did not appear
    until a second interaction.
    """
    st.session_state['page'] = page


def _render_format_guide(name: str, summary: str, snippet: str,
                         issues: str, fixes: str) -> None:
    """Render the tutorial section for one structured-data file format.

    Args:
        name: Display name of the format ("Excel" or "CSV"); also used,
            lower-cased, to build the documentation link paths.
        summary: Text shown under "What is it?".
        snippet: Python example shown in a code box.
        issues: Markdown bullet list of common problems.
        fixes: Markdown bullet list of remedies.
    """
    st.subheader(f"{name} Data Format")
    st.write("*What is it?*")
    st.write(summary)
    st.write(f"*How to Read {name} Files?*")
    st.code(snippet, language="python")
    st.write(f"*Common Issues When Handling {name} Files*")
    st.write(issues)
    st.write("*How to Overcome These Errors/Issues?*")
    st.write(fixes)
    # Button to open Jupyter Notebook or PDF
    if st.button(f"Open {name} Documentation"):
        slug = name.lower()
        # TODO(review): "path/to/..." looks like a placeholder; confirm the
        # real location of the notebook/PDF before publishing these links.
        st.write(
            f"Download the [documentation notebook](path/to/{slug}_notebook.ipynb) "
            f"or [PDF](path/to/{slug}_documentation.pdf)."
        )


def _render_structured() -> None:
    """Render the "Structured" data section and the selected format guide."""
    st.subheader("Structured Data")
    st.write(
        "Structured data is highly organized and easily searchable within databases. "
        "It includes rows and columns, such as in relational databases."
    )
    st.write("Data Formats:")
    format_selected = st.radio(
        "Select a format to explore further:",
        tuple(_FORMAT_GUIDES),
    )
    guide = _FORMAT_GUIDES.get(format_selected)
    if guide is not None:
        _render_format_guide(format_selected, **guide)


def _render_unstructured() -> None:
    """Render the "Unstructured" data section."""
    st.subheader("UnStructured Data")
    st.write(
        # Trailing space added after "tools." -- the two literals are
        # concatenated and previously rendered as "tools.Examples".
        "Unstructured data refers to information that lacks a predefined format or organization, making it challenging to analyze using traditional tools. "
        "Examples include text, images, videos, audio, and social media posts."
    )
    st.write("Data Formats:")
    # The selection is not acted on yet: no per-format content exists for
    # unstructured data in this file.
    st.radio(
        "Select a format to explore further:",
        ("TEXT", "IMAGE", "VIDEO", "AUDIO"),
    )


def _render_data_collection() -> None:
    """Render the Data Collection page."""
    st.title("Data Collection")
    st.header("1. What is Data?")
    st.write(
        "Data refers to raw facts and figures that are collected, stored, and analyzed to derive insights. "
        "It serves as the foundation for any machine learning model."
    )
    st.header("2. Types of Data")
    data_type = st.radio(
        "Select a type of data to learn more:",
        ("Structured", "Unstructured", "Semi-Structured"),
    )
    if data_type == "Structured":
        _render_structured()
    elif data_type == "Unstructured":
        # Must match the radio option exactly; the earlier comparison against
        # "UnStructured" could never be true, so this section never rendered.
        _render_unstructured()
    # "Semi-Structured" has no content yet, so nothing is rendered for it.


def _render_home() -> None:
    """Render the landing view with one button per life-cycle stage."""
    st.subheader("Explore the Life Cycle Stages")
    st.button("Data Collection", on_click=_go_to, args=("data_collection",))


# Maps each value of st.session_state['page'] to its renderer.
_PAGES = {
    'home': _render_home,
    'data_collection': _render_data_collection,
}

# --- Script body ------------------------------------------------------------
st.title("Life Cycle of ML")
if 'page' not in st.session_state:
    st.session_state['page'] = 'home'
# Main Navigation
_render_page = _PAGES.get(st.session_state['page'])
if _render_page is not None:
    _render_page()