import streamlit as st
import pandas as pd
import numpy as np
st.markdown(f"""
<style>
/* Set the background image for the entire app */
.stApp {{
background-color:rgba(0,0,0, 0.5);
background-size: 1300px;
background-repeat: no-repeat;
background-attachment: fixed;
background-position: center;
}}
</style>
""", unsafe_allow_html=True)
# Navigation
st.title("Life Cycle of ML")
if 'page' not in st.session_state:
    st.session_state['page'] = 'home'

# Main Navigation
if st.session_state['page'] == 'home':
    st.subheader("Explore the Life Cycle Stages")
    if st.button("Data Collection"):
        st.session_state['page'] = 'data_collection'
elif st.session_state['page'] == 'data_collection':
    # Data Collection Page
    st.title("Data Collection")

    st.header("1. What is Data?")
    st.write(
        "Data refers to raw facts and figures that are collected, stored, and analyzed to derive insights. "
        "It serves as the foundation for any machine learning model."
    )

    st.header("2. Types of Data")
    data_type = st.radio(
        "Select a type of data to learn more:",
        ("Structured", "Unstructured", "Semi-Structured")
    )

    if data_type == "Structured":
        st.subheader("Structured Data")
        st.write(
            "Structured data is highly organized and easily searchable within databases. "
            "It is arranged in rows and columns, as in relational databases."
        )

        st.write("Data Formats:")
        format_selected = st.radio(
            "Select a format to explore further:",
            ("Excel", "CSV")
        )
        if format_selected == "Excel":
            # Excel Data Format Section
            st.subheader("Excel Data Format")
            st.write("*What is it?*")
            st.write(
                "Excel files are spreadsheets used to organize and analyze data in rows and columns. "
                "They are widely used due to their user-friendly nature and support for various data types."
            )

            st.write("*How to Read Excel Files?*")
            st.code(
                """
import pandas as pd

# Reading an Excel file
df = pd.read_excel('file.xlsx')
print(df.head())
""",
                language="python"
            )

            st.write("*Common Issues When Handling Excel Files*")
            st.write(
                """
- Missing or corrupted files
- Version incompatibilities
- Incorrect file paths
- Handling large Excel files
"""
            )

            st.write("*How to Overcome These Errors/Issues?*")
            st.write(
                """
- Use proper error handling with try-except (see the example below).
- Convert Excel files to CSV for better compatibility.
- Use libraries like openpyxl or xlrd for specific Excel versions.
- Break large files into smaller chunks for processing.
"""
            )
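            # A hedged, minimal sketch (not part of the original app) illustrating the
            # advice above: wrap pd.read_excel in try/except and pass an explicit
            # engine. The file name 'file.xlsx' is a placeholder.
            st.write("*Example: reading an Excel file defensively*")
            st.code(
                """
import pandas as pd

try:
    # openpyxl reads modern .xlsx workbooks; passing it explicitly avoids
    # engine/version surprises
    df = pd.read_excel('file.xlsx', engine='openpyxl')
    print(df.head())
except FileNotFoundError:
    print("File not found - check the file path before reading.")
""",
                language="python"
            )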
            # Button to open Jupyter Notebook or PDF
            if st.button("Open Excel Documentation"):
                st.write("Download the [documentation notebook](path/to/excel_notebook.ipynb) or [PDF](path/to/excel_documentation.pdf).")
        elif format_selected == "CSV":
            # CSV Data Format Section
            st.subheader("CSV Data Format")
            st.write("*What is it?*")
            st.write(
                "CSV (Comma-Separated Values) files store tabular data in plain text, where each line represents a record "
                "and fields are separated by commas."
            )

            st.write("*How to Read CSV Files?*")
            st.code(
                """
import pandas as pd

# Reading a CSV file
df = pd.read_csv('file.csv')
print(df.head())
""",
                language="python"
            )

            st.write("*Common Issues When Handling CSV Files*")
            st.write(
                """
- Encoding issues (e.g., UTF-8, ISO-8859-1)
- Inconsistent delimiters
- Missing or corrupted files
- Large file sizes causing memory errors
"""
            )

            st.write("*How to Overcome These Errors/Issues?*")
            st.write(
                """
- Specify the correct encoding when reading files, using encoding='utf-8' or similar.
- Use libraries like csv or pandas to handle different delimiters.
- Employ error handling to catch and manage missing/corrupted files.
- Use chunking to read large files in smaller parts: pd.read_csv('file.csv', chunksize=1000) (see the example below).
"""
            )
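            # A hedged, minimal sketch (not part of the original app) illustrating the
            # encoding and chunking advice above. 'file.csv', the encoding, and the
            # chunk size are placeholders.
            st.write("*Example: reading a large CSV with an explicit encoding, in chunks*")
            st.code(
                """
import pandas as pd

# Read in 1000-row chunks so the whole file never has to fit in memory at once
chunks = pd.read_csv('file.csv', encoding='utf-8', chunksize=1000)
df = pd.concat(chunk for chunk in chunks)
print(df.head())
""",
                language="python"
            )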
            # Button to open Jupyter Notebook or PDF
            if st.button("Open CSV Documentation"):
                st.write("Download the [documentation notebook](path/to/csv_notebook.ipynb) or [PDF](path/to/csv_documentation.pdf).")