Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
# Global page styling, injected as raw HTML (hence unsafe_allow_html=True).
# A plain string is used rather than an f-string: nothing is interpolated, so
# the CSS braces do not need to be doubled. The CSS sent to the browser is
# unchanged.
# NOTE(review): the CSS comment talks about a background image, but no
# `background-image` is declared here, so the size/repeat/attachment/position
# rules presumably have nothing to act on -- confirm whether an image URL was
# meant to be supplied.
_APP_STYLE = """
<style>
/* Set the background image for the entire app */
.stApp {
background-color:rgba(0,0,0, 0.5);
background-size: 1300px;
background-repeat: no-repeat;
background-attachment: fixed;
background-position: center;
}
</style>
"""
st.markdown(_APP_STYLE, unsafe_allow_html=True)
import streamlit as st | |
# Navigation
#
# The app is a small state machine: st.session_state['page'] names the page
# to render ('home' or 'data_collection') and buttons switch between them.


def _go_to(page_name):
    """Make ``page_name`` the active page.

    Intended as a button ``on_click`` callback. Streamlit runs callbacks
    before it re-executes the script, so the page check below already sees
    the new value on the rerun triggered by the click. Assigning to
    session state inside ``if st.button(...):`` instead only takes effect on
    the *following* interaction, which made the button need two clicks.
    """
    st.session_state['page'] = page_name


# Multi-line texts shown on the Data Collection page. They are kept as
# module-level constants so the page logic below stays readable.
_EXCEL_READ_SNIPPET = """
import pandas as pd
# Reading an Excel file
df = pd.read_excel('file.xlsx')
print(df.head())
"""

_EXCEL_ISSUES_MD = """
- Missing or corrupted files
- Version incompatibilities
- Incorrect file paths
- Handling large Excel files
"""

_EXCEL_FIXES_MD = """
- Use proper error handling with try-except.
- Convert Excel files to CSV for better compatibility.
- Use libraries like openpyxl or xlrd for specific Excel versions.
- Break large files into smaller chunks for processing.
"""

_CSV_READ_SNIPPET = """
import pandas as pd
# Reading a CSV file
df = pd.read_csv('file.csv')
print(df.head())
"""

_CSV_ISSUES_MD = """
- Encoding issues (e.g., UTF-8, ISO-8859-1)
- Inconsistent delimiters
- Missing or corrupted files
- Large file sizes causing memory errors
"""

_CSV_FIXES_MD = """
- Specify the correct encoding when reading files using encoding='utf-8' or similar.
- Use libraries like csv or pandas to handle different delimiters.
- Employ error handling to catch and manage missing/corrupted files.
- Use chunking to read large files in smaller parts: pd.read_csv('file.csv', chunksize=1000).
"""

st.title("Life Cycle of ML")

# First run of a session: start on the home page.
if 'page' not in st.session_state:
    st.session_state['page'] = 'home'

# Main Navigation
if st.session_state['page'] == 'home':
    st.subheader("Explore the Life Cycle Stages")
    st.button("Data Collection", on_click=_go_to, args=('data_collection',))
elif st.session_state['page'] == 'data_collection':
    # Data Collection Page
    # Without this button the page is a dead end: nothing else resets
    # st.session_state['page'] to 'home'. The explicit key keeps the widget
    # ID unique if another button with the same label is added elsewhere.
    st.button(
        "Back to Home",
        key="data_collection_back_home",
        on_click=_go_to,
        args=('home',),
    )

    st.title("Data Collection")

    st.header("1. What is Data?")
    st.write(
        "Data refers to raw facts and figures that are collected, stored, and analyzed to derive insights. "
        "It serves as the foundation for any machine learning model."
    )

    st.header("2. Types of Data")
    # NOTE(review): only the "Structured" choice has content in this section;
    # no branch for the other two options is visible here.
    data_type = st.radio(
        "Select a type of data to learn more:",
        ("Structured", "Unstructured", "Semi-Structured")
    )
    if data_type == "Structured":
        st.subheader("Structured Data")
        st.write(
            "Structured data is highly organized and easily searchable within databases. "
            "It includes rows and columns, such as in relational databases."
        )
        st.write("Data Formats:")
        format_selected = st.radio(
            "Select a format to explore further:",
            ("Excel", "CSV")
        )
        if format_selected == "Excel":
            # Excel Data Format Section
            st.subheader("Excel Data Format")
            st.write("*What is it?*")
            st.write(
                "Excel files are spreadsheets used to organize and analyze data in rows and columns. "
                "They are widely used due to their user-friendly nature and support for various data types."
            )
            st.write("*How to Read Excel Files?*")
            st.code(_EXCEL_READ_SNIPPET, language="python")
            st.write("*Common Issues When Handling Excel Files*")
            st.write(_EXCEL_ISSUES_MD)
            st.write("*How to Overcome These Errors/Issues?*")
            st.write(_EXCEL_FIXES_MD)
            # Button to open Jupyter Notebook or PDF
            # NOTE(review): the link targets are placeholder paths
            # ('path/to/...'); replace them with real locations.
            if st.button("Open Excel Documentation"):
                st.write("Download the [documentation notebook](path/to/excel_notebook.ipynb) or [PDF](path/to/excel_documentation.pdf).")
        elif format_selected == "CSV":
            # CSV Data Format Section
            st.subheader("CSV Data Format")
            st.write("*What is it?*")
            st.write(
                "CSV (Comma-Separated Values) files store tabular data in plain text, where each line represents a record, "
                "and fields are separated by commas."
            )
            st.write("*How to Read CSV Files?*")
            st.code(_CSV_READ_SNIPPET, language="python")
            st.write("*Common Issues When Handling CSV Files*")
            st.write(_CSV_ISSUES_MD)
            st.write("*How to Overcome These Errors/Issues?*")
            st.write(_CSV_FIXES_MD)
            # Button to open Jupyter Notebook or PDF
            # NOTE(review): the link targets are placeholder paths
            # ('path/to/...'); replace them with real locations.
            if st.button("Open CSV Documentation"):
                st.write("Download the [documentation notebook](path/to/csv_notebook.ipynb) or [PDF](path/to/csv_documentation.pdf).")