"""Streamlit app: an interactive walkthrough of the machine-learning life cycle.

The app has two pages, tracked in ``st.session_state['page']``:

* ``'home'``            -- landing page listing the life-cycle stages.
* ``'data_collection'`` -- explains what data is, the types of data, and how to
  read the structured formats (Excel, CSV) with pandas.

Page changes are done through button ``on_click`` callbacks. Streamlit runs a
callback *before* it reruns the script, so the new page is rendered on the
very same click (setting the state inside ``if st.button(...)`` would only take
effect on the following interaction).
"""
import numpy as np
import pandas as pd
import streamlit as st

# Content of each structured-data format section, keyed by the label shown in
# the format radio button. Both formats share one layout, so only text differs.
FORMAT_SECTIONS = {
    "Excel": {
        "subheader": "Excel Data Format",
        "description": (
            "Excel files are spreadsheets used to organize and analyze data in rows and columns. "
            "They are widely used due to their user-friendly nature and support for various data types."
        ),
        "read_heading": "*How to Read Excel Files?*",
        "read_snippet": (
            "import pandas as pd\n"
            "# Reading an Excel file\n"
            "df = pd.read_excel('file.xlsx')\n"
            "print(df.head())"
        ),
        "issues_heading": "*Common Issues When Handling Excel Files*",
        "issues": (
            "- Missing or corrupted files\n"
            "- Version incompatibilities\n"
            "- Incorrect file paths\n"
            "- Handling large Excel files\n"
        ),
        "fixes": (
            "- Use proper error handling with try-except.\n"
            "- Convert Excel files to CSV for better compatibility.\n"
            "- Use libraries like openpyxl or xlrd for specific Excel versions.\n"
            "- Break large files into smaller chunks for processing.\n"
        ),
        "button_label": "Open Excel Documentation",
        "download_note": (
            "Download the [documentation notebook](path/to/excel_notebook.ipynb) "
            "or [PDF](path/to/excel_documentation.pdf)."
        ),
    },
    "CSV": {
        "subheader": "CSV Data Format",
        "description": (
            "CSV (Comma-Separated Values) files store tabular data in plain text, where each line represents a record, "
            "and fields are separated by commas."
        ),
        "read_heading": "*How to Read CSV Files?*",
        "read_snippet": (
            "import pandas as pd\n"
            "# Reading a CSV file\n"
            "df = pd.read_csv('file.csv')\n"
            "print(df.head())"
        ),
        "issues_heading": "*Common Issues When Handling CSV Files*",
        "issues": (
            "- Encoding issues (e.g., UTF-8, ISO-8859-1)\n"
            "- Inconsistent delimiters\n"
            "- Missing or corrupted files\n"
            "- Large file sizes causing memory errors\n"
        ),
        "fixes": (
            "- Specify the correct encoding when reading files using encoding='utf-8' or similar.\n"
            "- Use libraries like csv or pandas to handle different delimiters.\n"
            "- Employ error handling to catch and manage missing/corrupted files.\n"
            "- Use chunking to read large files in smaller parts: pd.read_csv('file.csv', chunksize=1000).\n"
        ),
        "button_label": "Open CSV Documentation",
        "download_note": (
            "Download the [documentation notebook](path/to/csv_notebook.ipynb) "
            "or [PDF](path/to/csv_documentation.pdf)."
        ),
    },
}


def _go_to(page: str) -> None:
    """Button callback: select which page the next script run renders."""
    st.session_state['page'] = page


def _render_format_section(section: dict) -> None:
    """Render one data-format section from its ``FORMAT_SECTIONS`` entry."""
    st.subheader(section["subheader"])

    st.write("*What is it?*")
    st.write(section["description"])

    st.write(section["read_heading"])
    st.code(section["read_snippet"], language="python")

    st.write(section["issues_heading"])
    st.write(section["issues"])

    st.write("*How to Overcome These Errors/Issues?*")
    st.write(section["fixes"])

    # Button to open Jupyter Notebook or PDF.
    # NOTE(review): the link targets are 'path/to/...' placeholders -- confirm
    # the real locations (or serve the files) before publishing.
    if st.button(section["button_label"]):
        st.write(section["download_note"])


def _render_structured_data() -> None:
    """Render the 'Structured' data type: overview plus a format chooser."""
    st.subheader("Structured Data")
    st.write(
        "Structured data is highly organized and easily searchable within databases. "
        "It includes rows and columns, such as in relational databases."
    )

    st.write("Data Formats:")
    format_selected = st.radio(
        "Select a format to explore further:",
        ("Excel", "CSV")
    )
    _render_format_section(FORMAT_SECTIONS[format_selected])


def render_home() -> None:
    """Render the landing page with one navigation button per stage."""
    st.subheader("Explore the Life Cycle Stages")
    # on_click updates the page before the rerun, so one click is enough.
    st.button("Data Collection", on_click=_go_to, args=("data_collection",))


def render_data_collection() -> None:
    """Render the Data Collection page."""
    st.title("Data Collection")

    st.header("1. What is Data?")
    st.write(
        "Data refers to raw facts and figures that are collected, stored, and analyzed to derive insights. "
        "It serves as the foundation for any machine learning model."
    )

    st.header("2. Types of Data")
    data_type = st.radio(
        "Select a type of data to learn more:",
        ("Structured", "Unstructured", "Semi-Structured")
    )

    # Only "Structured" has content in this file; the other two options
    # currently render nothing below the radio button.
    if data_type == "Structured":
        _render_structured_data()

    # Without this the page is a dead end: nothing else resets the page state.
    st.button("Back to Home", on_click=_go_to, args=("home",))


def main() -> None:
    """Render the shared header, then the page selected in session state."""
    # Whitespace-only HTML block kept from the original script.
    # NOTE(review): presumably a slot for custom CSS -- confirm or remove.
    st.markdown(""" """, unsafe_allow_html=True)

    # Navigation
    st.title("Life Cycle of ML")

    if 'page' not in st.session_state:
        st.session_state['page'] = 'home'

    # Main Navigation
    current_page = st.session_state['page']
    if current_page == 'home':
        render_home()
    elif current_page == 'data_collection':
        render_data_collection()


# Streamlit executes this file top to bottom on every interaction.
main()