# Spaces: ankithpatel / zero_to_hero_ML
# Sleeping
# File size: 4,694 Bytes
# 1b8fed2

import streamlit as st

def background():
    """Inject the app-wide CSS that styles the ``.stApp`` container.

    The stylesheet is emitted through ``st.markdown`` with
    ``unsafe_allow_html=True`` so the ``<style>`` tag is passed through
    as raw HTML instead of being escaped.
    """
    # Each entry is one line of the emitted markup; the leading and trailing
    # whitespace-only entries are kept so the output text is unchanged.
    css_lines = (
        "",
        "    <style>",
        "        /* Set the background image for the entire app */",
        "        .stApp {",
        "            background-color:rgba(96, 155, 124, 0.5);",
        "            background-size: 1300px;",
        "            background-repeat: no-repeat;",
        "            background-attachment: fixed;",
        "            background-position: center;",
        "        }",
        "        ",
        "        </style>",
        "    ",
    )
    st.markdown("\n".join(css_lines), unsafe_allow_html=True)
def page2():
    """Render the "Data Collection" page.

    The page applies the shared background styling, explains what data is,
    and lets the user pick a data type. For "Structured" data a second radio
    selects a file format (Excel or CSV) and the matching section is shown:
    a description, a pandas reading snippet, common issues, remedies, and a
    button that reveals documentation links.

    A "Go to Home Page" button at the bottom stores ``'Page1'`` in
    ``st.session_state.page`` and triggers a script rerun.
    """
    background()
    st.title("Data Collection")
    st.header("1. What is Data?")
    st.write(
        "Data refers to raw facts and figures that are collected, stored, and analyzed to derive insights. "
        "It serves as the foundation for any machine learning model."
    )

    st.header("2. Types of Data")
    data_type = st.radio(
        "Select a type of data to learn more:",
        ("Structured", "Unstructured", "Semi-Structured")
    )

    # NOTE: only "Structured" has content; selecting "Unstructured" or
    # "Semi-Structured" currently renders nothing below the radio.
    if data_type == "Structured":
        st.subheader("Structured Data")
        st.write(
            "Structured data is highly organized and easily searchable within databases. "
            "It includes rows and columns, such as in relational databases."
        )

        st.write("Data Formats:")
        format_selected = st.radio(
            "Select a format to explore further:",
            ("Excel", "CSV")
        )

        if format_selected == "Excel":
            # Excel Data Format Section
            st.subheader("Excel Data Format")
            st.write("*What is it?*")
            st.write(
                "Excel files are spreadsheets used to organize and analyze data in rows and columns. "
                "They are widely used due to their user-friendly nature and support for various data types."
            )

            st.write("*How to Read Excel Files?*")
            st.code(
                """
import pandas as pd
# Reading an Excel file
df = pd.read_excel('file.xlsx')
print(df.head())
                """,
                language="python"
            )

            st.write("*Common Issues When Handling Excel Files*")
            st.write(
                """
- Missing or corrupted files
- Version incompatibilities
- Incorrect file paths
- Handling large Excel files
                """
            )

            st.write("*How to Overcome These Errors/Issues?*")
            st.write(
                """
- Use proper error handling with try-except.
- Convert Excel files to CSV for better compatibility.
- Use libraries like openpyxl or xlrd for specific Excel versions.
- Break large files into smaller chunks for processing.
                """
            )

            # Button that reveals links to the notebook / PDF documentation.
            # NOTE(review): the link targets look like placeholder paths — confirm.
            if st.button("Open Excel Documentation"):
                st.write("Download the [documentation notebook](path/to/excel_notebook.ipynb) or [PDF](path/to/excel_documentation.pdf).")

        elif format_selected == "CSV":
            # CSV Data Format Section
            st.subheader("CSV Data Format")
            st.write("*What is it?*")
            st.write(
                "CSV (Comma-Separated Values) files store tabular data in plain text, where each line represents a record, "
                "and fields are separated by commas."
            )

            st.write("*How to Read CSV Files?*")
            st.code(
                """
import pandas as pd
# Reading a CSV file
df = pd.read_csv('file.csv')
print(df.head())
                """,
                language="python"
            )

            st.write("*Common Issues When Handling CSV Files*")
            st.write(
                """
- Encoding issues (e.g., UTF-8, ISO-8859-1)
- Inconsistent delimiters
- Missing or corrupted files
- Large file sizes causing memory errors
                """
            )

            st.write("*How to Overcome These Errors/Issues?*")
            st.write(
                """
- Specify the correct encoding when reading files using encoding='utf-8' or similar.
- Use libraries like csv or pandas to handle different delimiters.
- Employ error handling to catch and manage missing/corrupted files.
- Use chunking to read large files in smaller parts: pd.read_csv('file.csv', chunksize=1000).
                """
            )

            # Button that reveals links to the notebook / PDF documentation.
            # NOTE(review): the link targets look like placeholder paths — confirm.
            if st.button("Open CSV Documentation"):
                st.write("Download the [documentation notebook](path/to/csv_notebook.ipynb) or [PDF](path/to/csv_documentation.pdf).")

    if st.button("Go to Home Page"):
        st.session_state.page = 'Page1'
        # `st.experimental_rerun` is deprecated and absent from newer
        # Streamlit releases; prefer `st.rerun` and fall back only when
        # running on an older version that lacks it.
        rerun = getattr(st, "rerun", None)
        if rerun is None:
            rerun = st.experimental_rerun
        rerun()