# Hugging Face Space: ankithpatel/zero_to_hero_ML (status: Sleeping)
# File size: 15,043 bytes

import streamlit as st
import pandas as pd
import numpy as np

# App-wide styling, injected as raw HTML/CSS.
# The stylesheet has no interpolated values, so it is kept as a plain string
# and the CSS braces are written literally.
# NOTE(review): only a background colour is declared; the other background-*
# properties have no image to act on — confirm whether an image was intended.
_APP_STYLE = """
<style>
    /* Set the background image for the entire app */
    .stApp {
        background-color:rgba(96, 155, 124, 0.5);
        background-size: 1300px;
        background-repeat: no-repeat;
        background-attachment: fixed;
        background-position: center;
    }
    
    </style>
"""
st.markdown(_APP_STYLE, unsafe_allow_html=True)

import streamlit as st

def _go_to_page(page_name):
    """Record *page_name* as the active page in the session state.

    Intended as a widget ``on_click`` callback, so the assignment is applied
    before the script body runs again and the page chain below sees it.
    """
    st.session_state['page'] = page_name


# Navigation
st.title("Life Cycle of ML")
# First run of a session: start on the home page.
if 'page' not in st.session_state:
    st.session_state['page'] = 'home'

# Main Navigation
if st.session_state['page'] == 'home':
    st.subheader("Explore the Life Cycle Stages")
    # Switch pages through a callback. Assigning the page inside
    # `if st.button(...)` would happen after this if/elif chain has already
    # picked the 'home' branch for the current run, so the target page would
    # only appear on the following interaction.
    st.button("Data Collection", on_click=_go_to_page, args=("data_collection",))

elif st.session_state['page'] == 'data_collection':
    # Data Collection Page
    st.title("Data Collection")
    st.header("1. What is Data?")
    st.write(
        "Data refers to raw facts and figures that are collected, stored, and analyzed to derive insights. "
        "It serves as the foundation for any machine learning model."
    )
    
    st.header("2. Types of Data")
    data_type = st.radio(
        "Select a type of data to learn more:",
        ("Structured", "Unstructured", "Semi-Structured")
    )
    
    if data_type == "Structured":
        # Structured data: short definition followed by the file-format picker.
        st.subheader("Structured Data")
        structured_overview = (
            "Structured data is highly organized and easily searchable within databases. "
            + "It includes rows and columns, such as in relational databases."
        )
        st.write(structured_overview)

        st.write("Data Formats:")
        # `format_selected` chooses between the Excel and CSV sections below.
        structured_formats = ("Excel", "CSV")
        format_selected = st.radio("Select a format to explore further:", structured_formats)
        
        if format_selected == "Excel":
            # Excel Data Format Section
            st.subheader("Excel Data Format")
            st.write("*What is it?*")
            st.write(
                "Excel files are spreadsheets used to organize and analyze data in rows and columns. "
                "They are widely used due to their user-friendly nature and support for various data types."
            )
            
            st.write("*How to Read Excel Files?*")
            st.code(
                """
import pandas as pd
# Reading an Excel file
df = pd.read_excel('file.xlsx')
print(df.head())
                """,
                language="python"
            )
            
            st.write("*Common Issues When Handling Excel Files*")
            st.write(
                """
- Missing or corrupted files
- Version incompatibilities
- Incorrect file paths
- Handling large Excel files
                """
            )
            
            st.write("*How to Overcome These Errors/Issues?*")
            st.write(
                """
- Use proper error handling with try-except.
- Convert Excel files to CSV for better compatibility.
- Use libraries like openpyxl or xlrd for specific Excel versions.
- Break large files into smaller chunks for processing.
                """
            )
            
            # Button to open Jupyter Notebook or PDF
            if st.button("Open Excel Documentation"):
                st.write("Download the [documentation notebook](path/to/excel_notebook.ipynb) or [PDF](path/to/excel_documentation.pdf).")

        elif format_selected == "CSV":
            # CSV Data Format Section
            st.subheader("CSV Data Format")
            st.write("*What is it?*")
            st.write(
                "CSV (Comma-Separated Values) files store tabular data in plain text, where each line represents a record, "
                "and fields are separated by commas."
            )
            
            st.write("*How to Read CSV Files?*")
            st.code(
                """
import pandas as pd
# Reading a CSV file
df = pd.read_csv('file.csv')
print(df.head())
                """,
                language="python"
            )
            
            st.write("*Common Issues When Handling CSV Files*")
            st.write(
                """
- Encoding issues (e.g., UTF-8, ISO-8859-1)
- Inconsistent delimiters
- Missing or corrupted files
- Large file sizes causing memory errors
                """
            )
            
            st.write("*How to Overcome These Errors/Issues?*")
            st.write(
                """
- Specify the correct encoding when reading files using encoding='utf-8' or similar.
- Use libraries like csv or pandas to handle different delimiters.
- Employ error handling to catch and manage missing/corrupted files.
- Use chunking to read large files in smaller parts: pd.read_csv('file.csv', chunksize=1000).
                """
            )
            
            # Button to open Jupyter Notebook or PDF
            if st.button("Open CSV Documentation"):
                st.write("Download the [documentation notebook](path/to/csv_notebook.ipynb) or [PDF](path/to/csv_documentation.pdf).")



    elif data_type == "Unstructured":
        
        st.subheader("Unstructured Data")
        
        st.write(
            "Unstructured data refers to information that lacks a predefined format or organization, making it challenging to analyze using traditional tools." 
            "Examples include text, images, videos, audio, and social media posts."
        )
        
        st.write("Data Formats:")
        format_selected = st.radio(
            "Select a format to explore further:",
            ("IMAGE","VIDEO", "AUDIO")
        )

        #HOW TO READ TEXT
        if format_selected == "IMAGE":
            
            st.subheader("IMAGE Data Format")
            st.write("*What is it?*")
            st.write(
                "Photos, medical scans, satellite images. "
                )
            
            
            st.write("*How to Read IMAGE Files?*")
            st.code(
                
                """
            
from PIL import Image
image = Image.open('example.jpg')
image.show()
                """,
                 language="python"
                
               
            )




                


            
            
            st.write("*Common Issues When Handling image Files*")
            
            st.write(
                """
- data augumentation and overfitting
- image processing challenges
- Data Imbalance
- High Dimensionality
                """
            )
            
            
            
            st.write("*How to Overcome These Errors/Issues?*")
            st.write(
                """
- Data Augumentaion.
- Consistent image processing
- Handling Class Imbalance.
- Dimensionality Reduction and Feature Extraction
                """
            )
            # Button to open Jupyter Notebook or PDF
            if st.button("Open IMAGE Documentation"):
                st.write("Download the [documentation notebook](path/to/image_notebook.ipynb) or [PDF](path/to/image_documentation.pdf).")


        elif format_selected == "VIDEO":
            
            st.subheader("VIDEO Data Format")
            st.write("*What is it?*")
            st.write(
                "PNG,GIF,BNP,RAW videos,TIFF "
                )
            
            
            st.write("*How to Read VIDEO Files?*")
            st.code(
                
                """
            

pip install opencv-python
import cv2

# Open the video file
video_path = 'path_to_your_video.mp4'
cap = cv2.VideoCapture(video_path)
                """,
                 language="python"
                
               
            )




                


            
            
            st.write("*Common Issues When Handling video Files*")
            
            st.write(
                """
- File not found or Corrupted.
- Incompatible Codec or Format.
- Performance Issues with Large Videos.
- Frame Dropping or Skipping.
                """
            )
            
            
            
            st.write("*How to Overcome These Errors/Issues?*")
            st.write(
                """
-  Ensure Correct File Path and Handle Corrupted Files.
- Install Missing Codecs or Use Supported Formats.
- Optimize Performance for Large Videos
- Control Frame Rate and Prevent Skipping
                """
            )
            # Button to open Jupyter Notebook or PDF
            if st.button("Open VIDEOS Documentation"):
                st.write("Download the [documentation notebook](path/to/videos_notebook.ipynb) or [PDF](path/to/videos_documentation.pdf).")

        elif format_selected == "AUDIO":
            
            st.subheader("AUDIO Data Format")
            st.write("*What is it?*")
            st.write(
                "MP3,WAV,FLAC,AAC,OGG "
                )
            
            
            st.write("*How to Read AUDIO Files?*")
            st.code(
                
                """
            

pip install librosa
import librosa

# Load the audio file
audio_path = 'path_to_audio_file.wav'
y, sr = librosa.load(audio_path, sr=None)  # sr=None to preserve the original sampling rate
                """,
                 language="python"
                
               
            )




                


            
            
            st.write("*Common Issues When Handling audio Files*")
            
            st.write(
                """
- File not found or Corrupted.
- Incompatible Codec or Format.
- Memory Overload or Performance Issues with Large Audios.
-  Encoding or File Corruption Issues
                """
            )
            
            
            
            st.write("*How to Overcome These Errors/Issues?*")
            st.write(
                """
-  File Not Found or Corrupted: Always check if the file exists before attempting to load it. Handle errors gracefully with try-except.
- Incompatible Format or Codec: Use pydub or ffmpeg to handle multiple formats, or convert the file to a more compatible format.
- Memory Overload or Performance Issues: Process the audio in chunks or downsample large files to reduce memory consumption.
- Encoding or File Corruption Issues: Ensure proper encoding and re-encode files using tools like ffmpeg if necessary.
                """
            )
            # Button to open Jupyter Notebook or PDF
            if st.button("Open AUDIO Documentation"):
                st.write("Download the [documentation notebook](path/to/audio_notebook.ipynb) or [PDF](path/to/audio_documentation.pdf).")


    elif data_type == "Semi-Structured":
        
        st.subheader("Semi-structured Data")
        
        st.write(
            "Semi-structured data is data that doesn’t fit into a rigid structure like relational databases but has some organizational properties, such as tags or key-value pairs, making it easier to analyze.")
        
        st.write("Data Formats:")
        format_selected = st.radio(
            "Select a format to explore further:",
            ("JSON","XML")
        )
               
        #HOW TO READ TEXT
        if format_selected == "JSON":
            
            st.subheader("JSON Data Format")
            st.write("*What is it?*")
            st.write(
                "JSON is a lightweight data-interchange format that uses key-value pairs. It is commonly used in web services and APIs for exchanging data. "
                )
            
            
            st.write("*How to Read JSON Files?*")
            st.code(
                
                """
            
import json

# Open and read the JSON file
with open('data.json', 'r') as file:
    data = json.load(file)
                """,
                
                 language="python"
                
               
            )




                


            
            
            st.write("*Common Issues When Handling json Files*")
            
            st.write(
                """
- File Encoding Issues
-  Invalid JSON Syntax
- Large JSON Files Causing Memory Issues
- Inconsistent Data Structure
                """
            )
            
            
            
            st.write("*How to Overcome These Errors/Issues?*")
            st.write(
                """
- Validate JSON Syntax: Use tools like JSONLint or json.decoder.JSONDecodeError in Python to ensure valid JSON format.

- Handle Encoding: Specify the encoding when opening the file in Python (e.g., open('file.json', 'r', encoding='utf-8')).

- Use Chunking or Streaming for Large Files: For large JSON files, load the file in chunks or use libraries that support JSON streaming like ijson or jsonlines.

- Consistent Structure: Ensure consistent data structure when creating JSON files, or write code to handle missing or extra fields gracefully.
                """
            )
            # Button to open Jupyter Notebook or PDF
            if st.button("Open JSON Documentation"):
                st.write("Download the [documentation notebook](path/to/JSON_notebook.ipynb) or [PDF](path/to/JSON_documentation.pdf).")


        elif format_selected == "XML":
            
            st.subheader("XML Data Format")
            st.write("*What is it?*")
            st.write(
                "XML is a flexible, structured data format used to store and transport data, utilizing tags to define elements, attributes, and hierarchical relationships between different pieces of information. "
                )
            
            
            st.write("*How to Read XML Files?*")
            st.code(
                
                """
            
import pandas as pd
pd.read_xml("Data_path")
                """ ,
                
                 language="python"
                
            )    
                
                
                
               
            




                


            
            
            st.write("*Common Issues When Handling XML Files*")
            
            st.write(
                """
-  Invalid XML Syntax.
- Encoding Issues.
-  Large XML Files.
- Inconsistent Structure.
                """
            )
            
            
            
            st.write("*How to Overcome These Errors/Issues?*")
            st.write(
                """
-  Validate XML Syntax: Use XML validators and try-except blocks to catch and fix syntax errors during parsing.
- Handle Encoding Issues: Specify the encoding when reading files and use libraries like chardet to detect encoding automatically.
- Process Large Files Efficiently: Use streaming parsers (e.g., iterparse()) and iterative parsing to handle large files without consuming too much memory.
- Ensure Consistent Structure: Check for missing elements before accessing them and handle inconsistencies with default values or conditional logic.



                """
            )
            # Button to open Jupyter Notebook or PDF
            if st.button("Open XML Documentation"):
                st.write("Download the [documentation notebook](path/to/XML_notebook.ipynb) or [PDF](path/to/XML_documentation.pdf).")