import streamlit as st
import pandas as pd
import numpy as np

st.markdown(
    """
    <style>
    /* Set the background color for the entire app */
    .stApp {
        background-color: rgba(96, 155, 124, 0.5);
        background-size: 1300px;
        background-repeat: no-repeat;
        background-attachment: fixed;
        background-position: center;
    }
    </style>
    """,
    unsafe_allow_html=True,
)
# Navigation
st.title("Life Cycle of ML")

if 'page' not in st.session_state:
    st.session_state['page'] = 'home'

# Main Navigation
if st.session_state['page'] == 'home':
    st.subheader("Explore the Life Cycle Stages")
    if st.button("Data Collection"):
        st.session_state['page'] = 'data_collection'
        st.rerun()  # rerun immediately so the selected page renders (st.experimental_rerun on older Streamlit)
elif st.session_state['page'] == 'data_collection':
    # Data Collection Page
    st.title("Data Collection")

    st.header("1. What is Data?")
    st.write(
        "Data refers to raw facts and figures that are collected, stored, and analyzed to derive insights. "
        "It serves as the foundation for any machine learning model."
    )

    st.header("2. Types of Data")
    data_type = st.radio(
        "Select a type of data to learn more:",
        ("Structured", "Unstructured", "Semi-Structured")
    )
if data_type == "Structured": | |
st.subheader("Structured Data") | |
st.write( | |
"Structured data is highly organized and easily searchable within databases. " | |
"It includes rows and columns, such as in relational databases." | |
) | |
st.write("Data Formats:") | |
format_selected = st.radio( | |
"Select a format to explore further:", | |
("Excel", "CSV") | |
) | |
if format_selected == "Excel": | |
# Excel Data Format Section | |
st.subheader("Excel Data Format") | |
st.write("*What is it?*") | |
st.write( | |
"Excel files are spreadsheets used to organize and analyze data in rows and columns. " | |
"They are widely used due to their user-friendly nature and support for various data types." | |
) | |
st.write("*How to Read Excel Files?*") | |
st.code( | |
""" | |
import pandas as pd | |
# Reading an Excel file | |
df = pd.read_excel('file.xlsx') | |
print(df.head()) | |
""", | |
language="python" | |
) | |
st.write("*Common Issues When Handling Excel Files*") | |
st.write( | |
""" | |
- Missing or corrupted files | |
- Version incompatibilities | |
- Incorrect file paths | |
- Handling large Excel files | |
""" | |
) | |
st.write("*How to Overcome These Errors/Issues?*") | |
st.write( | |
""" | |
- Use proper error handling with try-except. | |
- Convert Excel files to CSV for better compatibility. | |
- Use libraries like openpyxl or xlrd for specific Excel versions. | |
- Break large files into smaller chunks for processing. | |
""" | |
) | |
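            # Illustrative addition: a short, hedged example tying the bullet points above
            # to code. The file name 'file.xlsx' and the engine choice are placeholders and
            # assumptions, not requirements of this app.
            st.write("*Example: defensive Excel loading (a minimal sketch)*")
            st.code(
                """
import pandas as pd

try:
    # openpyxl is the usual engine for .xlsx files; install it if it is missing
    df = pd.read_excel('file.xlsx', engine='openpyxl')
except FileNotFoundError:
    print("File not found - check the path before reading")
except Exception as exc:
    print(f"Could not read the workbook: {exc}")
else:
    print(df.head())
""",
                language="python"
            )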
            # Button to open Jupyter Notebook or PDF
            if st.button("Open Excel Documentation"):
                st.write("Download the [documentation notebook](path/to/excel_notebook.ipynb) or [PDF](path/to/excel_documentation.pdf).")
elif format_selected == "CSV": | |
# CSV Data Format Section | |
st.subheader("CSV Data Format") | |
st.write("*What is it?*") | |
st.write( | |
"CSV (Comma-Separated Values) files store tabular data in plain text, where each line represents a record, " | |
"and fields are separated by commas." | |
) | |
st.write("*How to Read CSV Files?*") | |
st.code( | |
""" | |
import pandas as pd | |
# Reading a CSV file | |
df = pd.read_csv('file.csv') | |
print(df.head()) | |
""", | |
language="python" | |
) | |
st.write("*Common Issues When Handling CSV Files*") | |
st.write( | |
""" | |
- Encoding issues (e.g., UTF-8, ISO-8859-1) | |
- Inconsistent delimiters | |
- Missing or corrupted files | |
- Large file sizes causing memory errors | |
""" | |
) | |
st.write("*How to Overcome These Errors/Issues?*") | |
st.write( | |
""" | |
- Specify the correct encoding when reading files using encoding='utf-8' or similar. | |
- Use libraries like csv or pandas to handle different delimiters. | |
- Employ error handling to catch and manage missing/corrupted files. | |
- Use chunking to read large files in smaller parts: pd.read_csv('file.csv', chunksize=1000). | |
""" | |
) | |
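            # Illustrative addition: a hedged sketch showing the encoding and chunksize
            # options from the list above. 'file.csv', the delimiter, and the chunk size
            # are placeholders chosen for the example.
            st.write("*Example: reading a large or oddly encoded CSV (a minimal sketch)*")
            st.code(
                """
import pandas as pd

# Explicit encoding avoids UnicodeDecodeError on non-UTF-8 files;
# sep only needs changing when the delimiter is not a comma
chunks = pd.read_csv('file.csv', encoding='utf-8', sep=',', chunksize=1000)

total_rows = 0
for chunk in chunks:
    total_rows += len(chunk)  # process each chunk instead of loading everything at once

print(f"Processed {total_rows} rows")
""",
                language="python"
            )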
            # Button to open Jupyter Notebook or PDF
            if st.button("Open CSV Documentation"):
                st.write("Download the [documentation notebook](path/to/csv_notebook.ipynb) or [PDF](path/to/csv_documentation.pdf).")
elif data_type == "Unstructured": | |
st.subheader("Unstructured Data") | |
st.write( | |
"Unstructured data refers to information that lacks a predefined format or organization, making it challenging to analyze using traditional tools." | |
"Examples include text, images, videos, audio, and social media posts." | |
) | |
st.write("Data Formats:") | |
format_selected = st.radio( | |
"Select a format to explore further:", | |
("IMAGE","VIDEO", "AUDIO") | |
) | |
        # Unstructured data format details
        if format_selected == "IMAGE":
            st.subheader("IMAGE Data Format")
            st.write("*What is it?*")
            st.write(
                "Image data includes photos, medical scans, and satellite images."
            )

            st.write("*How to Read IMAGE Files?*")
            st.code(
                """
from PIL import Image

# Open and display an image file
image = Image.open('example.jpg')
image.show()
""",
                language="python"
            )
st.write("*Common Issues When Handling image Files*") | |
st.write( | |
""" | |
- data augumentation and overfitting | |
- image processing challenges | |
- Data Imbalance | |
- High Dimensionality | |
""" | |
) | |
st.write("*How to Overcome These Errors/Issues?*") | |
st.write( | |
""" | |
- Data Augumentaion. | |
- Consistent image processing | |
- Handling Class Imbalance. | |
- Dimensionality Reduction and Feature Extraction | |
""" | |
) | |
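            # Illustrative addition: a hedged sketch of consistent preprocessing plus one
            # simple augmentation, matching the bullets above. 'example.jpg' and the
            # 224x224 target size are placeholders, not requirements.
            st.write("*Example: consistent preprocessing with a simple augmentation (a minimal sketch)*")
            st.code(
                """
from PIL import Image, ImageOps
import numpy as np

image = Image.open('example.jpg').convert('RGB')  # force a consistent channel layout
image = image.resize((224, 224))                  # resize every image to the same shape

augmented = ImageOps.mirror(image)                # one cheap augmentation: horizontal flip

pixels = np.asarray(image, dtype=np.float32) / 255.0  # scale pixel values to [0, 1]
print(pixels.shape)
""",
                language="python"
            )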
            # Button to open Jupyter Notebook or PDF
            if st.button("Open IMAGE Documentation"):
                st.write("Download the [documentation notebook](path/to/image_notebook.ipynb) or [PDF](path/to/image_documentation.pdf).")
elif format_selected == "VIDEO": | |
st.subheader("VIDEO Data Format") | |
st.write("*What is it?*") | |
st.write( | |
"PNG,GIF,BNP,RAW videos,TIFF " | |
) | |
st.write("*How to Read VIDEO Files?*") | |
st.code( | |
""" | |
pip install opencv-python | |
import cv2 | |
# Open the video file | |
video_path = 'path_to_your_video.mp4' | |
cap = cv2.VideoCapture(video_path) | |
""", | |
language="python" | |
) | |
st.write("*Common Issues When Handling video Files*") | |
st.write( | |
""" | |
- File not found or Corrupted. | |
- Incompatible Codec or Format. | |
- Performance Issues with Large Videos. | |
- Frame Dropping or Skipping. | |
""" | |
) | |
st.write("*How to Overcome These Errors/Issues?*") | |
st.write( | |
""" | |
- Ensure Correct File Path and Handle Corrupted Files. | |
- Install Missing Codecs or Use Supported Formats. | |
- Optimize Performance for Large Videos | |
- Control Frame Rate and Prevent Skipping | |
""" | |
) | |
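            # Illustrative addition: a hedged sketch applying the checks listed above while
            # reading frames. 'path_to_your_video.mp4' is a placeholder path.
            st.write("*Example: reading frames with basic error checks (a minimal sketch)*")
            st.code(
                """
import cv2

cap = cv2.VideoCapture('path_to_your_video.mp4')
if not cap.isOpened():
    raise IOError("Could not open the video - check the path, file, and codecs")

frame_count = 0
while True:
    ret, frame = cap.read()  # ret is False at the end of the stream or on a bad frame
    if not ret:
        break
    frame_count += 1

cap.release()
print(f"Read {frame_count} frames")
""",
                language="python"
            )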
            # Button to open Jupyter Notebook or PDF
            if st.button("Open VIDEO Documentation"):
                st.write("Download the [documentation notebook](path/to/videos_notebook.ipynb) or [PDF](path/to/videos_documentation.pdf).")
elif format_selected == "AUDIO": | |
st.subheader("AUDIO Data Format") | |
st.write("*What is it?*") | |
st.write( | |
"MP3,WAV,FLAC,AAC,OGG " | |
) | |
st.write("*How to Read AUDIO Files?*") | |
st.code( | |
""" | |
pip install librosa | |
import librosa | |
# Load the audio file | |
audio_path = 'path_to_audio_file.wav' | |
y, sr = librosa.load(audio_path, sr=None) # sr=None to preserve the original sampling rate | |
""", | |
language="python" | |
) | |
st.write("*Common Issues When Handling audio Files*") | |
st.write( | |
""" | |
- File not found or Corrupted. | |
- Incompatible Codec or Format. | |
- Memory Overload or Performance Issues with Large Audios. | |
- Encoding or File Corruption Issues | |
""" | |
) | |
st.write("*How to Overcome These Errors/Issues?*") | |
st.write( | |
""" | |
- File Not Found or Corrupted: Always check if the file exists before attempting to load it. Handle errors gracefully with try-except. | |
- Incompatible Format or Codec: Use pydub or ffmpeg to handle multiple formats, or convert the file to a more compatible format. | |
- Memory Overload or Performance Issues: Process the audio in chunks or downsample large files to reduce memory consumption. | |
- Encoding or File Corruption Issues: Ensure proper encoding and re-encode files using tools like ffmpeg if necessary. | |
""" | |
) | |
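            # Illustrative addition: a hedged sketch of the existence check, try-except, and
            # downsampling advice above. 'path_to_audio_file.wav' and the 16 kHz target rate
            # are placeholders.
            st.write("*Example: loading audio defensively (a minimal sketch)*")
            st.code(
                """
import os
import librosa

audio_path = 'path_to_audio_file.wav'

if not os.path.exists(audio_path):
    print("Audio file not found - check the path")
else:
    try:
        y, sr = librosa.load(audio_path, sr=16000)  # downsample to reduce memory use
        print(f"Loaded {len(y)} samples at {sr} Hz")
    except Exception as exc:  # corrupted or unsupported files raise here
        print(f"Could not decode the audio file: {exc}")
""",
                language="python"
            )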
            # Button to open Jupyter Notebook or PDF
            if st.button("Open AUDIO Documentation"):
                st.write("Download the [documentation notebook](path/to/audio_notebook.ipynb) or [PDF](path/to/audio_documentation.pdf).")
elif data_type == "Semi-Structured": | |
st.subheader("Semi-structured Data") | |
st.write( | |
"Semi-structured data is data that doesn’t fit into a rigid structure like relational databases but has some organizational properties, such as tags or key-value pairs, making it easier to analyze.") | |
st.write("Data Formats:") | |
format_selected = st.radio( | |
"Select a format to explore further:", | |
("JSON","XML") | |
) | |
        # Semi-structured data format details
        if format_selected == "JSON":
            st.subheader("JSON Data Format")
            st.write("*What is it?*")
            st.write(
                "JSON is a lightweight data-interchange format that uses key-value pairs. It is commonly used in web services and APIs for exchanging data."
            )

            st.write("*How to Read JSON Files?*")
            st.code(
                """
import json

# Open and read the JSON file
with open('data.json', 'r') as file:
    data = json.load(file)
""",
                language="python"
            )
st.write("*Common Issues When Handling json Files*") | |
st.write( | |
""" | |
- File Encoding Issues | |
- Invalid JSON Syntax | |
- Large JSON Files Causing Memory Issues | |
- Inconsistent Data Structure | |
""" | |
) | |
st.write("*How to Overcome These Errors/Issues?*") | |
st.write( | |
""" | |
- Validate JSON Syntax: Use tools like JSONLint or json.decoder.JSONDecodeError in Python to ensure valid JSON format. | |
- Handle Encoding: Specify the encoding when opening the file in Python (e.g., open('file.json', 'r', encoding='utf-8')). | |
- Use Chunking or Streaming for Large Files: For large JSON files, load the file in chunks or use libraries that support JSON streaming like ijson or jsonlines. | |
- Consistent Structure: Ensure consistent data structure when creating JSON files, or write code to handle missing or extra fields gracefully. | |
""" | |
) | |
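            # Illustrative addition: a hedged sketch combining the encoding and syntax-validation
            # advice above. 'data.json' and the 'name' key are placeholders.
            st.write("*Example: validating JSON while reading it (a minimal sketch)*")
            st.code(
                """
import json

try:
    with open('data.json', 'r', encoding='utf-8') as file:
        data = json.load(file)
except FileNotFoundError:
    print("JSON file not found - check the path")
except json.JSONDecodeError as exc:
    print(f"Invalid JSON syntax: {exc}")
else:
    # .get() tolerates a missing key instead of raising KeyError
    print(data.get('name', 'unknown'))
""",
                language="python"
            )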
            # Button to open Jupyter Notebook or PDF
            if st.button("Open JSON Documentation"):
                st.write("Download the [documentation notebook](path/to/JSON_notebook.ipynb) or [PDF](path/to/JSON_documentation.pdf).")
elif format_selected == "XML": | |
st.subheader("XML Data Format") | |
st.write("*What is it?*") | |
st.write( | |
"XML is a flexible, structured data format used to store and transport data, utilizing tags to define elements, attributes, and hierarchical relationships between different pieces of information. " | |
) | |
st.write("*How to Read XML Files?*") | |
st.code( | |
""" | |
import pandas as pd | |
pd.read_xml("Data_path") | |
""" , | |
language="python" | |
) | |
st.write("*Common Issues When Handling XML Files*") | |
st.write( | |
""" | |
- Invalid XML Syntax. | |
- Encoding Issues. | |
- Large XML Files. | |
- Inconsistent Structure. | |
""" | |
) | |
st.write("*How to Overcome These Errors/Issues?*") | |
st.write( | |
""" | |
- Validate XML Syntax: Use XML validators and try-except blocks to catch and fix syntax errors during parsing. | |
- Handle Encoding Issues: Specify the encoding when reading files and use libraries like chardet to detect encoding automatically. | |
- Process Large Files Efficiently: Use streaming parsers (e.g., iterparse()) and iterative parsing to handle large files without consuming too much memory. | |
- Ensure Consistent Structure: Check for missing elements before accessing them and handle inconsistencies with default values or conditional logic. | |
""" | |
) | |
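            # Illustrative addition: a hedged sketch of the iterparse() advice above for large
            # files. 'file.xml' and the 'record' tag name are placeholders for a real schema.
            st.write("*Example: streaming a large XML file with iterparse (a minimal sketch)*")
            st.code(
                """
import xml.etree.ElementTree as ET

count = 0
# iterparse yields elements as they are parsed, so the whole file never sits in memory
for event, elem in ET.iterparse('file.xml', events=('end',)):
    if elem.tag == 'record':  # placeholder tag name - adjust to the real schema
        count += 1
    elem.clear()              # free the element once it has been processed

print(f"Parsed {count} <record> elements")
""",
                language="python"
            )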
            # Button to open Jupyter Notebook or PDF
            if st.button("Open XML Documentation"):
                st.write("Download the [documentation notebook](path/to/XML_notebook.ipynb) or [PDF](path/to/XML_documentation.pdf).")