Spaces:

ankithpatel
/

zero_to_hero_ML

Sleeping

App Files Files Community

zero_to_hero_ML / pages /Lifecycle of Machine Learning.py

ankithpatel

Update pages/Lifecycle of Machine Learning.py

0e851e5 verified 5 months ago

raw

history blame contribute delete

15 kB

	import streamlit as st
	import pandas as pd
	import numpy as np

	# Inject page-wide CSS. The stylesheet has no interpolated values, so a plain
	# (non-f) string is used and the CSS braces need no escaping.
	# NOTE: the background-size/-repeat/-attachment/-position rules only take
	# effect once a background-image is added; as written only the colour applies.
	st.markdown("""
	<style>
	/* Set the background colour for the entire app */
	.stApp {
	background-color:rgba(96, 155, 124, 0.5);
	background-size: 1300px;
	background-repeat: no-repeat;
	background-attachment: fixed;
	background-position: center;
	}

	</style>
	""", unsafe_allow_html=True)

	import streamlit as st

	# Navigation
	# On the first run of a session there is no 'page' entry yet, so start on
	# the home page; later runs keep whatever page was selected.
	if 'page' not in st.session_state:
	    st.session_state.page = 'home'

	# Heading shown on every page of this script.
	st.title("Life Cycle of ML")

	# Main Navigation
	#
	# The page shown is selected by st.session_state['page'] ('home' or
	# 'data_collection'). Page changes go through button callbacks so that the
	# new page is rendered on the very next run instead of one interaction late.


	def _go_to(page):
	    """Button callback: record which page the next script run should render."""
	    st.session_state['page'] = page


	def _bullets(items):
	    """Return the given strings formatted as a Markdown bullet list."""
	    return "\n".join(f"- {item}" for item in items)


	def _render_format_section(name, fmt):
	    """Render the tutorial section for one data format.

	    name: the format's display name (the selected radio option).
	    fmt:  one entry of a "formats" mapping in _DATA_TYPES, providing the
	          keys summary, code, issues_title, issues, fixes, doc_label and
	          doc_slug.
	    """
	    st.subheader(f"{name} Data Format")
	    st.write("What is it?")
	    st.write(fmt["summary"])

	    st.write(f"How to Read {name} Files?")
	    st.code(fmt["code"], language="python")

	    st.write(fmt["issues_title"])
	    st.write(_bullets(fmt["issues"]))

	    st.write("How to Overcome These Errors/Issues?")
	    st.write(_bullets(fmt["fixes"]))

	    # Button to open Jupyter Notebook or PDF
	    # NOTE(review): the path/to/... link targets look like placeholders —
	    # confirm the real notebook/PDF locations.
	    if st.button(f"Open {fmt['doc_label']} Documentation"):
	        slug = fmt["doc_slug"]
	        st.write(
	            f"Download the [documentation notebook](path/to/{slug}_notebook.ipynb) "
	            f"or [PDF](path/to/{slug}_documentation.pdf)."
	        )


	# Page content, keyed first by the data-type radio option and then by the
	# format radio option. Dict order is the order the options are offered in.
	# Headings, button labels and link slugs are stored verbatim because their
	# casing/wording is not derivable from the format name.
	_DATA_TYPES = {
	    "Structured": {
	        "title": "Structured Data",
	        "summary": (
	            "Structured data is highly organized and easily searchable within databases. "
	            "It includes rows and columns, such as in relational databases."
	        ),
	        "formats": {
	            "Excel": {
	                "summary": (
	                    "Excel files are spreadsheets used to organize and analyze data in rows and columns. "
	                    "They are widely used due to their user-friendly nature and support for various data types."
	                ),
	                "code": (
	                    "import pandas as pd\n"
	                    "# Reading an Excel file\n"
	                    "df = pd.read_excel('file.xlsx')\n"
	                    "print(df.head())"
	                ),
	                "issues_title": "Common Issues When Handling Excel Files",
	                "issues": (
	                    "Missing or corrupted files",
	                    "Version incompatibilities",
	                    "Incorrect file paths",
	                    "Handling large Excel files",
	                ),
	                "fixes": (
	                    "Use proper error handling with try-except.",
	                    "Convert Excel files to CSV for better compatibility.",
	                    "Use libraries like openpyxl or xlrd for specific Excel versions.",
	                    "Break large files into smaller chunks for processing.",
	                ),
	                "doc_label": "Excel",
	                "doc_slug": "excel",
	            },
	            "CSV": {
	                "summary": (
	                    "CSV (Comma-Separated Values) files store tabular data in plain text, where each line represents a record, "
	                    "and fields are separated by commas."
	                ),
	                "code": (
	                    "import pandas as pd\n"
	                    "# Reading a CSV file\n"
	                    "df = pd.read_csv('file.csv')\n"
	                    "print(df.head())"
	                ),
	                "issues_title": "Common Issues When Handling CSV Files",
	                "issues": (
	                    "Encoding issues (e.g., UTF-8, ISO-8859-1)",
	                    "Inconsistent delimiters",
	                    "Missing or corrupted files",
	                    "Large file sizes causing memory errors",
	                ),
	                "fixes": (
	                    "Specify the correct encoding when reading files using encoding='utf-8' or similar.",
	                    "Use libraries like csv or pandas to handle different delimiters.",
	                    "Employ error handling to catch and manage missing/corrupted files.",
	                    "Use chunking to read large files in smaller parts: pd.read_csv('file.csv', chunksize=1000).",
	                ),
	                "doc_label": "CSV",
	                "doc_slug": "csv",
	            },
	        },
	    },
	    "Unstructured": {
	        "title": "Unstructured Data",
	        # Fix: the first sentence now ends with a space so the two pieces no
	        # longer run together as "tools.Examples".
	        "summary": (
	            "Unstructured data refers to information that lacks a predefined format or organization, "
	            "making it challenging to analyze using traditional tools. "
	            "Examples include text, images, videos, audio, and social media posts."
	        ),
	        "formats": {
	            "IMAGE": {
	                "summary": "Photos, medical scans, satellite images. ",
	                "code": (
	                    "from PIL import Image\n"
	                    "image = Image.open('example.jpg')\n"
	                    "image.show()"
	                ),
	                "issues_title": "Common Issues When Handling image Files",
	                "issues": (
	                    "data augmentation and overfitting",
	                    "image processing challenges",
	                    "Data Imbalance",
	                    "High Dimensionality",
	                ),
	                "fixes": (
	                    "Data Augmentation.",
	                    "Consistent image processing",
	                    "Handling Class Imbalance.",
	                    "Dimensionality Reduction and Feature Extraction",
	                ),
	                "doc_label": "IMAGE",
	                "doc_slug": "image",
	            },
	            "VIDEO": {
	                # Fix: this used to list image formats (PNG, GIF, TIFF, ...).
	                "summary": "MP4, AVI, MOV, MKV, WebM ",
	                # Fix: the install command is a shell command, so it is shown
	                # as a comment to keep the sample valid Python.
	                "code": (
	                    "# pip install opencv-python\n"
	                    "import cv2\n"
	                    "\n"
	                    "# Open the video file\n"
	                    "video_path = 'path_to_your_video.mp4'\n"
	                    "cap = cv2.VideoCapture(video_path)"
	                ),
	                "issues_title": "Common Issues When Handling video Files",
	                "issues": (
	                    "File not found or Corrupted.",
	                    "Incompatible Codec or Format.",
	                    "Performance Issues with Large Videos.",
	                    "Frame Dropping or Skipping.",
	                ),
	                "fixes": (
	                    "Ensure Correct File Path and Handle Corrupted Files.",
	                    "Install Missing Codecs or Use Supported Formats.",
	                    "Optimize Performance for Large Videos",
	                    "Control Frame Rate and Prevent Skipping",
	                ),
	                "doc_label": "VIDEOS",
	                "doc_slug": "videos",
	            },
	            "AUDIO": {
	                "summary": "MP3,WAV,FLAC,AAC,OGG ",
	                # Fix: install command shown as a comment (see VIDEO above).
	                "code": (
	                    "# pip install librosa\n"
	                    "import librosa\n"
	                    "\n"
	                    "# Load the audio file\n"
	                    "audio_path = 'path_to_audio_file.wav'\n"
	                    "y, sr = librosa.load(audio_path, sr=None) # sr=None to preserve the original sampling rate"
	                ),
	                "issues_title": "Common Issues When Handling audio Files",
	                "issues": (
	                    "File not found or Corrupted.",
	                    "Incompatible Codec or Format.",
	                    "Memory Overload or Performance Issues with Large Audios.",
	                    "Encoding or File Corruption Issues",
	                ),
	                "fixes": (
	                    "File Not Found or Corrupted: Always check if the file exists before attempting to load it. Handle errors gracefully with try-except.",
	                    "Incompatible Format or Codec: Use pydub or ffmpeg to handle multiple formats, or convert the file to a more compatible format.",
	                    "Memory Overload or Performance Issues: Process the audio in chunks or downsample large files to reduce memory consumption.",
	                    "Encoding or File Corruption Issues: Ensure proper encoding and re-encode files using tools like ffmpeg if necessary.",
	                ),
	                "doc_label": "AUDIO",
	                "doc_slug": "audio",
	            },
	        },
	    },
	    "Semi-Structured": {
	        "title": "Semi-structured Data",
	        "summary": (
	            "Semi-structured data is data that doesn’t fit into a rigid structure like relational databases "
	            "but has some organizational properties, such as tags or key-value pairs, making it easier to analyze."
	        ),
	        "formats": {
	            "JSON": {
	                "summary": (
	                    "JSON is a lightweight data-interchange format that uses key-value pairs. "
	                    "It is commonly used in web services and APIs for exchanging data. "
	                ),
	                # Fix: the body of the with-statement is indented again so
	                # the displayed sample is runnable.
	                "code": (
	                    "import json\n"
	                    "\n"
	                    "# Open and read the JSON file\n"
	                    "with open('data.json', 'r') as file:\n"
	                    "    data = json.load(file)"
	                ),
	                "issues_title": "Common Issues When Handling json Files",
	                "issues": (
	                    "File Encoding Issues",
	                    "Invalid JSON Syntax",
	                    "Large JSON Files Causing Memory Issues",
	                    "Inconsistent Data Structure",
	                ),
	                "fixes": (
	                    "Validate JSON Syntax: Use tools like JSONLint or json.decoder.JSONDecodeError in Python to ensure valid JSON format.",
	                    "Handle Encoding: Specify the encoding when opening the file in Python (e.g., open('file.json', 'r', encoding='utf-8')).",
	                    "Use Chunking or Streaming for Large Files: For large JSON files, load the file in chunks or use libraries that support JSON streaming like ijson or jsonlines.",
	                    "Consistent Structure: Ensure consistent data structure when creating JSON files, or write code to handle missing or extra fields gracefully.",
	                ),
	                "doc_label": "JSON",
	                "doc_slug": "JSON",
	            },
	            "XML": {
	                "summary": (
	                    "XML is a flexible, structured data format used to store and transport data, "
	                    "utilizing tags to define elements, attributes, and hierarchical relationships "
	                    "between different pieces of information. "
	                ),
	                "code": (
	                    "import pandas as pd\n"
	                    'pd.read_xml("Data_path")'
	                ),
	                "issues_title": "Common Issues When Handling XML Files",
	                "issues": (
	                    "Invalid XML Syntax.",
	                    "Encoding Issues.",
	                    "Large XML Files.",
	                    "Inconsistent Structure.",
	                ),
	                "fixes": (
	                    "Validate XML Syntax: Use XML validators and try-except blocks to catch and fix syntax errors during parsing.",
	                    "Handle Encoding Issues: Specify the encoding when reading files and use libraries like chardet to detect encoding automatically.",
	                    "Process Large Files Efficiently: Use streaming parsers (e.g., iterparse()) and iterative parsing to handle large files without consuming too much memory.",
	                    "Ensure Consistent Structure: Check for missing elements before accessing them and handle inconsistencies with default values or conditional logic.",
	                ),
	                "doc_label": "XML",
	                "doc_slug": "XML",
	            },
	        },
	    },
	}


	if st.session_state['page'] == 'home':
	    st.subheader("Explore the Life Cycle Stages")
	    # Fix: the page used to be switched inside this branch after the button
	    # returned True, so the new page only appeared on a later rerun. The
	    # callback updates the state before the script reruns.
	    st.button("Data Collection", on_click=_go_to, args=("data_collection",))

	elif st.session_state['page'] == 'data_collection':
	    # Data Collection Page
	    # Fix: there was previously no way to get back to the home page.
	    st.button("Back to Home", on_click=_go_to, args=("home",))

	    st.title("Data Collection")
	    st.header("1. What is Data?")
	    st.write(
	        "Data refers to raw facts and figures that are collected, stored, and analyzed to derive insights. "
	        "It serves as the foundation for any machine learning model."
	    )

	    st.header("2. Types of Data")
	    data_type = st.radio(
	        "Select a type of data to learn more:",
	        tuple(_DATA_TYPES)
	    )

	    # Every data type shares one layout: heading, description, then a format
	    # picker whose selection decides which format section is rendered.
	    _selected_type = _DATA_TYPES[data_type]
	    st.subheader(_selected_type["title"])
	    st.write(_selected_type["summary"])

	    st.write("Data Formats:")
	    format_selected = st.radio(
	        "Select a format to explore further:",
	        tuple(_selected_type["formats"])
	    )

	    _render_format_section(format_selected, _selected_type["formats"][format_selected])