Spaces:
Sleeping
Sleeping
Update pages/Lifecycle of Machine Learning.py
Browse files
pages/Lifecycle of Machine Learning.py
CHANGED
@@ -1,54 +1,146 @@
|
|
1 |
-
pip install streamlit
|
2 |
import streamlit as st
|
3 |
-
|
4 |
-
|
5 |
-
from sklearn.ensemble import RandomForestClassifier
|
6 |
-
from sklearn.metrics import accuracy_score
|
7 |
|
8 |
-
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
-
|
12 |
-
if st.button("1. Data Collection"):
|
13 |
-
st.header("Data Collection")
|
14 |
-
st.write("Using Iris dataset for demonstration.")
|
15 |
-
data = load_iris(as_frame=True)
|
16 |
-
st.write(data.frame.head())
|
17 |
-
|
18 |
-
elif st.button("2. Data Preprocessing"):
|
19 |
-
st.header("Data Preprocessing")
|
20 |
-
st.write("Splitting the data into train and test sets.")
|
21 |
-
data = load_iris(as_frame=True)
|
22 |
-
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
|
23 |
-
st.write(f"Train size: {len(X_train)}; Test size: {len(X_test)}")
|
24 |
-
|
25 |
-
elif st.button("3. Model Training"):
|
26 |
-
st.header("Model Training")
|
27 |
-
st.write("Training a Random Forest Classifier.")
|
28 |
-
data = load_iris(as_frame=True)
|
29 |
-
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
|
30 |
-
model = RandomForestClassifier()
|
31 |
-
model.fit(X_train, y_train)
|
32 |
-
st.write("Model trained successfully.")
|
33 |
-
|
34 |
-
elif st.button("4. Model Evaluation"):
|
35 |
-
st.header("Model Evaluation")
|
36 |
-
st.write("Evaluating the model on the test data.")
|
37 |
-
data = load_iris(as_frame=True)
|
38 |
-
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
|
39 |
-
model = RandomForestClassifier()
|
40 |
-
model.fit(X_train, y_train)
|
41 |
-
predictions = model.predict(X_test)
|
42 |
-
accuracy = accuracy_score(y_test, predictions)
|
43 |
-
st.write(f"Accuracy: {accuracy:.2f}")
|
44 |
-
|
45 |
-
elif st.button("5. Model Deployment"):
|
46 |
-
st.header("Model Deployment")
|
47 |
-
st.write("This step involves deploying the model for usage.")
|
48 |
-
st.write("You can expose the model via APIs or integrate it into an application.")
|
49 |
|
50 |
-
|
51 |
-
|
|
|
|
|
52 |
|
|
|
|
|
|
|
|
|
|
|
53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# Streamlit page: "Life Cycle of ML" — a session-state-driven mini site that
# currently covers the Data Collection stage (structured data: Excel and CSV).
import streamlit as st
import pandas as pd  # NOTE(review): not used on this page yet; kept to preserve the original import surface
import numpy as np   # NOTE(review): not used on this page yet; kept to preserve the original import surface

# Page-wide CSS. The original wrapped this in an f-string with no
# substitutions, which forced every CSS brace to be doubled ({{ }});
# a plain string renders byte-identical markup.
st.markdown("""
<style>
/* Set the background image for the entire app */
.stApp {
background-color:rgba(96, 155, 124, 0.5);
background-size: 1300px;
background-repeat: no-repeat;
background-attachment: fixed;
background-position: center;
}

</style>
""", unsafe_allow_html=True)

# (The original re-imported streamlit here; the duplicate import was removed.)

# Navigation
st.title("Life Cycle of ML")
if 'page' not in st.session_state:
    st.session_state['page'] = 'home'

# Main Navigation
if st.session_state['page'] == 'home':
    st.subheader("Explore the Life Cycle Stages")
    if st.button("Data Collection"):
        st.session_state['page'] = 'data_collection'
        # Rerun immediately so the new page renders on this click; the
        # original set the state but kept rendering the home branch, so the
        # page only changed on the user's *next* interaction.
        st.rerun()

elif st.session_state['page'] == 'data_collection':
    # Data Collection Page
    if st.button("Back to Home"):
        # The original offered no way to return to the home page.
        st.session_state['page'] = 'home'
        st.rerun()

    st.title("Data Collection")
    st.header("1. What is Data?")
    st.write(
        "Data refers to raw facts and figures that are collected, stored, and analyzed to derive insights. "
        "It serves as the foundation for any machine learning model."
    )

    st.header("2. Types of Data")
    data_type = st.radio(
        "Select a type of data to learn more:",
        ("Structured", "Unstructured", "Semi-Structured")
    )

    # NOTE(review): only "Structured" has content; "Unstructured" and
    # "Semi-Structured" render nothing — TODO add sections or remove options.
    if data_type == "Structured":
        st.subheader("Structured Data")
        st.write(
            "Structured data is highly organized and easily searchable within databases. "
            "It includes rows and columns, such as in relational databases."
        )

        st.write("Data Formats:")
        format_selected = st.radio(
            "Select a format to explore further:",
            ("Excel", "CSV")
        )

        if format_selected == "Excel":
            # Excel Data Format Section
            st.subheader("Excel Data Format")
            st.write("*What is it?*")
            st.write(
                "Excel files are spreadsheets used to organize and analyze data in rows and columns. "
                "They are widely used due to their user-friendly nature and support for various data types."
            )

            st.write("*How to Read Excel Files?*")
            st.code(
                """
import pandas as pd
# Reading an Excel file
df = pd.read_excel('file.xlsx')
print(df.head())
""",
                language="python"
            )

            st.write("*Common Issues When Handling Excel Files*")
            st.write(
                """
- Missing or corrupted files
- Version incompatibilities
- Incorrect file paths
- Handling large Excel files
"""
            )

            st.write("*How to Overcome These Errors/Issues?*")
            st.write(
                """
- Use proper error handling with try-except.
- Convert Excel files to CSV for better compatibility.
- Use libraries like openpyxl or xlrd for specific Excel versions.
- Break large files into smaller chunks for processing.
"""
            )

            # Button to open Jupyter Notebook or PDF
            # NOTE(review): the link targets below are placeholder paths — TODO
            # point them at the real hosted notebook/PDF.
            if st.button("Open Excel Documentation"):
                st.write("Download the [documentation notebook](path/to/excel_notebook.ipynb) or [PDF](path/to/excel_documentation.pdf).")

        elif format_selected == "CSV":
            # CSV Data Format Section
            st.subheader("CSV Data Format")
            st.write("*What is it?*")
            st.write(
                "CSV (Comma-Separated Values) files store tabular data in plain text, where each line represents a record, "
                "and fields are separated by commas."
            )

            st.write("*How to Read CSV Files?*")
            st.code(
                """
import pandas as pd
# Reading a CSV file
df = pd.read_csv('file.csv')
print(df.head())
""",
                language="python"
            )

            st.write("*Common Issues When Handling CSV Files*")
            st.write(
                """
- Encoding issues (e.g., UTF-8, ISO-8859-1)
- Inconsistent delimiters
- Missing or corrupted files
- Large file sizes causing memory errors
"""
            )

            st.write("*How to Overcome These Errors/Issues?*")
            st.write(
                """
- Specify the correct encoding when reading files using encoding='utf-8' or similar.
- Use libraries like csv or pandas to handle different delimiters.
- Employ error handling to catch and manage missing/corrupted files.
- Use chunking to read large files in smaller parts: pd.read_csv('file.csv', chunksize=1000).
"""
            )

            # Button to open Jupyter Notebook or PDF
            if st.button("Open CSV Documentation"):
                st.write("Download the [documentation notebook](path/to/csv_notebook.ipynb) or [PDF](path/to/csv_documentation.pdf).")
|