ankithpatel committed on
Commit
a66704a
·
verified ·
1 Parent(s): 11afda8

Update pages/Lifecycle of Machine Learning.py

Browse files
Files changed (1) hide show
  1. pages/Lifecycle of Machine Learning.py +139 -47
pages/Lifecycle of Machine Learning.py CHANGED
@@ -1,54 +1,146 @@
1
- pip install streamlit
2
  import streamlit as st
3
- from sklearn.datasets import load_iris
4
- from sklearn.model_selection import train_test_split
5
- from sklearn.ensemble import RandomForestClassifier
6
- from sklearn.metrics import accuracy_score
7
 
8
- # Page Title
9
- st.title("Machine Learning Life Cycle in Streamlit")
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- # Buttons for each stage
12
- if st.button("1. Data Collection"):
13
- st.header("Data Collection")
14
- st.write("Using Iris dataset for demonstration.")
15
- data = load_iris(as_frame=True)
16
- st.write(data.frame.head())
17
-
18
- elif st.button("2. Data Preprocessing"):
19
- st.header("Data Preprocessing")
20
- st.write("Splitting the data into train and test sets.")
21
- data = load_iris(as_frame=True)
22
- X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
23
- st.write(f"Train size: {len(X_train)}; Test size: {len(X_test)}")
24
-
25
- elif st.button("3. Model Training"):
26
- st.header("Model Training")
27
- st.write("Training a Random Forest Classifier.")
28
- data = load_iris(as_frame=True)
29
- X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
30
- model = RandomForestClassifier()
31
- model.fit(X_train, y_train)
32
- st.write("Model trained successfully.")
33
-
34
- elif st.button("4. Model Evaluation"):
35
- st.header("Model Evaluation")
36
- st.write("Evaluating the model on the test data.")
37
- data = load_iris(as_frame=True)
38
- X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
39
- model = RandomForestClassifier()
40
- model.fit(X_train, y_train)
41
- predictions = model.predict(X_test)
42
- accuracy = accuracy_score(y_test, predictions)
43
- st.write(f"Accuracy: {accuracy:.2f}")
44
-
45
- elif st.button("5. Model Deployment"):
46
- st.header("Model Deployment")
47
- st.write("This step involves deploying the model for usage.")
48
- st.write("You can expose the model via APIs or integrate it into an application.")
49
 
50
- else:
51
- st.write("Use the buttons above to navigate through the Machine Learning life cycle.")
 
 
52
 
 
 
 
 
 
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import pandas as pd
import numpy as np

# Page-wide CSS: give the whole app a translucent green background.
# FIX 1: the original passed an f-string with `{{`/`}}` escapes even though
# nothing is interpolated — a plain string renders the identical CSS.
# FIX 2: a second, redundant `import streamlit as st` that appeared after
# this call has been dropped (the module is already imported above).
st.markdown(
    """
    <style>
    /* Set the background image for the entire app */
    .stApp {
    background-color:rgba(96, 155, 124, 0.5);
    background-size: 1300px;
    background-repeat: no-repeat;
    background-attachment: fixed;
    background-position: center;
    }
    </style>
    """,
    unsafe_allow_html=True,
)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
# ----------------------------------------------------------------------
# Navigation: a tiny state machine kept in st.session_state['page'].
# Each pseudo-page of the app is rendered by one branch of the if/elif
# chain that follows this block.
# ----------------------------------------------------------------------
st.title("Life Cycle of ML")

# Initialise the current page on the very first run of the script.
if 'page' not in st.session_state:
    st.session_state['page'] = 'home'

# Main Navigation
if st.session_state['page'] == 'home':
    st.subheader("Explore the Life Cycle Stages")
    if st.button("Data Collection"):
        st.session_state['page'] = 'data_collection'
        # FIX: rerun immediately. Without this, the state is updated too
        # late in the current run for the page branches (already
        # evaluated) to see it, so the selected page only appeared after
        # the user's NEXT interaction.
        st.rerun()
32
+ elif st.session_state['page'] == 'data_collection':
33
+ # Data Collection Page
34
+ st.title("Data Collection")
35
+ st.header("1. What is Data?")
36
+ st.write(
37
+ "Data refers to raw facts and figures that are collected, stored, and analyzed to derive insights. "
38
+ "It serves as the foundation for any machine learning model."
39
+ )
40
+
41
+ st.header("2. Types of Data")
42
+ data_type = st.radio(
43
+ "Select a type of data to learn more:",
44
+ ("Structured", "Unstructured", "Semi-Structured")
45
+ )
46
+
47
+ if data_type == "Structured":
48
+ st.subheader("Structured Data")
49
+ st.write(
50
+ "Structured data is highly organized and easily searchable within databases. "
51
+ "It includes rows and columns, such as in relational databases."
52
+ )
53
+
54
+ st.write("Data Formats:")
55
+ format_selected = st.radio(
56
+ "Select a format to explore further:",
57
+ ("Excel", "CSV")
58
+ )
59
+
60
+ if format_selected == "Excel":
61
+ # Excel Data Format Section
62
+ st.subheader("Excel Data Format")
63
+ st.write("*What is it?*")
64
+ st.write(
65
+ "Excel files are spreadsheets used to organize and analyze data in rows and columns. "
66
+ "They are widely used due to their user-friendly nature and support for various data types."
67
+ )
68
+
69
+ st.write("*How to Read Excel Files?*")
70
+ st.code(
71
+ """
72
+ import pandas as pd
73
+ # Reading an Excel file
74
+ df = pd.read_excel('file.xlsx')
75
+ print(df.head())
76
+ """,
77
+ language="python"
78
+ )
79
+
80
+ st.write("*Common Issues When Handling Excel Files*")
81
+ st.write(
82
+ """
83
+ - Missing or corrupted files
84
+ - Version incompatibilities
85
+ - Incorrect file paths
86
+ - Handling large Excel files
87
+ """
88
+ )
89
+
90
+ st.write("*How to Overcome These Errors/Issues?*")
91
+ st.write(
92
+ """
93
+ - Use proper error handling with try-except.
94
+ - Convert Excel files to CSV for better compatibility.
95
+ - Use libraries like openpyxl or xlrd for specific Excel versions.
96
+ - Break large files into smaller chunks for processing.
97
+ """
98
+ )
99
+
100
+ # Button to open Jupyter Notebook or PDF
101
+ if st.button("Open Excel Documentation"):
102
+ st.write("Download the [documentation notebook](path/to/excel_notebook.ipynb) or [PDF](path/to/excel_documentation.pdf).")
103
 
104
+ elif format_selected == "CSV":
105
+ # CSV Data Format Section
106
+ st.subheader("CSV Data Format")
107
+ st.write("*What is it?*")
108
+ st.write(
109
+ "CSV (Comma-Separated Values) files store tabular data in plain text, where each line represents a record, "
110
+ "and fields are separated by commas."
111
+ )
112
+
113
+ st.write("*How to Read CSV Files?*")
114
+ st.code(
115
+ """
116
+ import pandas as pd
117
+ # Reading a CSV file
118
+ df = pd.read_csv('file.csv')
119
+ print(df.head())
120
+ """,
121
+ language="python"
122
+ )
123
+
124
+ st.write("*Common Issues When Handling CSV Files*")
125
+ st.write(
126
+ """
127
+ - Encoding issues (e.g., UTF-8, ISO-8859-1)
128
+ - Inconsistent delimiters
129
+ - Missing or corrupted files
130
+ - Large file sizes causing memory errors
131
+ """
132
+ )
133
+
134
+ st.write("*How to Overcome These Errors/Issues?*")
135
+ st.write(
136
+ """
137
+ - Specify the correct encoding when reading files using encoding='utf-8' or similar.
138
+ - Use libraries like csv or pandas to handle different delimiters.
139
+ - Employ error handling to catch and manage missing/corrupted files.
140
+ - Use chunking to read large files in smaller parts: pd.read_csv('file.csv', chunksize=1000).
141
+ """
142
+ )
143
+
144
+ # Button to open Jupyter Notebook or PDF
145
+ if st.button("Open CSV Documentation"):
146
+ st.write("Download the [documentation notebook](path/to/csv_notebook.ipynb) or [PDF](path/to/csv_documentation.pdf).")