raymondEDS committed on
Commit
dd039c2
·
1 Parent(s): 52cb672

Updating week_1 and week_3

Browse files
Reference files/Data_cleaning_lab.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
app/__pycache__/main.cpython-311.pyc CHANGED
Binary files a/app/__pycache__/main.cpython-311.pyc and b/app/__pycache__/main.cpython-311.pyc differ
 
app/components/__pycache__/login.cpython-311.pyc CHANGED
Binary files a/app/components/__pycache__/login.cpython-311.pyc and b/app/components/__pycache__/login.cpython-311.pyc differ
 
app/components/login.py CHANGED
@@ -7,8 +7,8 @@ def login():
7
  st.title("Login to Data Science Course App")
8
 
9
  #usernames
10
- usernames = ["admin", "student", "manxiii"]
11
- passwords = ["admin", "123", "manxi123"]
12
 
13
  # Create a form for login
14
  with st.form("login_form"):
 
7
  st.title("Login to Data Science Course App")
8
 
9
  #usernames
10
+ usernames = ["admin", "student", "manxiii","zhu"]
11
+ passwords = ["admin", "123", "manxi123","zhu123"]
12
 
13
  # Create a form for login
14
  with st.form("login_form"):
app/main.py CHANGED
@@ -15,6 +15,7 @@ from app.components.login import login
15
  # Import week pages
16
  from app.pages import week_1
17
  from app.pages import week_2
 
18
 
19
  # Page configuration
20
  st.set_page_config(
@@ -136,6 +137,8 @@ def show_week_content():
136
  week_1.show()
137
  elif st.session_state.current_week == 2:
138
  week_2.show()
 
 
139
  else:
140
  st.warning("Content for this week is not yet available.")
141
 
@@ -148,7 +151,7 @@ def main():
148
  return
149
 
150
  # User is logged in, show course content
151
- if st.session_state.current_week in [1, 2]:
152
  show_week_content()
153
  else:
154
  st.title("Data Science Research Paper Course")
 
15
  # Import week pages
16
  from app.pages import week_1
17
  from app.pages import week_2
18
+ from app.pages import week_3
19
 
20
  # Page configuration
21
  st.set_page_config(
 
137
  week_1.show()
138
  elif st.session_state.current_week == 2:
139
  week_2.show()
140
+ elif st.session_state.current_week == 3:
141
+ week_3.show()
142
  else:
143
  st.warning("Content for this week is not yet available.")
144
 
 
151
  return
152
 
153
  # User is logged in, show course content
154
+ if st.session_state.current_week in [1, 2, 3]:
155
  show_week_content()
156
  else:
157
  st.title("Data Science Research Paper Course")
app/pages/__pycache__/week_1.cpython-311.pyc CHANGED
Binary files a/app/pages/__pycache__/week_1.cpython-311.pyc and b/app/pages/__pycache__/week_1.cpython-311.pyc differ
 
app/pages/__pycache__/week_2.cpython-311.pyc CHANGED
Binary files a/app/pages/__pycache__/week_2.cpython-311.pyc and b/app/pages/__pycache__/week_2.cpython-311.pyc differ
 
app/pages/__pycache__/week_3.cpython-311.pyc ADDED
Binary file (15.6 kB). View file
 
app/pages/week_1.py CHANGED
@@ -5,14 +5,122 @@ from sklearn.linear_model import LinearRegression
5
 
6
  # Week 1 content in person
7
  def show():
 
 
 
 
8
  st.markdown("""
9
- ## Week 1 content in person
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
- # Week 1 content online
13
- def show():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  st.markdown("""
15
- ## Week 1 content not online yet
 
 
 
 
 
16
  """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  if __name__ == "__main__":
18
  show()
 
5
 
6
  # Week 1 content in person
7
  def show():
8
+ st.title("Week 1: Developing Research Interests")
9
+
10
+ # Section 1: How to do research
11
+ st.header("1. Research Fundamentals")
12
  st.markdown("""
13
+
14
+ ### What is research?
15
+ Research is a systematic process of investigation to discover new knowledge or validate existing knowledge.
16
+
17
+ #### How to get started learning about your topic:
18
+ 1. Search the web for the topic area in general
19
+ 1. Wikipedia
20
+ 2. Google
21
+ 3. Stanford Encyclopedia of Philosophy
22
+ 4. News Reports
23
+ 2. Look for specific topics in Google Scholar or other scholarly databases
24
+ 1. [CNKI](https://www.cnki.net/index/)
25
+ 3. Read research papers
26
+
27
+ - **Google Scholar**: Your gateway to academic literature
28
+ - Add key terms from your general search
29
+ - Look through academic papers
30
+ - Find data sources
31
+ - Utilize literature reviews
32
+
33
  """)
34
+ st.header("1A. How to Read a Research paper")
35
+ st.markdown("""
36
+ 1. Introduction and conclusion/results are the most important sections
37
+ - These provide key context and findings
38
+ - Focus on these first for quick understanding
39
+
40
+ 2. Approach and methodology sections are also critical
41
+ - Help understand how the research was conducted
42
+ - Important for evaluating validity
43
+
44
+ 3. Citations and footnotes provide valuable context
45
+ - Help trace development of ideas
46
+ - Point to related work and background material
47
+ - Essential for understanding the broader research area
48
 
49
+ """)
50
+ col1, col2 = st.columns(2)
51
+
52
+ with col1:
53
+ st.header("1B. Literature Reviews")
54
+ st.markdown("""
55
+ - **Literature Review**:
56
+ - Full Literature Review
57
+ - General problem/task definition: What are these papers trying to solve, and why?
58
+ - Concise summaries of the articles: Do not simply copy the article text in full. We can read them ourselves. Put in your own words the major contributions of each article.
59
+ - Compare and contrast: Point out the similarities and differences of the papers. Do they agree with each other? Are results seemingly in conflict? If the papers address different subtasks, how are they related? (If they are not related, then you may have made poor choices for a lit review...). This section is probably the most valuable for the final project, as it can become the basis for a lit review section.
60
+ - Future work: Make several suggestions for how the work can be extended. Are there open questions to answer? This would presumably include how the papers relate to your final project idea.
61
+ - References section: The entries should appear alphabetically and give at least full author name(s), year of publication, title, and outlet if applicable (e.g., journal name or proceedings name). Beyond that, we are not picky about the format. Electronic references are fine but need to include the above information in addition to the link.[^1]
62
+
63
+ [^1]: Credit: Stanford CS224U
64
+ """)
65
+
66
+ with col2:
67
+ st.header("1C. Research Memos")
68
+ st.markdown("""
69
+ ### Article Summary Memo
70
+
71
+ - The central research question (what are they studying?)
72
+ - The context of the study (where is the study taking place?)
73
+ - What type of data is being studied?
74
+ - What do the authors find?
75
+
76
+ *Credit: University of Chicago – DPSS program*
77
+ """)
78
+ # Section 2: Research Question Formulation
79
+ st.header("2. Formulating Research Questions")
80
+ st.markdown("""
81
+ A good research question is the foundation of any research project. It should be:
82
+ - Clear and focused
83
+ - Researchable
84
+ - Feasible
85
+ - Significant
86
+ - Ethical
87
+ """)
88
+
89
+ # Display the research question image
90
+ st.image("assets/Pictures/research_question.jpg", caption="Research Question Formulation Framework")
91
+
92
  st.markdown("""
93
+ ### Steps to Formulate Your Research Question:
94
+ 1. Start with a broad topic
95
+ 2. Do preliminary research
96
+ 3. Narrow down to specific aspects
97
+ 4. Formulate your question
98
+ 5. Refine and test your question
99
  """)
100
+
101
+ # Section 3: Homework
102
+ st.header("3. Homework Assignment")
103
+ st.markdown("""
104
+ ### Tasks for this week:
105
+ 1. **Article Analysis**
106
+ - Provide a summary of the articles by answering the following questions:
107
+ - The central research question (what are they studying?)
108
+ - The context of the study (where is the study taking place?)
109
+ - What type of data is being studied?
110
+ - What do the authors find?
111
+
112
+ 2. **Research Questions**
113
+ - Think about what research questions you would like to answer:
114
+ - What are the similarities between the studies?
115
+ - What are the differences between the studies?
116
+ - Come up with 5 potential research questions for your own research
117
+
118
+ 3. **Reference Papers**
119
+ - [Peer Review](https://cogcomp.github.io/iclr_database/)
120
+ - [OpenDebateEvidence](https://arxiv.org/pdf/2406.14657)
121
+
122
+
123
+ """)
124
+
125
  if __name__ == "__main__":
126
  show()
app/pages/week_3.py ADDED
@@ -0,0 +1,337 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ import io
7
+ import sys
8
+ from contextlib import redirect_stdout
9
+
10
+ # Initialize session state for notebook-like cells
11
+ if 'cells' not in st.session_state:
12
+ st.session_state.cells = []
13
+ if 'df' not in st.session_state:
14
+ st.session_state.df = None
15
+
16
+ def capture_output(code, df=None):
17
+ """Helper function to capture print output"""
18
+ f = io.StringIO()
19
+ with redirect_stdout(f):
20
+ try:
21
+ # Create a dictionary of variables to use in exec
22
+ variables = {'pd': pd, 'np': np, 'plt': plt, 'sns': sns}
23
+ if df is not None:
24
+ variables['df'] = df
25
+ exec(code, variables)
26
+ except Exception as e:
27
+ return f"Error: {str(e)}"
28
+ return f.getvalue()
29
+
30
+ def show():
31
+ st.title("Week 3: Data Cleaning and Exploratory Data Analysis")
32
+
33
+ # Section 1: Introduction to EDA
34
+ st.header("1. Introduction to Exploratory Data Analysis")
35
+ st.markdown("""
36
+ Exploratory Data Analysis (EDA) is a crucial step in any data science project. Whether EDA is the main purpose of your project or is being used for feature selection/feature engineering in a machine learning context, it's important to understand the relationships between your features and target variables.
37
+
38
+ In this module, we'll focus on:
39
+ - Understanding categorical variables
40
+ - Data cleaning techniques
41
+ - Visualizing relationships in data
42
+ - Identifying patterns and insights
43
+ """)
44
+
45
+ # Section 2: The Titanic Dataset
46
+ st.header("2. Working with the Titanic Dataset")
47
+ st.markdown("""
48
+ We'll use the famous Titanic dataset to demonstrate data cleaning and EDA techniques. This dataset contains information about passengers aboard the Titanic and whether they survived.
49
+
50
+ ### Dataset Description
51
+ | Variable | Definition | Key |
52
+ | -------- | ---------- | --- |
53
+ | survival | Survival | 0 = No, 1 = Yes |
54
+ | pclass | Ticket class | 1 = 1st, 2 = 2nd, 3 = 3rd |
55
+ | sex | Sex | |
56
+ | Age | Age in years | |
57
+ | sibsp | # of siblings / spouses aboard | |
58
+ | parch | # of parents / children aboard | |
59
+ | ticket | Ticket number | |
60
+ | fare | Passenger fare | |
61
+ | cabin | Cabin number | |
62
+ | embarked | Port of Embarkation | C = Cherbourg, Q = Queenstown, S = Southampton |
63
+ """)
64
+
65
+ # Load and display the dataset
66
+ @st.cache_data
67
+ def load_data():
68
+ return pd.read_csv("https://raw.githubusercontent.com/hoffm386/eda-with-categorical-variables/master/titanic.csv")
69
+
70
+ df = load_data()
71
+ st.session_state.df = df
72
+
73
+ st.subheader("Dataset Preview")
74
+ st.dataframe(df.head())
75
+
76
+ # Interactive Data Loading Example
77
+ st.subheader("Try loading the data yourself!")
78
+ load_code = st.text_area("Try loading the Titanic dataset:",
79
+ 'import pandas as pd\n\ndf = pd.read_csv("https://raw.githubusercontent.com/hoffm386/eda-with-categorical-variables/master/titanic.csv")\nprint(df.head())',
80
+ height=100)
81
+ st.code(load_code, language="python", line_numbers=True)
82
+ if st.button("Run Data Loading Code"):
83
+ output = capture_output(load_code, df)
84
+ st.code(output, language="python", line_numbers=True)
85
+
86
+ # Basic Dataset Information
87
+ st.subheader("Dataset Information")
88
+ st.markdown("""
89
+ Let's explore some basic information about our dataset. Try these commands:
90
+ """)
91
+
92
+ info_code = st.text_area("Try getting dataset information:",
93
+ 'print("Dataset Shape:", df.shape)\nprint("\\nColumn Names:", df.columns.tolist())\nprint("\\nData Types:\\n", df.dtypes)\nprint("\\nMissing Values:\\n", df.isnull().sum())',
94
+ height=150)
95
+ st.code(info_code, language="python", line_numbers=True)
96
+ if st.button("Run Info Code"):
97
+ output = capture_output(info_code, df)
98
+ st.code(output, language="python", line_numbers=True)
99
+
100
+ # Section 3: Data Cleaning
101
+ st.header("3. Data Cleaning Techniques")
102
+
103
+ # Missing Value Handling
104
+ st.subheader("Missing Value Analysis")
105
+ st.markdown("""
106
+ Let's analyze and handle missing values in our dataset. Try these examples:
107
+ """)
108
+
109
+ missing_code = st.text_area("Try analyzing missing values:",
110
+ 'missing_percent = (df.isnull().sum() / len(df)) * 100\nprint("Percentage of missing values:\\n", missing_percent[missing_percent > 0])\n\n# Try filling missing values\ndf_filled = df.copy()\ndf_filled["Age"].fillna(df_filled["Age"].median(), inplace=True)\nprint("\\nMissing values after filling Age:", df_filled["Age"].isnull().sum())',
111
+ height=150)
112
+ st.code(missing_code, language="python", line_numbers=True)
113
+ if st.button("Run Missing Value Code"):
114
+ output = capture_output(missing_code, df)
115
+ st.code(output, language="python", line_numbers=True)
116
+
117
+ # Data Type Conversion
118
+ st.subheader("Data Type Conversion")
119
+ st.markdown("""
120
+ Let's convert categorical variables to the appropriate data types:
121
+ """)
122
+
123
+ type_code = st.text_area("Try converting data types:",
124
+ 'df_cat = df.copy()\ndf_cat["Sex"] = df_cat["Sex"].astype("category")\ndf_cat["Embarked"] = df_cat["Embarked"].astype("category")\nprint("Data types after conversion:\\n", df_cat.dtypes)',
125
+ height=100)
126
+ st.code(type_code, language="python", line_numbers=True)
127
+ if st.button("Run Type Conversion Code"):
128
+ output = capture_output(type_code, df)
129
+ st.code(output, language="python", line_numbers=True)
130
+
131
+ # Section 4: EDA with Categorical Variables
132
+ st.header("4. EDA with Categorical Variables")
133
+
134
+ # Interactive Visualizations
135
+ st.subheader("Create Your Own Visualizations")
136
+ st.markdown("""
137
+ Let's explore different types of visualizations to understand our data better:
138
+
139
+ 1. **Basic Count Plots**
140
+ First, let's look at the distribution of passengers by class and survival:
141
+ """)
142
+
143
+ viz_code = st.text_area("Try creating basic visualizations:",
144
+ '''import matplotlib.pyplot as plt
145
+ import seaborn as sns
146
+
147
+ # Create a figure with two subplots
148
+ fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
149
+
150
+ # Count plot for Sex
151
+ sns.countplot(data=df, x="Sex", ax=ax1)
152
+ ax1.set_title("Passenger Count by Sex")
153
+
154
+ # Bar plot for survival rate by Pclass
155
+ sns.barplot(data=df, x="Pclass", y="Survived", ax=ax2)
156
+ ax2.set_title("Survival Rate by Passenger Class")
157
+
158
+ plt.tight_layout()
159
+ st.pyplot(fig)''',
160
+ height=200)
161
+ st.code(viz_code, language="python", line_numbers=True)
162
+ if st.button("Run Basic Visualization Code"):
163
+ output = capture_output(viz_code, df)
164
+ st.pyplot(plt.gcf())
165
+
166
+ # Advanced Visualizations
167
+ st.subheader("Advanced Visualizations")
168
+ st.markdown("""
169
+ Now let's create more complex visualizations to understand relationships between variables:
170
+
171
+ 2. **Survival Analysis by Class**
172
+ Let's analyze survival rates across different passenger classes with a stacked bar chart:
173
+ """)
174
+
175
+ advanced_viz_code = st.text_area("Try creating advanced visualizations:",
176
+ '''import matplotlib.pyplot as plt
177
+ import seaborn as sns
178
+ from matplotlib.patches import Patch
179
+
180
+ # Create figure and axis
181
+ fig, ax = plt.subplots(figsize=(10, 6))
182
+
183
+ # Create countplot with custom colors
184
+ sns.countplot(x="Pclass", hue="Survived", data=df,
185
+ palette={1: "blue", 0: "red"}, ax=ax)
186
+
187
+ # Customize the plot
188
+ ax.set_xlabel("Passenger Class")
189
+ ax.set_title("Survival Distribution by Passenger Class")
190
+
191
+ # Create custom legend
192
+ legend_elements = [
193
+ Patch(facecolor="blue", label="Survived"),
194
+ Patch(facecolor="red", label="Did Not Survive")
195
+ ]
196
+ ax.legend(handles=legend_elements)
197
+
198
+ plt.tight_layout()
199
+ st.pyplot(fig)
200
+
201
+ # Create a second figure for percentage analysis
202
+ fig2, ax2 = plt.subplots(figsize=(10, 6))
203
+
204
+ # Calculate percentages
205
+ survival_by_class = df.groupby("Pclass")["Survived"].value_counts(normalize=True).unstack()
206
+ survival_by_class.plot(kind="bar", stacked=True, ax=ax2)
207
+
208
+ # Customize the plot
209
+ ax2.set_xlabel("Passenger Class")
210
+ ax2.set_ylabel("Percentage")
211
+ ax2.set_title("Survival Rate by Passenger Class")
212
+ ax2.legend(title="Survived", labels=["No", "Yes"])
213
+
214
+ plt.tight_layout()
215
+ st.pyplot(fig2)''',
216
+ height=400)
217
+ st.code(advanced_viz_code, language="python", line_numbers=True)
218
+ if st.button("Run Advanced Visualization Code"):
219
+ output = capture_output(advanced_viz_code, df)
220
+ st.pyplot(plt.gcf())
221
+
222
+ # Age Distribution Analysis
223
+ st.subheader("Age Distribution Analysis")
224
+ st.markdown("""
225
+ 3. **Age Distribution by Survival**
226
+ Let's examine how age relates to survival:
227
+ """)
228
+
229
+ age_viz_code = st.text_area("Try creating age distribution visualizations:",
230
+ '''import matplotlib.pyplot as plt
231
+
232
+ # Create figure and axis
233
+ fig, ax = plt.subplots()
234
+
235
+ # Plot histograms for survived and non-survived passengers
236
+ ax.hist(df[df["Survived"]==1]["Age"], bins=15, alpha=0.5, color="blue", label="survived")
237
+ ax.hist(df[df["Survived"]==0]["Age"], bins=15, alpha=0.5, color="green", label="did not survive")
238
+
239
+ # Customize the plot
240
+ ax.set_xlabel("Age")
241
+ ax.set_ylabel("Count of passengers")
242
+ ax.set_title("Age vs. Survival for Titanic Passengers")
243
+ ax.legend()
244
+
245
+ plt.tight_layout()
246
+ st.pyplot(fig)''',
247
+ height=200)
248
+ st.code(age_viz_code, language="python", line_numbers=True)
249
+ if st.button("Run Age Distribution Code"):
250
+ output = capture_output(age_viz_code, df)
251
+ st.pyplot(plt.gcf())
252
+
253
+ # Age and Fare Analysis
254
+ st.subheader("Age and Fare Analysis")
255
+ st.markdown("""
256
+ 4. **Survival by Age and Fare**
257
+ Let's analyze how both age and fare relate to survival:
258
+ """)
259
+
260
+ age_fare_viz_code = st.text_area("Try creating age and fare visualizations:",
261
+ '''import matplotlib.pyplot as plt
262
+ from matplotlib.lines import Line2D
263
+
264
+ # Create figure and axis
265
+ fig, ax = plt.subplots(figsize=(10, 5))
266
+
267
+ # Plot scatter points for survived and non-survived passengers
268
+ ax.scatter(df[df["Survived"]==1]["Age"], df[df["Survived"]==1]["Fare"],
269
+ c="blue", alpha=0.5, label="survived")
270
+ ax.scatter(df[df["Survived"]==0]["Age"], df[df["Survived"]==0]["Fare"],
271
+ c="green", alpha=0.5, label="did not survive")
272
+
273
+ # Customize the plot
274
+ ax.set_xlabel("Age")
275
+ ax.set_ylabel("Fare")
276
+ ax.set_title("Survival by Age and Fare for Titanic Passengers")
277
+
278
+ # Create custom legend
279
+ color_patches = [
280
+ Line2D([0], [0], marker='o', color='w', label='survived',
281
+ markerfacecolor='b', markersize=10),
282
+ Line2D([0], [0], marker='o', color='w', label='did not survive',
283
+ markerfacecolor='g', markersize=10)
284
+ ]
285
+ ax.legend(handles=color_patches)
286
+
287
+ plt.tight_layout()
288
+ st.pyplot(fig)''',
289
+ height=250)
290
+ st.code(age_fare_viz_code, language="python", line_numbers=True)
291
+ if st.button("Run Age and Fare Visualization Code"):
292
+ output = capture_output(age_fare_viz_code, df)
293
+ st.pyplot(plt.gcf())
294
+
295
+ # Section 5: Hands-on Exercise
296
+ st.header("5. Hands-on Exercise")
297
+ st.markdown("""
298
+ ### Tasks for this week:
299
+
300
+ 1. **Data Cleaning Exercise**
301
+ - Load the Titanic dataset
302
+ - Identify and handle missing values
303
+ - Convert categorical variables
304
+ - Create summary statistics
305
+
306
+ 2. **EDA Analysis**
307
+ - Create visualizations for key variables
308
+ - Analyze relationships between variables
309
+ - Identify patterns in survival rates
310
+
311
+ 3. **Report Writing**
312
+ - Document your findings
313
+ - Create a presentation of key insights
314
+ - Suggest potential next steps
315
+ """)
316
+
317
+ # Interactive Exercise
318
+ st.subheader("Try Your Own Analysis")
319
+ exercise_code = st.text_area("Write your own analysis code here:",
320
+ '# Your code here\n# Try analyzing the relationship between Age and Survival\n# Or create your own visualizations\n# Or perform any other analysis you find interesting',
321
+ height=150)
322
+ st.code(exercise_code, language="python", line_numbers=True)
323
+ if st.button("Run Exercise Code"):
324
+ output = capture_output(exercise_code, df)
325
+ st.code(output, language="python", line_numbers=True)
326
+
327
+ # Section 6: Resources
328
+ st.header("6. Additional Resources")
329
+ st.markdown("""
330
+ - [EDA with Categorical Variables](https://github.com/hoffm386/eda-with-categorical-variables)
331
+ - [Kaggle EDA Tutorial](https://www.kaggle.com/code/kashnitsky/topic-1-exploratory-data-analysis-with-pandas)
332
+ - [Pandas Documentation](https://pandas.pydata.org/docs/)
333
+ - [Seaborn Documentation](https://seaborn.pydata.org/)
334
+ """)
335
+
336
+ if __name__ == "__main__":
337
+ show()
assets/Pictures/research_question.html ADDED
The diff for this file is too large to render. See raw diff
 
assets/Pictures/research_question.jpg ADDED