ProfessorLeVesseur committed on
Commit
f2086cb
·
verified ·
1 Parent(s): c2354bc

Update data_processor.py

Browse files
Files changed (1) hide show
  1. data_processor.py +0 -183
data_processor.py CHANGED
@@ -1,186 +1,3 @@
1
- # import pandas as pd
2
- # import os
3
- # import re
4
- # from huggingface_hub import InferenceClient
5
- # # from graphviz import Digraph
6
-
7
- # class DataProcessor:
8
- # INTERVENTION_COLUMN = 'Did the intervention happen today?'
9
- # ENGAGED_STR = 'Engaged (Respect, Responsibility, Effort)'
10
- # PARTIALLY_ENGAGED_STR = 'Partially Engaged (about 50%)'
11
- # NOT_ENGAGED_STR = 'Not Engaged (less than 50%)'
12
-
13
- # def __init__(self, student_metrics_df=None):
14
- # self.hf_api_key = os.getenv('HF_API_KEY')
15
- # if not self.hf_api_key:
16
- # raise ValueError("HF_API_KEY not set in environment variables")
17
- # self.client = InferenceClient(api_key=self.hf_api_key)
18
- # self.student_metrics_df = student_metrics_df
19
-
20
- # def read_excel(self, uploaded_file):
21
- # return pd.read_excel(uploaded_file)
22
-
23
- # def format_session_data(self, df):
24
- # # Look for "Date of Session" or "Date" column
25
- # date_column = next((col for col in df.columns if col in ["Date of Session", "Date"]), None)
26
- # if date_column:
27
- # df[date_column] = pd.to_datetime(df[date_column], errors='coerce').dt.date
28
- # else:
29
- # print("Warning: Neither 'Date of Session' nor 'Date' column found in the dataframe.")
30
-
31
- # df['Timestamp'] = self.safe_convert_to_datetime(df['Timestamp'], '%I:%M %p')
32
- # df['Session Start Time'] = self.safe_convert_to_time(df['Session Start Time'], '%I:%M %p')
33
- # df['Session End Time'] = self.safe_convert_to_time(df['Session End Time'], '%I:%M %p')
34
- # return df
35
-
36
- # def safe_convert_to_time(self, series, format_str='%I:%M %p'):
37
- # try:
38
- # converted = pd.to_datetime(series, format='%H:%M:%S', errors='coerce')
39
- # if format_str:
40
- # return converted.dt.strftime(format_str)
41
- # return converted
42
- # except Exception as e:
43
- # print(f"Error converting series to time: {e}")
44
- # return series
45
-
46
- # def safe_convert_to_datetime(self, series, format_str=None):
47
- # try:
48
- # converted = pd.to_datetime(series, errors='coerce')
49
- # if format_str:
50
- # return converted.dt.strftime(format_str)
51
- # return converted
52
- # except Exception as e:
53
- # print(f"Error converting series to datetime: {e}")
54
- # return series
55
-
56
- # def replace_student_names_with_initials(self, df):
57
- # updated_columns = []
58
- # for col in df.columns:
59
- # if col.startswith('Student Attendance'):
60
- # match = re.match(r'Student Attendance \[(.+?)\]', col)
61
- # if match:
62
- # name = match.group(1)
63
- # initials = ''.join([part[0] for part in name.split()])
64
- # updated_columns.append(f'Student Attendance [{initials}]')
65
- # else:
66
- # updated_columns.append(col)
67
- # else:
68
- # updated_columns.append(col)
69
- # df.columns = updated_columns
70
- # return df
71
-
72
- # def compute_intervention_statistics(self, df):
73
- # total_days = len(df)
74
- # sessions_held = df[self.INTERVENTION_COLUMN].str.strip().str.lower().eq('yes').sum()
75
- # intervention_frequency = (sessions_held / total_days) * 100 if total_days > 0 else 0
76
- # return pd.DataFrame({
77
- # 'Intervention Dosage (%)': [round(intervention_frequency, 0)],
78
- # 'Intervention Sessions Held': [sessions_held],
79
- # 'Intervention Sessions Not Held': [total_days - sessions_held],
80
- # 'Total Number of Days Available': [total_days]
81
- # })
82
-
83
- # def compute_student_metrics(self, df):
84
- # intervention_df = df[df[self.INTERVENTION_COLUMN].str.strip().str.lower() == 'yes']
85
- # intervention_sessions_held = len(intervention_df)
86
- # student_columns = [col for col in df.columns if col.startswith('Student Attendance')]
87
-
88
- # student_metrics = {}
89
- # for col in student_columns:
90
- # student_name = col.replace('Student Attendance [', '').replace(']', '').strip()
91
- # student_data = intervention_df[[col]].copy()
92
- # student_data[col] = student_data[col].fillna('Absent')
93
-
94
- # attendance_values = student_data[col].apply(lambda x: 1 if x in [
95
- # self.ENGAGED_STR,
96
- # self.PARTIALLY_ENGAGED_STR,
97
- # self.NOT_ENGAGED_STR
98
- # ] else 0)
99
-
100
- # sessions_attended = attendance_values.sum()
101
- # attendance_pct = (sessions_attended / intervention_sessions_held) * 100 if intervention_sessions_held > 0 else 0
102
- # attendance_pct = round(attendance_pct)
103
-
104
- # engagement_counts = {
105
- # 'Engaged': 0,
106
- # 'Partially Engaged': 0,
107
- # 'Not Engaged': 0,
108
- # 'Absent': 0
109
- # }
110
-
111
- # for x in student_data[col]:
112
- # if x == self.ENGAGED_STR:
113
- # engagement_counts['Engaged'] += 1
114
- # elif x == self.PARTIALLY_ENGAGED_STR:
115
- # engagement_counts['Partially Engaged'] += 1
116
- # elif x == self.NOT_ENGAGED_STR:
117
- # engagement_counts['Not Engaged'] += 1
118
- # else:
119
- # engagement_counts['Absent'] += 1 # Count as Absent if not engaged
120
-
121
- # # Calculate percentages for engagement states
122
- # total_sessions = sum(engagement_counts.values())
123
-
124
- # # Engagement (%)
125
- # engagement_pct = (engagement_counts['Engaged'] / total_sessions * 100) if total_sessions > 0 else 0
126
- # engagement_pct = round(engagement_pct)
127
-
128
- # engaged_pct = (engagement_counts['Engaged'] / total_sessions * 100) if total_sessions > 0 else 0
129
- # engaged_pct = round(engaged_pct)
130
-
131
- # partially_engaged_pct = (engagement_counts['Partially Engaged'] / total_sessions * 100) if total_sessions > 0 else 0
132
- # partially_engaged_pct = round(partially_engaged_pct)
133
-
134
- # not_engaged_pct = (engagement_counts['Not Engaged'] / total_sessions * 100) if total_sessions > 0 else 0
135
- # not_engaged_pct = round(not_engaged_pct)
136
-
137
- # absent_pct = (engagement_counts['Absent'] / total_sessions * 100) if total_sessions > 0 else 0
138
- # absent_pct = round(absent_pct)
139
-
140
- # # Determine if the student attended ≥ 90% of sessions
141
- # attended_90 = "Yes" if attendance_pct >= 90 else "No"
142
-
143
- # # Determine if the student was engaged ≥ 80% of the time
144
- # engaged_80 = "Yes" if engaged_pct >= 80 else "No"
145
-
146
- # # Store metrics in the required order
147
- # student_metrics[student_name] = {
148
- # 'Attended ≥ 90%': attended_90,
149
- # 'Engagement ≥ 80%': engaged_80,
150
- # 'Attendance (%)': attendance_pct,
151
- # # 'Attendance #': sessions_attended,
152
- # 'Engagement (%)': engagement_pct,
153
- # 'Engaged (%)': engaged_pct,
154
- # 'Partially Engaged (%)': partially_engaged_pct,
155
- # 'Not Engaged (%)': not_engaged_pct,
156
- # 'Absent (%)': absent_pct
157
- # }
158
-
159
- # # Create a DataFrame from student_metrics
160
- # student_metrics_df = pd.DataFrame.from_dict(student_metrics, orient='index').reset_index()
161
- # student_metrics_df.rename(columns={'index': 'Student'}, inplace=True)
162
- # return student_metrics_df
163
-
164
- # def compute_average_metrics(self, student_metrics_df):
165
- # # Calculate the attendance and engagement average percentages across students
166
- # attendance_avg_stats = student_metrics_df['Attendance (%)'].mean() # Calculate the average attendance percentage
167
- # engagement_avg_stats = student_metrics_df['Engagement (%)'].mean() # Calculate the average engagement percentage
168
-
169
- # # Round the averages to make them whole numbers
170
- # attendance_avg_stats = round(attendance_avg_stats)
171
- # engagement_avg_stats = round(engagement_avg_stats)
172
-
173
- # return attendance_avg_stats, engagement_avg_stats
174
-
175
- # def evaluate_student(self, row, attendance_threshold=90, engagement_threshold=80):
176
- # if row["Attended ≥ 90%"] == "No":
177
- # return "Address Attendance"
178
- # elif row["Engagement ≥ 80%"] == "No":
179
- # return "Address Engagement"
180
- # return "Consider barriers, fidelity, and progress monitoring"
181
-
182
-
183
-
184
  import re
185
  import pandas as pd
186
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import re
2
  import pandas as pd
3
  import os