Updated preprocessor.py to fix the date format error

#2
by afanyu237 - opened
Files changed (1) hide show
  1. preprocessor.py +32 -1
preprocessor.py CHANGED
@@ -85,8 +85,39 @@ def preprocess(data):
85
  if sender and sender.strip().lower() != "system":
86
  filtered_messages.append(f"{sender.strip()}: {entry['Message']}")
87
  valid_dates.append(f"{entry['Date']}, {entry['Time'].replace(' ', ' ')}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
- df = pd.DataFrame({'user_message': filtered_messages, 'message_date': valid_dates})
90
  df['message_date'] = pd.to_datetime(df['message_date'], format='%m/%d/%y, %I:%M %p', errors='coerce')
91
  df.rename(columns={'message_date': 'date'}, inplace=True)
92
 
 
85
  if sender and sender.strip().lower() != "system":
86
  filtered_messages.append(f"{sender.strip()}: {entry['Message']}")
87
  valid_dates.append(f"{entry['Date']}, {entry['Time'].replace(' ', ' ')}")
88
+ print("-_____--------------__________----------_____________----------______________")
89
def convert_to_target_format(date_str):
    """Re-render a day-first, 24-hour timestamp as month-first, 12-hour text.

    A string matching ``'%d/%m/%Y, %H:%M'`` (for example
    ``'25/12/2023, 14:05'``) is rewritten as month/day/two-digit-year,
    followed by ``', '``, the 12-hour time, a narrow no-break space
    (U+202F) and the AM/PM marker. Month, day and hour carry no leading
    zero; minutes are always two digits.

    Args:
        date_str: Timestamp text to convert.

    Returns:
        The reformatted string, or ``date_str`` unchanged when it does not
        match the expected day-first layout.
    """
    try:
        parsed = datetime.strptime(date_str, '%d/%m/%Y, %H:%M')
    except ValueError:
        # Not in the day-first layout: hand it back untouched.
        return date_str

    # On a 12-hour clock both 0 (midnight) and 12 (noon) are shown as 12.
    clock_hour = parsed.hour % 12 or 12

    date_part = f"{parsed.month}/{parsed.day}/{parsed.strftime('%y')}"
    time_part = f"{clock_hour}:{parsed.minute:02d}"
    return f"{date_part}, {time_part}\u202f{parsed.strftime('%p')}"
116
+
117
+ converted_dates = [convert_to_target_format(date) for date in valid_dates]
118
+
119
 
120
+ df = pd.DataFrame({'user_message': filtered_messages, 'message_date': converted_dates})
121
  df['message_date'] = pd.to_datetime(df['message_date'], format='%m/%d/%y, %I:%M %p', errors='coerce')
122
  df.rename(columns={'message_date': 'date'}, inplace=True)
123