refactor code into smaller, modular function components
Browse files- logmetric.py +119 -195
logmetric.py
CHANGED
@@ -18,10 +18,8 @@ import datasets
|
|
18 |
import re
|
19 |
import dateutil.parser
|
20 |
import numpy as np
|
21 |
-
from
|
22 |
-
import sacrebleu
|
23 |
|
24 |
-
import time
|
25 |
|
26 |
|
27 |
# TODO: Add BibTeX citation
|
@@ -68,19 +66,6 @@ BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
|
|
68 |
class LogMetric(evaluate.Metric):
|
69 |
"""TODO: Short description of my evaluation module."""
|
70 |
|
71 |
-
# Constant regex to get timestrings
|
72 |
-
timestamp_regex = r'^\s*\[?\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*\]?\s*'
|
73 |
-
timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
|
74 |
-
|
75 |
-
int_regex = r'(-?\d+)'
|
76 |
-
int_pattern = re.compile(int_regex)
|
77 |
-
|
78 |
-
float_regex = r'(-?\d+\.\d+)'
|
79 |
-
float_pattern = re.compile(float_regex)
|
80 |
-
|
81 |
-
sacrebleu_metric = evaluate.load("evaluate-metric/sacrebleu")
|
82 |
-
|
83 |
-
|
84 |
def _info(self):
|
85 |
# TODO: Specifies the evaluate.EvaluationModuleInfo object
|
86 |
return evaluate.MetricInfo(
|
@@ -107,25 +92,59 @@ class LogMetric(evaluate.Metric):
|
|
107 |
# TODO: Download external resources if needed
|
108 |
pass
|
109 |
|
110 |
-
|
111 |
-
def
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
|
117 |
-
return
|
118 |
|
119 |
-
# A score depending on the difference in length of two sentences
|
120 |
-
def get_length_score(self, preds_split, refs_split):
|
121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
pred_content_lengths = np.vectorize(len)(preds_split)
|
123 |
ref_content_lengths = np.vectorize(len)(refs_split)
|
124 |
|
125 |
return self.smapeScore(pred_content_lengths, ref_content_lengths)
|
126 |
|
127 |
# helper function that computes the smape_score either between two numbers or two lists of numbers (must be the same length)
|
128 |
-
def smapeScore(self, P, R):
|
129 |
P_isnumber = isinstance(P, (int, float))
|
130 |
R_isnumber = isinstance(R, (int, float))
|
131 |
|
@@ -136,10 +155,12 @@ class LogMetric(evaluate.Metric):
|
|
136 |
assert(len(P) == len(R))
|
137 |
|
138 |
if P_isnumber and R_isnumber:
|
139 |
-
if P == 0 and R == 0:
|
|
|
140 |
return 1 - (np.sum(np.abs(R - P) / (np.abs(R) + np.abs(P)))) # (n = 1)
|
141 |
else:
|
142 |
-
if len(P) == 0 and len(R) == 0:
|
|
|
143 |
n = len(P)
|
144 |
P = np.array(P)
|
145 |
R = np.array(R)
|
@@ -150,157 +171,27 @@ class LogMetric(evaluate.Metric):
|
|
150 |
|
151 |
return 1 - (1.0/n * np.sum(np.abs(R - P) / denominator))
|
152 |
|
153 |
-
#
|
154 |
-
def
|
155 |
-
pred_lines_amt = len(pred.splitlines())
|
156 |
-
ref_lines_amt = len(ref.splitlines())
|
157 |
-
|
158 |
-
# print("#pred_lines:", pred_lines_amt)
|
159 |
-
# print("#ref_lines:", ref_lines_amt)
|
160 |
-
|
161 |
-
return self.smapeScore(pred_lines_amt, ref_lines_amt)
|
162 |
-
|
163 |
-
def replaceNumbers(self, text:str):
|
164 |
text = self.int_pattern.sub(r'<|INT|>', text)
|
165 |
text = self.float_pattern.sub(r'<|FLOAT|>', text)
|
166 |
return text
|
167 |
|
168 |
-
#
|
169 |
-
def
|
170 |
-
if pred_logMessages == [] and ref_logMessages == []:
|
171 |
-
pred_logMessages = [""]
|
172 |
-
ref_logMessages = [""]
|
173 |
-
sacrebleu_score = self.sacrebleu_metric.compute(predictions=pred_logMessages, references=ref_logMessages)["score"] / 100.0
|
174 |
-
|
175 |
-
smape_length_score = self.get_length_score(pred_logMessages, ref_logMessages)
|
176 |
-
|
177 |
-
vectorized_replaceNumbers = np.vectorize(self.replaceNumbers)
|
178 |
-
|
179 |
-
cleaned_pred_logMessages = vectorized_replaceNumbers(pred_logMessages)
|
180 |
-
cleaned_ref_logMessages = vectorized_replaceNumbers(ref_logMessages)
|
181 |
-
|
182 |
-
sacrebleu_withoutExplicitNumbers_score = self.sacrebleu_metric.compute(predictions=cleaned_pred_logMessages, references=cleaned_ref_logMessages)["score"] / 100.0
|
183 |
-
|
184 |
-
|
185 |
-
return sacrebleu_score, sacrebleu_withoutExplicitNumbers_score, smape_length_score
|
186 |
-
|
187 |
-
# get different scores regarding the timestamp
|
188 |
-
def getTimestampsScore(self, pred_timestamps, ref_timestamps):
|
189 |
-
timestamp_amt_score = self.smapeScore(len(pred_timestamps), len(ref_timestamps))
|
190 |
-
|
191 |
-
if (len(pred_timestamps) == 0) and (len(ref_timestamps) == 0):
|
192 |
-
return timestamp_amt_score, 1.0, 1.0, 1.0
|
193 |
-
|
194 |
-
# if there are no predicted timestamps, return early. It is still consistent and monotonic.
|
195 |
-
if (len(pred_timestamps) == 0) and (len(ref_timestamps) != 0):
|
196 |
-
return timestamp_amt_score, 1.0, 1.0, 0.0
|
197 |
-
|
198 |
-
# replace all digits in the reference timestamp (first timestamp) with '/d' to get
|
199 |
-
# a regex that describes the format
|
200 |
-
pred_timestring_pattern = re.sub(r'\d', r'\\d', re.escape(pred_timestamps[0]))
|
201 |
-
|
202 |
-
matchesPatternScore = 1.0
|
203 |
-
monotonicallyIncreasingScore = 1.0
|
204 |
-
pred_timedeltas = []
|
205 |
-
|
206 |
-
# A variable to save the previous timestamp (as datetime obj) to check monotonicity
|
207 |
-
prev_datetime = None
|
208 |
-
# Convert matches to datetime objects
|
209 |
-
|
210 |
-
for i in range(len(pred_timestamps)):
|
211 |
-
ts = pred_timestamps[i]
|
212 |
-
try:
|
213 |
-
# Check if the format matches with the format of the first timestamp
|
214 |
-
# TODO!! Check this later, maybe it is too restricting for training a llm
|
215 |
-
matchesPattern = re.fullmatch(pred_timestring_pattern, ts) is not None
|
216 |
-
# Check if the timestamps are monotonically increasing
|
217 |
-
cur_datetime = dateutil.parser.parse(ts)
|
218 |
-
if prev_datetime == None:
|
219 |
-
monotonicallyIncreasing = True
|
220 |
-
else:
|
221 |
-
monotonicallyIncreasing = prev_datetime <= cur_datetime
|
222 |
-
pred_timedeltas.append((cur_datetime - prev_datetime).total_seconds())
|
223 |
-
|
224 |
-
prev_datetime = cur_datetime
|
225 |
-
|
226 |
-
# If one entry doesn't fulfill the matching pattern property or the monotinicity property, set to 0 for whole log
|
227 |
-
matchesPatternScore = 0.0 if (not matchesPattern) else matchesPatternScore
|
228 |
-
monotonicallyIncreasingScore = 0.0 if (not monotonicallyIncreasing) else monotonicallyIncreasingScore
|
229 |
-
|
230 |
-
|
231 |
-
except Exception as e:
|
232 |
-
# e.g. date format not parsable by dateutil.parser
|
233 |
-
matchesPatternScore = 0.0
|
234 |
-
monotonicallyIncreasingScore = 0.0
|
235 |
-
pred_timedeltas.append(-1)
|
236 |
-
|
237 |
-
|
238 |
-
if (len(pred_timestamps) != 0) and (len(ref_timestamps) == 0):
|
239 |
-
return timestamp_amt_score, matchesPatternScore, monotonicallyIncreasingScore, 0.0
|
240 |
-
|
241 |
-
|
242 |
-
ref_timedeltas = []
|
243 |
-
prev_datetime = None
|
244 |
-
for i in range(len(ref_timestamps)):
|
245 |
-
ts = ref_timestamps[i]
|
246 |
-
try:
|
247 |
-
cur_datetime = dateutil.parser.parse(ts)
|
248 |
-
if prev_datetime == None:
|
249 |
-
pass
|
250 |
-
else:
|
251 |
-
ref_timedeltas.append((cur_datetime - prev_datetime).total_seconds())
|
252 |
-
|
253 |
-
prev_datetime = cur_datetime
|
254 |
-
|
255 |
-
except Exception as e:
|
256 |
-
ref_timedeltas.append(-1)
|
257 |
-
|
258 |
-
minlength = min(len(pred_timedeltas), len(ref_timedeltas))
|
259 |
-
|
260 |
-
pred_timedeltas = pred_timedeltas[:minlength]
|
261 |
-
ref_timedeltas = ref_timedeltas[:minlength]
|
262 |
-
|
263 |
-
print("pred_timedeltas:", pred_timedeltas)
|
264 |
-
print("ref_timedeltas:", ref_timedeltas)
|
265 |
-
|
266 |
-
|
267 |
-
timestampDeltaScore = self.smapeScore(pred_timedeltas, ref_timedeltas)
|
268 |
-
|
269 |
-
print("timestampDeltaScore:", timestampDeltaScore)
|
270 |
-
# matchesPatternScore and monotonicallyIncreasingScore are in {0,1}
|
271 |
-
return timestamp_amt_score, matchesPatternScore, monotonicallyIncreasingScore, timestampDeltaScore
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
def getLogMetric(self, pred : str, ref : str):
|
276 |
-
ref = ref.strip(' \t\n\r')
|
277 |
-
pred = pred.strip(' \t\n\r')
|
278 |
-
|
279 |
-
linecount_difference_SMAPE = self.getLineCountScore(pred, ref)
|
280 |
-
|
281 |
-
|
282 |
-
# Split log on timestamps
|
283 |
pred_split_log = self.timestamp_pattern.split(pred)
|
284 |
ref_split_log = self.timestamp_pattern.split(ref)
|
285 |
|
286 |
# One logentry always consists of timestamp + log-message
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
pred_timestamps = []
|
291 |
-
pred_logMessages = []
|
292 |
|
293 |
-
ref_timestamps = []
|
294 |
-
ref_logMessages = []
|
295 |
# reorganize log into logentry-tuples, consisting of timestamp + log-message
|
296 |
for i in range(1, len(pred_split_log), 2):
|
297 |
-
# pred_logentries.append((pred_split_log[i],pred_split_log[i+1]))
|
298 |
pred_timestamps.append(pred_split_log[i])
|
299 |
pred_logMessages.append(pred_split_log[i+1])
|
300 |
|
301 |
-
|
302 |
for i in range(1, len(ref_split_log), 2):
|
303 |
-
# ref_logentries.append((ref_split_log[i],ref_split_log[i+1]))
|
304 |
ref_timestamps.append(ref_split_log[i])
|
305 |
ref_logMessages.append(ref_split_log[i+1])
|
306 |
|
@@ -310,44 +201,77 @@ class LogMetric(evaluate.Metric):
|
|
310 |
pred_logMessages += (max_logentries - len(pred_logMessages)) * [" "]
|
311 |
ref_logMessages += (max_logentries- len(ref_logMessages)) * [" "]
|
312 |
|
313 |
-
|
314 |
-
|
315 |
-
timestamps_difference_SMAPE, timestamps_formatConsistency_absolute, timestamps_monotinicity_absolute, timestamps_delta_SMAPE = self.getTimestampsScore(pred_timestamps, ref_timestamps)
|
316 |
|
|
|
317 |
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
|
|
|
|
|
|
328 |
|
329 |
-
def
|
330 |
-
|
|
|
|
|
|
|
|
|
331 |
|
332 |
-
|
|
|
|
|
|
|
|
|
333 |
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
|
|
|
|
|
|
|
|
|
|
341 |
|
342 |
-
|
343 |
-
|
344 |
|
345 |
-
|
346 |
-
metric_result = dict(zip(keys, mean_values))
|
347 |
|
348 |
-
|
349 |
-
|
|
|
|
|
|
|
|
|
350 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
351 |
|
352 |
-
return metric_result
|
353 |
-
|
|
|
18 |
import re
|
19 |
import dateutil.parser
|
20 |
import numpy as np
|
21 |
+
from typing import List, Dict, Any
|
|
|
22 |
|
|
|
23 |
|
24 |
|
25 |
# TODO: Add BibTeX citation
|
|
|
66 |
class LogMetric(evaluate.Metric):
|
67 |
"""TODO: Short description of my evaluation module."""
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
def _info(self):
|
70 |
# TODO: Specifies the evaluate.EvaluationModuleInfo object
|
71 |
return evaluate.MetricInfo(
|
|
|
92 |
# TODO: Download external resources if needed
|
93 |
pass
|
94 |
|
95 |
+
|
96 |
+
def _compute(self, predictions, references):
    """Average the per-pair scores over all prediction/reference pairs.

    Args:
        predictions: Iterable of predicted log strings.
        references: Iterable of reference log strings, paired with
            ``predictions`` by position via ``zip``.

    Returns:
        Dict mapping every score name returned by ``PredRefScore.run`` to
        its mean over all pairs; an empty dict when there are no pairs.
    """
    # TODO: get separate log entries (split before timestamps), replace timestamps with token and compare the log entry with BLEU
    metric_dicts = [PredRefScore(p, r).run() for p, r in zip(predictions, references)]

    # Nothing to average: indexing metric_dicts[0] below would raise.
    if not metric_dicts:
        return {}

    # The first result fixes the column order; rows are built by key so the
    # means stay aligned even if a later dict was filled in another order.
    keys = list(metric_dicts[0])
    values = np.array([[scores[key] for key in keys] for scores in metric_dicts])

    # Mean along the vertical axis: one value per score name.
    mean_values = np.mean(values, axis=0)

    return dict(zip(keys, mean_values))
|
112 |
|
|
|
|
|
113 |
|
114 |
+
class PredRefScore:
|
115 |
+
# Constant regex to get timestrings
|
116 |
+
timestamp_regex = r'^\s*\[?\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*\]?\s*'
|
117 |
+
timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
|
118 |
+
|
119 |
+
int_pattern = re.compile(r'(-?\d+)')
|
120 |
+
float_pattern = re.compile(r'(-?\d+\.\d+)')
|
121 |
+
|
122 |
+
scores : Dict[str, float]= {}
|
123 |
+
|
124 |
+
sacrebleu_metric = evaluate.load("evaluate-metric/sacrebleu")
|
125 |
+
|
126 |
+
def __init__(self, prediction : str, reference: str) -> None:
    """Store the whitespace-stripped log pair and an empty score table.

    Args:
        prediction: Predicted log text.
        reference: Reference log text.
    """
    self.reference = reference.strip(' \t\n\r')
    self.prediction = prediction.strip(' \t\n\r')
    # Give every instance its own dict. The class-level ``scores`` default is
    # a single object shared by all instances, so without this each scored
    # pair would overwrite the results of the previous one.
    self.scores = {}
|
129 |
+
|
130 |
+
def run(self) -> Dict[str, float]:
    """Compute all scores for this prediction/reference pair.

    Returns:
        The score table filled in by ``getLogMetric``.
    """
    # Start from a fresh dict owned by this instance so the result is never
    # the class-level ``scores`` object that other instances also write to.
    self.scores = {}
    self.getLogMetric()
    return self.scores
|
133 |
+
|
134 |
+
|
135 |
+
##### Convenience Methods #####
|
136 |
+
|
137 |
+
# TODO: also set pred_ts, ref_ts, pred_msgs and ref_msgs as fields
|
138 |
+
|
139 |
+
# A score depending on the difference in length of two sentences
|
140 |
+
def get_length_score(self, preds_split : List[Any], refs_split : List[Any]) -> float:
    """Score how similar the element lengths of two sequences are.

    Args:
        preds_split: Predicted entries; ``len`` is taken of each one.
        refs_split: Reference entries; ``len`` is taken of each one.

    Returns:
        Whatever ``smapeScore`` returns for the two length arrays.
    """
    # A comprehension instead of np.vectorize(len): vectorize raises on
    # empty input, while this simply yields an empty array.
    pred_content_lengths = np.array([len(entry) for entry in preds_split], dtype=int)
    ref_content_lengths = np.array([len(entry) for entry in refs_split], dtype=int)

    return self.smapeScore(pred_content_lengths, ref_content_lengths)
|
145 |
|
146 |
# helper function that computes the smape_score either between two numbers or two lists of numbers (must be the same length)
|
147 |
+
def smapeScore(self, P, R) -> float:
|
148 |
P_isnumber = isinstance(P, (int, float))
|
149 |
R_isnumber = isinstance(R, (int, float))
|
150 |
|
|
|
155 |
assert(len(P) == len(R))
|
156 |
|
157 |
if P_isnumber and R_isnumber:
|
158 |
+
if P == 0 and R == 0:
|
159 |
+
return 1.0 # since this leads to (|R| + |P|) = 0
|
160 |
return 1 - (np.sum(np.abs(R - P) / (np.abs(R) + np.abs(P)))) # (n = 1)
|
161 |
else:
|
162 |
+
if len(P) == 0 and len(R) == 0:
|
163 |
+
return 1.0 # since this leads to n = 0
|
164 |
n = len(P)
|
165 |
P = np.array(P)
|
166 |
R = np.array(R)
|
|
|
171 |
|
172 |
return 1 - (1.0/n * np.sum(np.abs(R - P) / denominator))
|
173 |
|
174 |
+
# Replaces numbers in a string with a placeholder
|
175 |
+
def replaceNumbers(self, text : str) -> str:
    """Replace numeric literals in ``text`` with placeholder tokens.

    Args:
        text: String to clean.

    Returns:
        ``text`` with every match of ``self.float_pattern`` replaced by
        ``<|FLOAT|>`` and every remaining match of ``self.int_pattern``
        replaced by ``<|INT|>``.
    """
    # Floats must go first: the integer pattern also matches the digit runs
    # on either side of a decimal point, so running it first would turn
    # "3.14" into "<|INT|>.<|INT|>" and leave nothing for the float pattern.
    text = self.float_pattern.sub(r'<|FLOAT|>', text)
    text = self.int_pattern.sub(r'<|INT|>', text)
    return text
|
179 |
|
180 |
+
# Split all log-entries in timestamps and log-messages
|
181 |
+
def split_log_entry(self, pred : str, ref: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
pred_split_log = self.timestamp_pattern.split(pred)
|
183 |
ref_split_log = self.timestamp_pattern.split(ref)
|
184 |
|
185 |
# One logentry always consists of timestamp + log-message
|
186 |
+
pred_timestamps, pred_logMessages = [], []
|
187 |
+
ref_timestamps, ref_logMessages = [], []
|
|
|
|
|
|
|
188 |
|
|
|
|
|
189 |
# reorganize log into logentry-tuples, consisting of timestamp + log-message
|
190 |
for i in range(1, len(pred_split_log), 2):
|
|
|
191 |
pred_timestamps.append(pred_split_log[i])
|
192 |
pred_logMessages.append(pred_split_log[i+1])
|
193 |
|
|
|
194 |
for i in range(1, len(ref_split_log), 2):
|
|
|
195 |
ref_timestamps.append(ref_split_log[i])
|
196 |
ref_logMessages.append(ref_split_log[i+1])
|
197 |
|
|
|
201 |
pred_logMessages += (max_logentries - len(pred_logMessages)) * [" "]
|
202 |
ref_logMessages += (max_logentries- len(ref_logMessages)) * [" "]
|
203 |
|
204 |
+
return pred_timestamps, pred_logMessages, ref_timestamps, ref_logMessages
|
|
|
|
|
205 |
|
206 |
+
##### Individual Setter Methods for Scores #####
|
207 |
|
208 |
+
# splits both strings at \n and then computes the smape_score of their lengths
|
209 |
+
def set_linecount_score(self, pred : str, ref : str) -> None:
    """Record how closely the two texts agree in their number of lines.

    Both strings are split with ``str.splitlines`` and the two line counts
    are passed to ``smapeScore``; the result is stored under
    ``"linecount_difference_SMAPE_score"``.
    """
    line_counts = [len(text.splitlines()) for text in (pred, ref)]
    self.scores["linecount_difference_SMAPE_score"] = self.smapeScore(*line_counts)
|
213 |
+
|
214 |
+
def set_sacrebleu_score(self, pred_log_messages : List[str], ref_log_messages : List[str]) -> None:
    """Store the sacrebleu score of the raw log messages.

    The ``"score"`` entry returned by ``self.sacrebleu_metric.compute`` is
    divided by 100 (presumably mapping a 0-100 scale onto 0-1) and saved
    under ``"linecontent_sacrebleu_score"``.
    """
    result = self.sacrebleu_metric.compute(
        predictions=pred_log_messages,
        references=ref_log_messages,
    )
    self.scores["linecontent_sacrebleu_score"] = result["score"] / 100.0
|
217 |
+
|
218 |
+
def set_smape_length_score(self, pred_log_messages : List[str], ref_log_messages : List[str]) -> None:
    """Store the length-based score of the log messages.

    Delegates to ``get_length_score`` and saves its result under
    ``"linecontentlength_difference_SMAPE_score"``.
    """
    self.scores["linecontentlength_difference_SMAPE_score"] = self.get_length_score(
        pred_log_messages,
        ref_log_messages,
    )
|
221 |
|
222 |
+
def set_sacrebleu_withoutexplnumbers_score(self, pred_log_messages : List[str], ref_log_messages : List[str]) -> None:
    """Store the sacrebleu score of the messages with numbers masked out.

    Every message is passed through ``replaceNumbers`` before scoring. The
    ``"score"`` entry of the metric result is divided by 100 and saved under
    ``"linecontent_sacrebleu_withoutExplicitNumbers_score"``.
    """
    # Plain list comprehensions instead of np.vectorize: vectorize raises on
    # empty input and would hand numpy arrays rather than lists of str to
    # the metric.
    cleaned_pred_logMessages = [self.replaceNumbers(msg) for msg in pred_log_messages]
    cleaned_ref_logMessages = [self.replaceNumbers(msg) for msg in ref_log_messages]

    result = self.sacrebleu_metric.compute(
        predictions=cleaned_pred_logMessages,
        references=cleaned_ref_logMessages,
    )
    self.scores["linecontent_sacrebleu_withoutExplicitNumbers_score"] = result["score"] / 100.0
|
228 |
|
229 |
+
# Get different scores regarding the content of a log-message
|
230 |
+
def all_linecontent_scores(self, pred_logMessages : List[str], ref_logMessages: List[str]) -> None:
    """Run every score that looks at the log-message content.

    When both message lists are empty they are replaced by ``[""]`` before
    the three content scorers are invoked, in this order: sacrebleu, length
    SMAPE, sacrebleu without explicit numbers.
    """
    nothing_to_compare = pred_logMessages == [] and ref_logMessages == []
    if nothing_to_compare:
        pred_logMessages, ref_logMessages = [""], [""]

    content_scorers = (
        self.set_sacrebleu_score,
        self.set_smape_length_score,
        self.set_sacrebleu_withoutexplnumbers_score,
    )
    for record_score in content_scorers:
        record_score(pred_logMessages, ref_logMessages)
|
238 |
+
|
239 |
+
def set_timestamp_amt_score(self, pred_timestamps : List[str], ref_timestamps : List[str]):
    """Store the score comparing how many timestamps each side contains.

    The two list lengths are passed to ``smapeScore`` and the result is
    saved under ``"timestamps_SMAPE_difference_score"``.
    """
    timestamp_counts = (len(pred_timestamps), len(ref_timestamps))
    self.scores["timestamps_SMAPE_difference_score"] = self.smapeScore(*timestamp_counts)
|
242 |
+
|
243 |
+
def set_timestamp_format_consistency_score(self, pred_timestamps, ref_timestamps):
    """Store whether all predicted timestamps share one textual format.

    The format is derived from the first predicted timestamp by replacing
    each digit with ``\\d``; every predicted timestamp must fully match it.
    Stores 1.0 under ``"timestamps_formatConsistency_score"`` when all match
    or when there are no predicted timestamps, otherwise 0.0.

    Args:
        pred_timestamps: Timestamp strings extracted from the prediction.
        ref_timestamps: Unused; kept so the signature stays unchanged.
    """
    if len(pred_timestamps) == 0:
        self.scores["timestamps_formatConsistency_score"] = 1.0
        return

    # Strip before escaping: stripping the escaped text could cut an escaped
    # trailing space in half and leave an invalid pattern.
    first_timestamp = pred_timestamps[0].strip()
    pred_timestring_pattern = re.compile(re.sub(r'\d', r'\\d', re.escape(first_timestamp)))

    # Consistency is a property of the prediction itself, so the predicted
    # timestamps (not the reference ones) are checked against the pattern.
    all_consistent = all(
        pred_timestring_pattern.fullmatch(ts.strip()) is not None
        for ts in pred_timestamps
    )

    self.scores["timestamps_formatConsistency_score"] = 1.0 if all_consistent else 0.0
|
|
|
252 |
|
253 |
+
def set_timestamp_monotonicity_score(self, pred_timestamps) -> None:
    """Store whether the predicted timestamps never go backwards in time.

    Every timestamp is parsed with ``dateutil.parser.parse``. Stores 1.0
    under ``"timestamps_monotinicity_score"`` when each parsed time is less
    than or equal to its successor (trivially true for zero or one
    timestamp), and 0.0 when the order is violated or the timestamps cannot
    be parsed or compared.
    """
    try:
        parsed_times = [dateutil.parser.parse(ts) for ts in pred_timestamps]
        # Compare each timestamp with its direct successor.
        all_monotone = all(t1 <= t2 for t1, t2 in zip(parsed_times, parsed_times[1:]))
    except (ValueError, OverflowError, TypeError):
        # ValueError covers dateutil's ParserError (a ValueError subclass)
        # and older dateutil releases that lack that class. OverflowError is
        # raised for out-of-range dates. TypeError is raised when
        # timezone-aware and naive datetimes are compared.
        all_monotone = False

    self.scores["timestamps_monotinicity_score"] = 1.0 if all_monotone else 0.0
|
263 |
+
|
264 |
+
# get different scores regarding the timestamp
|
265 |
+
def all_timestamp_scores(self, pred_timestamps, ref_timestamps) -> None:
    """Run every score that looks at the timestamps.

    Invokes, in order, the timestamp-amount score, the format-consistency
    score (both given prediction and reference timestamps) and the
    monotonicity score (prediction timestamps only).
    """
    both_sides = (pred_timestamps, ref_timestamps)
    self.set_timestamp_amt_score(*both_sides)
    self.set_timestamp_format_consistency_score(*both_sides)
    self.set_timestamp_monotonicity_score(pred_timestamps)
|
269 |
+
|
270 |
+
# driver method for different score computations
|
271 |
+
def getLogMetric(self):
    """Drive all score computations for the stored prediction/reference.

    Records the line-count score, splits both logs into timestamps and
    messages with ``split_log_entry``, then records the message-content
    scores followed by the timestamp scores.
    """
    prediction, reference = self.prediction, self.reference
    self.set_linecount_score(prediction, reference)

    # split_log_entry returns (pred timestamps, pred messages,
    # ref timestamps, ref messages).
    split_result = self.split_log_entry(prediction, reference)
    pred_timestamps, pred_logMessages, ref_timestamps, ref_logMessages = split_result

    self.all_linecontent_scores(pred_logMessages, ref_logMessages)
    self.all_timestamp_scores(pred_timestamps, ref_timestamps)
|
277 |
|
|
|
|