svenwey committed on
Commit 422e946 · 1 Parent(s): d0d47f8

first implementation of logscoremetric

Files changed (1)
  1. logscoremetric.py +58 -6
logscoremetric.py CHANGED
@@ -15,6 +15,9 @@
 
 import evaluate
 import datasets
+import re
+import dateutil.parser
+import numpy as np
 
 
 # TODO: Add BibTeX citation
@@ -61,6 +64,10 @@ BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
 class LogScoreMetric(evaluate.Metric):
     """TODO: Short description of my evaluation module."""
 
+    # Constant regex to get timestrings
+    timestamp_regex = r'(^\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)'
+    sacrebleu = evaluate.load("sacrebleu")
+
     def _info(self):
         # TODO: Specifies the evaluate.EvaluationModuleInfo object
         return evaluate.MetricInfo(
@@ -72,8 +79,8 @@ class LogScoreMetric(evaluate.Metric):
             # This defines the format of each prediction and reference
             # Both prediction and reference are strings
             features=datasets.Features({
-                'predictions': datasets.Value('string'),
-                'references': datasets.Value('string'),
+                "predictions": datasets.Value("string", id="sequence"),
+                "references": datasets.Value("string", id="sequence"),
             }),
             # Homepage of the module for documentation
             homepage="http://module.homepage",
@@ -87,11 +94,56 @@ class LogScoreMetric(evaluate.Metric):
         # TODO: Download external resources if needed
         pass
 
+    def getLogMetric(self, pred: str, ref: str):
+        ref = ref.strip(' \t\n\r')
+        pred = pred.strip(' \t\n\r')
+
+        # Find all timestrings in the log
+        pred_timestrings = re.findall(self.timestamp_regex, pred, re.MULTILINE)
+        ref_timestrings = re.findall(self.timestamp_regex, ref, re.MULTILINE)
+
+        # Check if the prediction contains the correct number of timestrings
+        if len(pred_timestrings) != len(ref_timestrings):
+            return 0.0
+
+        # Replace all digits in the first predicted timestamp with '\d' to get
+        # a regex that describes its format
+        pred_timestring_pattern = re.sub(r'\d', r'\\d', re.escape(pred_timestrings[0])) if len(pred_timestrings) > 0 else r""
+
+        # A variable to save the previous timestamp (as a datetime obj) to check monotonicity
+        prev_datetime = None
+        # Convert matches to datetime objects
+        for ts in pred_timestrings:
+            try:
+                # Check if the format matches the format of the first timestamp
+                matchesPattern = re.fullmatch(pred_timestring_pattern, ts) is not None
+                # Check if the timestamps are monotonically increasing
+                cur_datetime = dateutil.parser.parse(ts)
+                monotonicallyIncreasing = True if prev_datetime is None else prev_datetime <= cur_datetime
+                prev_datetime = cur_datetime
+
+                if not (matchesPattern and monotonicallyIncreasing):
+                    # Timestamps not consistent
+                    return 0.0
+
+            except Exception:
+                # e.g. date format not parsable by dateutil.parser
+                return 0.0
+
+        # Correct amount of timestrings, monotonically increasing, consistent and parsable (by dateutil.parser) format
+        return 1.0
+
     def _compute(self, predictions, references):
         """Returns the scores"""
-        # TODO: Compute the different scores of the module
-        accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
+
+        timestamp_score = np.mean([self.getLogMetric(p, r) for p, r in zip(predictions, references)])
+        predictions_without_timestamps = [re.sub(self.timestamp_regex, '', p, flags=re.MULTILINE) for p in predictions]
+        references_without_timestamps = [re.sub(self.timestamp_regex, '', r, flags=re.MULTILINE) for r in references]
+
+        # SacreBLEU score on the logs with timestamps removed
+        sb_results = self.sacrebleu.compute(predictions=predictions_without_timestamps, references=references_without_timestamps)
+
         return {
-            "timestamp_score": accuracy,
-            "accuracy": accuracy,
+            "timestamp_score": timestamp_score,
+            "sacrebleu_score": sb_results["score"],
         }
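
For reference, a minimal sketch (not part of the commit) of what the timestamp check in getLogMetric does: it collects timestamps with timestamp_regex, derives a format pattern from the first match, and verifies that every timestamp shares that format and that the sequence is non-decreasing. The sample log lines below are invented for illustration.

import re
import dateutil.parser

# Same pattern as the timestamp_regex class attribute above
timestamp_regex = r'(^\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)'

# Invented example log: two lines, consistent format, increasing timestamps
log = "2023-01-01 12:00:00 service started\n2023-01-01 12:00:05 request handled"

timestrings = re.findall(timestamp_regex, log, re.MULTILINE)
print(timestrings)  # ['2023-01-01 12:00:00', '2023-01-01 12:00:05']

# Turn the first match into a format regex by replacing every digit with \d
format_pattern = re.sub(r'\d', r'\\d', re.escape(timestrings[0]))
print(all(re.fullmatch(format_pattern, ts) for ts in timestrings))  # True -> consistent format

# Monotonicity check: parsed timestamps must be non-decreasing
parsed = [dateutil.parser.parse(ts) for ts in timestrings]
print(all(a <= b for a, b in zip(parsed, parsed[1:])))  # True -> monotonically increasing

A prediction that fails any of these checks (wrong number of timestamps, mixed formats, decreasing timestamps, or an unparsable date) scores 0.0 on timestamp_score; otherwise it scores 1.0.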
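
A usage sketch for the new module, assuming it is loaded through evaluate.load; the module id shown here and the sample logs are assumptions, not taken from the commit. _compute averages the per-pair timestamp scores and reports SacreBLEU on the logs with the timestamps stripped out.

import evaluate

# The id is an assumption; a local path to logscoremetric.py works as well,
# e.g. evaluate.load("path/to/logscoremetric.py").
logscore = evaluate.load("logscoremetric")

predictions = ["2023-01-01 12:00:00 service started\n2023-01-01 12:00:05 request handled"]
references = ["2023-01-01 12:00:00 service started\n2023-01-01 12:00:07 request handled"]

results = logscore.compute(predictions=predictions, references=references)
print(results)  # e.g. {'timestamp_score': 1.0, 'sacrebleu_score': ...}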