svenwey committed on
Commit
0b41e7e
·
1 Parent(s): e306ff9

put functions outside of getLogMetric for increased efficiency

Browse files
Files changed (1) hide show
  1. logmetric.py +24 -24
logmetric.py CHANGED
@@ -97,6 +97,30 @@ class LogMetric(evaluate.Metric):
97
  # TODO: Download external resources if needed
98
  pass
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  def getLogMetric(self, pred : str, ref : str):
101
  ref = ref.strip(' \t\n\r')
102
  pred = pred.strip(' \t\n\r')
@@ -170,30 +194,6 @@ class LogMetric(evaluate.Metric):
170
  # e.g. date format not parsable by dateutil.parser
171
  matchesPatternScore = 0.0
172
  monotonicallyIncreasingScore = 0.0
173
-
174
- # Jaccard Similarity to measure closeness of two log-messages
175
- def get_jaccard_similarity(set1, set2):
176
- intersection = set1.intersection(set2)
177
- union = set1.union(set2)
178
- return len(intersection) / len(union)
179
-
180
- # A score depending on the difference in length of two sentences
181
- def get_length_score(sentence1, sentence2):
182
- s1len = len(sentence1)
183
- s2len = len(sentence2)
184
-
185
- return 1 - (abs(s1len - s2len) / max(s1len, s2len))
186
-
187
- # Combine a weighted average of different scores
188
- def get_overall_similarity(sentence1, sentence2):
189
- s1split = sentence1.split()
190
- s2split = sentence2.split()
191
-
192
- jaccard_score = get_jaccard_similarity(set(s1split), set(s2split))
193
- length_score = get_length_score(s1split, s2split)
194
-
195
- return (jaccard_score * 0.7 + length_score * 0.3) * 100.0
196
-
197
 
198
  # apply jaccard-similarity to every pred-ref pair and then take mean score * 100
199
  local_score = np.mean([get_overall_similarity(p, r) for p,r in
 
97
  # TODO: Download external resources if needed
98
  pass
99
 
100
+ # Jaccard Similarity to measure closeness of two log-messages
101
+ def get_jaccard_similarity(set1, set2):
102
+ intersection = set1.intersection(set2)
103
+ union = set1.union(set2)
104
+ return len(intersection) / len(union)
105
+
106
+ # A score depending on the difference in length of two sentences
107
+ def get_length_score(sentence1, sentence2):
108
+ s1len = len(sentence1)
109
+ s2len = len(sentence2)
110
+
111
+ return 1 - (abs(s1len - s2len) / max(s1len, s2len))
112
+
113
+ # Combine a weighted average of different scores
114
+ def get_overall_similarity(sentence1, sentence2):
115
+ s1split = sentence1.split()
116
+ s2split = sentence2.split()
117
+
118
+ jaccard_score = get_jaccard_similarity(set(s1split), set(s2split))
119
+ length_score = get_length_score(s1split, s2split)
120
+
121
+ return (jaccard_score * 0.7 + length_score * 0.3) * 100.0
122
+
123
+
124
  def getLogMetric(self, pred : str, ref : str):
125
  ref = ref.strip(' \t\n\r')
126
  pred = pred.strip(' \t\n\r')
 
194
  # e.g. date format not parsable by dateutil.parser
195
  matchesPatternScore = 0.0
196
  monotonicallyIncreasingScore = 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
 
198
  # apply jaccard-similarity to every pred-ref pair and then take mean score * 100
199
  local_score = np.mean([get_overall_similarity(p, r) for p,r in