Move the similarity helper functions out of getLogMetric so they are not redefined on every call
Browse files- logmetric.py +24 -24
logmetric.py
CHANGED
@@ -97,6 +97,30 @@ class LogMetric(evaluate.Metric):
|
|
97 |
# TODO: Download external resources if needed
|
98 |
pass
|
99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
def getLogMetric(self, pred : str, ref : str):
|
101 |
ref = ref.strip(' \t\n\r')
|
102 |
pred = pred.strip(' \t\n\r')
|
@@ -170,30 +194,6 @@ class LogMetric(evaluate.Metric):
|
|
170 |
# e.g. date format not parsable by dateutil.parser
|
171 |
matchesPatternScore = 0.0
|
172 |
monotonicallyIncreasingScore = 0.0
|
173 |
-
|
174 |
-
# Jaccard Similarity to measure closeness of two log-messages
|
175 |
-
def get_jaccard_similarity(set1, set2):
|
176 |
-
intersection = set1.intersection(set2)
|
177 |
-
union = set1.union(set2)
|
178 |
-
return len(intersection) / len(union)
|
179 |
-
|
180 |
-
# A score depending on the difference in length of two sentences
|
181 |
-
def get_length_score(sentence1, sentence2):
|
182 |
-
s1len = len(sentence1)
|
183 |
-
s2len = len(sentence2)
|
184 |
-
|
185 |
-
return 1 - (abs(s1len - s2len) / max(s1len, s2len))
|
186 |
-
|
187 |
-
# Combine a weighted average of different scores
|
188 |
-
def get_overall_similarity(sentence1, sentence2):
|
189 |
-
s1split = sentence1.split()
|
190 |
-
s2split = sentence2.split()
|
191 |
-
|
192 |
-
jaccard_score = get_jaccard_similarity(set(s1split), set(s2split))
|
193 |
-
length_score = get_length_score(s1split, s2split)
|
194 |
-
|
195 |
-
return (jaccard_score * 0.7 + length_score * 0.3) * 100.0
|
196 |
-
|
197 |
|
198 |
# apply the overall similarity (weighted Jaccard + length score, scaled to 0-100) to every pred-ref pair and then take the mean
|
199 |
local_score = np.mean([get_overall_similarity(p, r) for p,r in
|
|
|
97 |
# TODO: Download external resources if needed
|
98 |
pass
|
99 |
|
100 |
+
# Jaccard Similarity to measure closeness of two log-messages
|
101 |
+
def get_jaccard_similarity(set1, set2):
    """Return the Jaccard similarity |set1 ∩ set2| / |set1 ∪ set2|.

    Args:
        set1: A set of tokens.
        set2: A set (or any iterable) of tokens to compare against.

    Returns:
        A float in [0.0, 1.0]. Two empty sets are treated as identical and
        score 1.0 instead of raising ZeroDivisionError.
    """
    union = set1.union(set2)
    if not union:
        # Both inputs are empty: nothing differs, and dividing by
        # len(union) == 0 would raise.
        return 1.0
    intersection = set1.intersection(set2)
    return len(intersection) / len(union)
|
105 |
+
|
106 |
+
# A score depending on the difference in length of two sentences
|
107 |
+
def get_length_score(sentence1, sentence2):
    """Score how close two sized inputs are in length.

    Args:
        sentence1: Any object supporting len() (e.g. a list of tokens).
        sentence2: Any object supporting len().

    Returns:
        1 - |len1 - len2| / max(len1, len2), a float in [0.0, 1.0].
        Two empty inputs have equal length and score 1.0 instead of
        raising ZeroDivisionError.
    """
    s1len = len(sentence1)
    s2len = len(sentence2)
    longest = max(s1len, s2len)

    if longest == 0:
        # Both inputs are empty: lengths match exactly, and the division
        # below would otherwise be by zero.
        return 1.0

    return 1 - (abs(s1len - s2len) / longest)
|
112 |
+
|
113 |
+
# Combine a weighted average of different scores
|
114 |
+
def get_overall_similarity(sentence1, sentence2):
    """Blend token overlap and token-count closeness into one 0-100 score.

    Both sentences are split on whitespace. The Jaccard similarity of the
    resulting token sets is weighted 0.7 and the length score of the token
    lists is weighted 0.3; the weighted sum is then scaled by 100.

    Args:
        sentence1: First sentence (str).
        sentence2: Second sentence (str).

    Returns:
        The weighted similarity as a float scaled by 100.
    """
    tokens_a = sentence1.split()
    tokens_b = sentence2.split()

    overlap = get_jaccard_similarity(set(tokens_a), set(tokens_b))
    closeness = get_length_score(tokens_a, tokens_b)

    weighted = overlap * 0.7 + closeness * 0.3
    return weighted * 100.0
|
122 |
+
|
123 |
+
|
124 |
def getLogMetric(self, pred : str, ref : str):
|
125 |
ref = ref.strip(' \t\n\r')
|
126 |
pred = pred.strip(' \t\n\r')
|
|
|
194 |
# e.g. date format not parsable by dateutil.parser
|
195 |
matchesPatternScore = 0.0
|
196 |
monotonicallyIncreasingScore = 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
197 |
|
198 |
# apply the overall similarity (weighted Jaccard + length score, scaled to 0-100) to every pred-ref pair and then take the mean
|
199 |
local_score = np.mean([get_overall_similarity(p, r) for p,r in
|