Spaces:

svenwey
/

logmetric

Running

App Files Files Community

svenwey committed on Jun 12, 2024

Commit

422e946

1 Parent(s): d0d47f8

first implementation of logscoremetric

Browse files

Files changed (1) hide show

logscoremetric.py +58 -6

logscoremetric.py CHANGED Viewed

@@ -15,6 +15,9 @@
 import evaluate
 import datasets
 # TODO: Add BibTeX citation
@@ -61,6 +64,10 @@ BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
 class LogScoreMetric(evaluate.Metric):
     """TODO: Short description of my evaluation module."""
     def _info(self):
         # TODO: Specifies the evaluate.EvaluationModuleInfo object
         return evaluate.MetricInfo(
@@ -72,8 +79,8 @@ class LogScoreMetric(evaluate.Metric):
             # This defines the format of each prediction and reference
             # Both prediction and reference are strings
             features=datasets.Features({
-                'predictions': datasets.Value('string'),
-                'references': datasets.Value('string'),
             }),
             # Homepage of the module for documentation
             homepage="http://module.homepage",
@@ -87,11 +94,56 @@ class LogScoreMetric(evaluate.Metric):
         # TODO: Download external resources if needed
         pass
     def _compute(self, predictions, references):
         """Returns the scores"""
-        # TODO: Compute the different scores of the module
-        accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
         return {
-            "timestamp_score": accuracy,
-            "accuracy": accuracy,
         }

 import evaluate
 import datasets
+import re
+import dateutil.parser
+import numpy as np
 # TODO: Add BibTeX citation
 class LogScoreMetric(evaluate.Metric):
     """TODO: Short description of my evaluation module."""
+    # Constant regex to get timestrings
+    timestamp_regex = r'(^\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)'
+    sacrebleu = evaluate.load("sacrebleu")
     def _info(self):
         # TODO: Specifies the evaluate.EvaluationModuleInfo object
         return evaluate.MetricInfo(
             # This defines the format of each prediction and reference
             # Both prediction and reference are strings
             features=datasets.Features({
+                "predictions": datasets.Value("string", id="sequence"),
+                "references": datasets.Value("string", id="sequence"),
             }),
             # Homepage of the module for documentation
             homepage="http://module.homepage",
         # TODO: Download external resources if needed
         pass
+    def getLogMetric(self, pred : str, ref : str):
+        ref = ref.strip(' \t\n\r')
+        pred = pred.strip(' \t\n\r')
+        # Find all timestrings in the log
+        pred_timestrings = re.findall(self.timestamp_regex, pred, re.MULTILINE)
+        ref_timestrings = re.findall(self.timestamp_regex, ref, re.MULTILINE)
+        #Check if there is the correct amount of timestrings in the prediction
+        if(len(pred_timestrings) != len(ref_timestrings)):
+            return 0.0
+        # replace all digits in the reference timestamp (first timestamp) with '/d' to get
+        # a regex that describes the format
+        pred_timestring_pattern = re.sub(r'\d', r'\\d', re.escape(pred_timestrings[0])) if (len(pred_timestrings > 0)) else r""
+        # A variable to save the previous timestamp (as datetime obj) to check monotonicity
+        prev_datetime = None
+        # Convert matches to datetime objects
+        for ts in pred_timestrings:
+            try:
+                # Check if the format matches with the format of the first timestamp
+                matchesPattern = re.fullmatch(pred_timestring_pattern, ts) is not None
+                # Check if the timestamps are monotonically increasing
+                cur_datetime = dateutil.parser.parse(ts)
+                monotonicallyIncreasing = True if prev_datetime == None else prev_datetime <= cur_datetime
+                prev_datetime = cur_datetime
+                if not (matchesPattern and monotonicallyIncreasing):
+                    # timestamps not consistent
+                    return 0.0
+            except Exception as e:
+                # e.g. date format not parsable by dateutil.parser
+                return 0.0
+        # Correct amt of timestrings, monotonically increasing, consistent + (by dateutil.parser) parsable format
+        return 1.0
     def _compute(self, predictions, references):
         """Returns the scores"""
+        timestamp_score = np.mean([self.getLogMetric(p,r) for p,r in zip(predictions,references)])
+        predictions_without_timestamps = [re.sub(self.timestamp_regex, '', p, flags=re.MULTILINE) for p in predictions]
+        references_without_timestamps = [re.sub(self.timestamp_regex, '', r, flags=re.MULTILINE) for r in references]
+        # Sacrebleu score on logs without timestamps
+        sb_results = self.sacrebleu.compute(predictions=predictions_without_timestamps, references=references_without_timestamps)
         return {
+            "timestamp_score": timestamp_score,
+            "sacrebleu_score": sb_results.score,
         }