decompilation-metrics

Sleeping

App Files Files Community

ejschwartz commited on Apr 29

Commit

52b18ab

1 Parent(s): c4e7153

Add wildcard distance

Browse files

Files changed (2) hide show

dist.py +199 -0
main.py +2 -0

dist.py ADDED Viewed

	@@ -0,0 +1,199 @@

+def levenshtein_with_wildcards(str1, str2, wildcard='?', verbose=False):
+    """
+    Calculate the Levenshtein distance between two strings with support for wildcards.
+    Args:
+        str1 (str): The first string.
+        str2 (str): The second string.
+        wildcard (str, optional): The wildcard character. Defaults to '?'.
+        verbose (bool, optional): If True, prints the DP matrix and explains the process.
+    Returns:
+        int: The Levenshtein distance between the two strings.
+        list: If verbose=True, also returns a list of operations performed.
+    """
+    m, n = len(str1), len(str2)
+    # Create a matrix of size (m+1) x (n+1)
+    dp = [[0] * (n + 1) for _ in range(m + 1)]
+    # Initialize the first row and column
+    for i in range(m + 1):
+        dp[i][0] = i
+    for j in range(n + 1):
+        dp[0][j] = j
+    # Fill the dp matrix
+    for i in range(1, m + 1):
+        for j in range(1, n + 1):
+            # If either character is a wildcard, treat it as a match (cost = 0)
+            if str1[i - 1] == wildcard or str2[j - 1] == wildcard:
+                dp[i][j] = dp[i - 1][j - 1]  # No cost for wildcard matches
+            else:
+                cost = 0 if str1[i - 1] == str2[j - 1] else 1
+                dp[i][j] = min(
+                    dp[i - 1][j] + 1,      # deletion
+                    dp[i][j - 1] + 1,      # insertion
+                    dp[i - 1][j - 1] + cost  # substitution
+                )
+    if verbose:
+        operations = explain_match(str1, str2, dp, wildcard)
+        return dp[m][n], operations
+    return dp[m][n]
+def explain_match(str1, str2, dp, wildcard='?'):
+    """
+    Traces the optimal alignment path and explains each step of the matching process.
+    Args:
+        str1 (str): The first string.
+        str2 (str): The second string.
+        dp (list): The dynamic programming matrix.
+        wildcard (str, optional): The wildcard character. Defaults to '?'.
+    Returns:
+        list: A list of explanation strings for each operation performed.
+    """
+    m, n = len(str1), len(str2)
+    operations = []
+    # Find the optimal path
+    i, j = m, n
+    path = []
+    while i > 0 or j > 0:
+        path.append((i, j))
+        if i == 0:
+            j -= 1
+        elif j == 0:
+            i -= 1
+        else:
+            substitution_cost = dp[i-1][j-1]
+            deletion_cost = dp[i-1][j]
+            insertion_cost = dp[i][j-1]
+            min_cost = min(substitution_cost, deletion_cost, insertion_cost)
+            if min_cost == substitution_cost:
+                i -= 1
+                j -= 1
+            elif min_cost == deletion_cost:
+                i -= 1
+            else:
+                j -= 1
+    path.append((0, 0))
+    path.reverse()
+    # Generate explanations for each step
+    for idx in range(1, len(path)):
+        prev_i, prev_j = path[idx-1]
+        curr_i, curr_j = path[idx]
+        # Diagonal move (match or substitution)
+        if curr_i > prev_i and curr_j > prev_j:
+            char1 = str1[curr_i-1]
+            char2 = str2[curr_j-1]
+            if char1 == wildcard or char2 == wildcard:
+                wildcard_char = char1 if char1 == wildcard else char2
+                match_char = char2 if char1 == wildcard else char1
+                operations.append(f"Wildcard match: '{wildcard_char}' matches any character, here '{match_char}'")
+            elif char1 == char2:
+                operations.append(f"Match: '{char1}' matches '{char2}'")
+            else:
+                operations.append(f"Substitution: Replace '{char1}' with '{char2}'")
+        # Horizontal move (insertion)
+        elif curr_i == prev_i and curr_j > prev_j:
+            operations.append(f"Insertion: Insert '{str2[curr_j-1]}'")
+        # Vertical move (deletion)
+        elif curr_i > prev_i and curr_j == prev_j:
+            operations.append(f"Deletion: Delete '{str1[curr_i-1]}'")
+    return operations
+def print_match_summary(str1, str2, wildcard='?'):
+    """
+    Prints a summary of the match between two strings, highlighting wildcards.
+    Args:
+        str1 (str): The first string.
+        str2 (str): The second string.
+        wildcard (str, optional): The wildcard character. Defaults to '?'.
+    """
+    distance, operations = levenshtein_with_wildcards(str1, str2, wildcard, verbose=True)
+    print(f"Comparing '{str1}' and '{str2}' (wildcard: '{wildcard}')")
+    print(f"Edit distance: {distance}")
+    print("\nMatch process:")
+    for i, op in enumerate(operations):
+        print(f"Step {i+1}: {op}")
+    # Visual representation
+    alignment = []
+    i, j = 0, 0
+    aligned_str1 = ""
+    aligned_str2 = ""
+    match_indicators = ""
+    for op in operations:
+        if "match" in op or "Match" in op or "Substitution" in op:
+            aligned_str1 += str1[i]
+            aligned_str2 += str2[j]
+            if "Wildcard" in op:
+                match_indicators += "*"  # Wildcard match
+            elif "Match" in op:
+                match_indicators += "|"  # Exact match
+            else:
+                match_indicators += "X"  # Substitution
+            i += 1
+            j += 1
+        elif "Insertion" in op:
+            aligned_str1 += "-"
+            aligned_str2 += str2[j]
+            match_indicators += " "
+            j += 1
+        elif "Deletion" in op:
+            aligned_str1 += str1[i]
+            aligned_str2 += "-"
+            match_indicators += " "
+            i += 1
+    print("\nAlignment:")
+    print(aligned_str1)
+    print(match_indicators)
+    print(aligned_str2)
+    print("\nLegend:")
+    print("| = exact match, * = wildcard match, X = substitution, - = gap (insertion/deletion)")
+    # Summary of wildcard matches
+    wildcard_matches = [op for op in operations if "Wildcard" in op]
+    if wildcard_matches:
+        print("\nWildcard matches:")
+        for match in wildcard_matches:
+            print(f"- {match}")
+    return distance, operations
+# Example usage
+if __name__ == "__main__":
+    # Basic examples
+    print_match_summary("hello", "hello")       # 0 (identical strings)
+    print_match_summary("hello", "hallo")       # 1 (one substitution)
+    print_match_summary("he?lo", "hello")       # 0 (wildcard matches 'l')
+    print_match_summary("he?lo", "hallo")       # 0 (wildcard matches 'a')
+    print_match_summary("h?llo", "hello")       # 0 (wildcard matches 'e')
+    print_match_summary("h?llo", "hillo")       # 0 (wildcard matches 'i')
+    print_match_summary("c?t", "cat")           # 0 (wildcard matches 'a')
+    print_match_summary("c?t", "cut")           # 0 (wildcard matches 'u')
+    print_match_summary("w?rd", "word")         # 0 (wildcard matches 'o')
+    print_match_summary("d?g", "dog")           # 0 (wildcard matches 'o')

main.py CHANGED Viewed

@@ -7,6 +7,8 @@ import shlex
 import subprocess
 import tempfile
 description = frontmatter.load("README.md").content
 def trim(str, n):

 import subprocess
 import tempfile
+from dist import levenshtein_with_wildcards, print_match_summary
 description = frontmatter.load("README.md").content
 def trim(str, n):