File size: 2,110 Bytes
56cf7e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from difflib import SequenceMatcher
import string

def extract_equal_text(text1, text2):
    """Locate the runs of words that two texts have in common.

    Both texts are split on whitespace and compared word by word with
    ``difflib.SequenceMatcher``. The comparison ignores case and ASCII
    punctuation (``string.punctuation``), so ``"Newcastle,"`` matches
    ``"newcastle"``.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        A pair ``(equal_idx_1, equal_idx_2)`` of equally long lists. Entry
        ``k`` of each list is a dict ``{"start": int, "end": int}`` giving a
        half-open word range into ``text1.split()`` / ``text2.split()``
        respectively; the two ranges at the same position ``k`` describe the
        same matching run.

    Side effects:
        Prints one line per matching run showing the indices and the
        original (un-normalised) words of both texts.
    """
    punctuation_table = str.maketrans('', '', string.punctuation)

    def cleanup(word):
        # Normalise a single word for comparison. A word made only of
        # punctuation (e.g. "-") would become empty; keep it as-is so that it
        # does not compare equal to every other punctuation-only word.
        lowered = word.lower()
        return lowered.translate(punctuation_table) or lowered

    # Split first and normalise word by word: this guarantees the normalised
    # lists have exactly the same length as the raw word lists, so the
    # opcode indices below are valid for slicing the raw words.
    words1 = text1.split()
    words2 = text2.split()
    normalized1 = [cleanup(word) for word in words1]
    normalized2 = [cleanup(word) for word in words2]

    s = SequenceMatcher(None, normalized1, normalized2)

    equal_idx_1 = []
    equal_idx_2 = []
    for tag, i1, i2, j1, j2 in s.get_opcodes():
        if tag != 'equal':
            continue
        equal_idx_1.append({"start": i1, "end": i2})
        equal_idx_2.append({"start": j1, "end": j2})
        subtext_1 = " ".join(words1[i1:i2])
        subtext_2 = " ".join(words2[j1:j2])
        print(f'{tag:7}   a[{i1:2}:{i2:2}] --> b[{j1:2}:{j2:2}] {subtext_1!r:>55} --> {subtext_2!r}')

    return equal_idx_1, equal_idx_2

# Sample input: two differently worded write-ups of the same news story,
# used below to demonstrate extract_equal_text.
text1 = """
Miguel Almiron has permanently rejoined Atlanta United from Newcastle United for £8m.
Almiron made 223 appearances for Newcastle, scoring 30 goals, but recently struggled for a starting place under Eddie Howe.
He made a substitute appearance and waved farewell to fans in Newcastle's recent win against Southampton.
Almiron played a key role in Newcastle reaching the Carabao Cup final and their Premier League top-four finish in 2022-23, and scored against Paris St-Germain in the Champions League.
"""
text2 = """
Newcastle United winger Miguel Almiron has rejoined Atlanta United on a permanent deal for £8m.
Almiron has made 223 appearances for Newcastle, scoring 30 goals, but has struggled recently to gain a place in manager Eddie Howe's starting line-up.
Last weekend he came on as a substitute in Newcastle's 3-1 win against Southampton and waved farewell to the travelling supporters.
Almiron played a significant role in Newcastle reaching the Carabao Cup final and finishing fourth in the Premier League in 2022-23.
"""

# Runs at import time: prints each matching word run and keeps the
# {"start", "end"} word-index ranges for text1 (idx_1) and text2 (idx_2).
idx_1, idx_2 = extract_equal_text(text1, text2)

# text1_split = text1.split()
# for idx in idx_1:
#     print(text1_split[idx["start"]:idx["end"]])