pmkhanh7890's picture
add entities to demo
56cf7e3
raw
history blame
2.11 kB
from difflib import SequenceMatcher
import string
def extract_equal_text(text1, text2):
    """Locate the runs of words that two texts have in common.

    Each text is split on whitespace. Words are compared case-insensitively
    and with ASCII punctuation removed, so ``"Newcastle,"`` matches
    ``"newcastle"``. Normalization is done word by word so that the returned
    indices always refer to positions in ``text1.split()`` / ``text2.split()``,
    even when a text contains a word made only of punctuation.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        A tuple ``(equal_idx_1, equal_idx_2)`` of equally long lists. Entry
        ``k`` of each list is a dict ``{"start": i, "end": j}`` describing a
        half-open word range; ``text1.split()[i:j]`` in the first list
        corresponds to the matching range of ``text2.split()`` in the second.

    Side effects:
        Prints one diagnostic line to stdout for every matching run.
    """
    # Build the deletion table once instead of once per cleanup call.
    punctuation_table = str.maketrans('', '', string.punctuation)

    def cleanup(text):
        """Lower-case *text* and drop ASCII punctuation characters."""
        return text.lower().translate(punctuation_table)

    def normalize(word):
        """Return the comparison key for a single whitespace-delimited word."""
        # A word consisting only of punctuation would clean to "", which would
        # make unrelated symbols compare equal; keep its raw form instead.
        return cleanup(word) or word.lower()

    words1 = text1.split()
    words2 = text2.split()

    # One key per original word keeps matcher indices aligned with words1/2.
    keys1 = [normalize(word) for word in words1]
    keys2 = [normalize(word) for word in words2]

    s = SequenceMatcher(None, keys1, keys2)

    equal_idx_1 = []
    equal_idx_2 = []
    for tag, i1, i2, j1, j2 in s.get_opcodes():
        if tag == 'equal':
            equal_idx_1.append({"start": i1, "end": i2})
            equal_idx_2.append({"start": j1, "end": j2})
            # Show the original (un-normalized) wording of the shared run.
            subtext_1 = " ".join(words1[i1:i2])
            subtext_2 = " ".join(words2[j1:j2])
            print(f'{tag:7} a[{i1:2}:{i2:2}] --> b[{j1:2}:{j2:2}] {subtext_1!r:>55} --> {subtext_2!r}')
    return equal_idx_1, equal_idx_2
# Demo input: two differently worded reports of the same news story.
# The leading and trailing newlines inside the literals are part of the text.
text1 = """
Miguel Almiron has permanently rejoined Atlanta United from Newcastle United for £8m.
Almiron made 223 appearances for Newcastle, scoring 30 goals, but recently struggled for a starting place under Eddie Howe.
He made a substitute appearance and waved farewell to fans in Newcastle's recent win against Southampton.
Almiron played a key role in Newcastle reaching the Carabao Cup final and their Premier League top-four finish in 2022-23, and scored against Paris St-Germain in the Champions League.
"""
text2 = """
Newcastle United winger Miguel Almiron has rejoined Atlanta United on a permanent deal for £8m.
Almiron has made 223 appearances for Newcastle, scoring 30 goals, but has struggled recently to gain a place in manager Eddie Howe's starting line-up.
Last weekend he came on as a substitute in Newcastle's 3-1 win against Southampton and waved farewell to the travelling supporters.
Almiron played a significant role in Newcastle reaching the Carabao Cup final and finishing fourth in the Premier League in 2022-23.
"""
# Runs at import time: compare the two reports and keep both returned lists
# of {"start", "end"} index dicts (idx_1 for text1, idx_2 for text2).
idx_1, idx_2 = extract_equal_text(text1, text2)
# Disabled manual check: uncomment to print the slice of text1.split()
# selected by each entry of idx_1.
# text1_split = text1.split()
# for idx in idx_1:
#     print(text1_split[idx["start"]:idx["end"]])