import html
import re
def _compile_highlighter(common_words):
    """
    Build a single case-insensitive, whole-word regex covering every common word.
    Args:
    common_words (list of tuples): (index, word) pairs.
    Returns:
    tuple: (compiled pattern or None, list of (label, color) per capture group).
    """
    entries = []
    seen = set()
    for index, word in common_words:
        key = word.lower()
        # Empty words would match at every word boundary; repeated words
        # (matching ignores case) would only add dead alternatives.
        if not word or key in seen:
            continue
        seen.add(key)
        # Hue advances by 60 degrees per distinct word and wraps at 360.
        hue = len(entries) * 60 % 360
        entries.append((word, html.escape(str(index)), f'hsl({hue}, 70%, 80%)'))
    if not entries:
        return None, []
    # Longest first, so a phrase wins over a shorter word it starts with.
    # The sort is stable: equal lengths keep the caller's order.
    entries.sort(key=lambda entry: len(entry[0]), reverse=True)
    # One capture group per word: Match.lastindex then identifies which
    # word matched without a case-sensitive dictionary lookup.
    alternation = "|".join(f'({re.escape(word)})' for word, _, _ in entries)
    matcher = re.compile(rf'\b(?:{alternation})\b', re.IGNORECASE)
    styles = [(label, color) for _, label, color in entries]
    return matcher, styles


def _highlight_terms(text, matcher, styles):
    """
    HTML-escape text and wrap every matcher hit in a colored span with an index badge.
    Args:
    text: Sentence to render (converted with str()).
    matcher: Pattern from _compile_highlighter, or None when there is nothing to highlight.
    styles (list of tuples): (label, color) for each capture group of matcher.
    Returns:
    str: HTML fragment.
    """
    text = str(text)
    if matcher is None:
        return html.escape(text)
    pieces = []
    cursor = 0
    # Single pass over the raw text: markup is only ever appended to the
    # output, so no word can match inside tags or badges emitted earlier.
    for hit in matcher.finditer(text):
        label, color = styles[hit.lastindex - 1]
        pieces.append(html.escape(text[cursor:hit.start()]))
        pieces.append(
            f'<span style="background-color: {color}; font-weight: bold; '
            f'padding: 2px 4px; border-radius: 2px;">'
            f'<span style="background-color: black; color: white; '
            f'border-radius: 50%; padding: 2px 5px; margin-right: 5px;">'
            f'{label}</span>'
            f'{html.escape(hit.group(0))}'
            f'</span>'
        )
        cursor = hit.end()
    pieces.append(html.escape(text[cursor:]))
    return "".join(pieces)


def _wrap_panel(title, body):
    """Wrap an already-rendered HTML body in a titled container; the title is escaped."""
    return (
        '\n<div style="border: solid 1px #e0e0e0; padding: 16px; border-radius: 8px;">\n'
        f'  <h3 style="margin-top: 0; font-size: 1em;">{html.escape(str(title))}</h3>\n'
        f'  <div style="line-height: 1.6; padding: 15px;">{body}</div>\n'
        '</div>\n'
    )


def highlight_common_words(common_words, sentences, title):
    """
    Highlight common words in sentences by adding color-coded background and index badges.

    Matching is case-insensitive and whole-word. Each distinct word gets its own
    pastel background color, and every occurrence is preceded by a badge showing
    the word's index. Sentence and title text are HTML-escaped. The "N. " sentence
    number is added outside the highlighted text, so it is never highlighted itself.

    Args:
    common_words (list of tuples): List of tuples where each tuple contains a word's index and the word.
    sentences (list of str): List of sentences to search through.
    title (str): The title for the HTML output.
    Returns:
    str: HTML string with the numbered, highlighted sentences separated by blank lines.
    """
    matcher, styles = _compile_highlighter(common_words)
    rows = [
        f"{number}. {_highlight_terms(sentence, matcher, styles)}"
        for number, sentence in enumerate(sentences, start=1)
    ]
    return _wrap_panel(title, "<br><br>".join(rows))


def highlight_common_words_dict(common_words, sentences, title):
    """
    Highlight common words in sentences (from a dictionary) and show each sentence's entailment score.

    Highlighting follows the same rules as highlight_common_words: case-insensitive
    whole-word matches, one color per distinct word, an index badge per occurrence,
    and HTML-escaped sentence, score and title text.

    Args:
    common_words (list of tuples): List of tuples where each tuple contains a word's index and the word.
    sentences (dict): A dictionary of sentences where the key is the sentence and the value is an entailment score.
    title (str): The title for the HTML output.
    Returns:
    str: HTML string with the highlighted sentences and their entailment scores.
    """
    matcher, styles = _compile_highlighter(common_words)
    rows = [
        '<div style="margin-bottom: 12px;">'
        f'{number}. {_highlight_terms(sentence, matcher, styles)}'
        '<br>'
        f'<em>Entailment Score: {html.escape(str(score))}</em>'
        '</div>'
        for number, (sentence, score) in enumerate(sentences.items(), start=1)
    ]
    return _wrap_panel(title, "<br>".join(rows))
def reparaphrased_sentences_html(sentences):
    """
    Create an HTML representation of sentences with numbering.

    Each sentence is prefixed with its 1-based position ("1. ", "2. ", ...),
    HTML-escaped so that markup characters in the text are displayed rather
    than interpreted, and separated from the next one by a blank line.

    Args:
    sentences (list of str): List of sentences to format.
    Returns:
    str: HTML string with numbered sentences inside a container div.
    """
    numbered = [
        f"{position}. {html.escape(str(sentence))}"
        for position, sentence in enumerate(sentences, start=1)
    ]
    body = "<br><br>".join(numbered)
    return (
        '\n<div style="border: solid 1px #e0e0e0; padding: 16px; border-radius: 8px;">\n'
        f'  <div style="line-height: 1.6; padding: 15px;">{body}</div>\n'
        '</div>\n'
    )
if __name__ == "__main__":
    # Small manual demo: render both highlighters with the same word list
    # and print the resulting HTML.
    demo_words = [(1, "highlight"), (2, "numbering")]

    # List-based variant.
    print(
        highlight_common_words(
            demo_words,
            ["This is a test to highlight words.", "Numbering is important for clarity."],
            "Test Highlighting",
        )
    )

    # Dict-based variant: sentence -> entailment score.
    print(
        highlight_common_words_dict(
            demo_words,
            {"Highlight words in this text.": 0.95, "Number sentences for clarity.": 0.8},
            "Test Dict Highlighting",
        )
    )