File size: 6,804 Bytes
060ac52 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 |
import re
def highlight_common_words(common_words, sentences, title):
    """
    Highlight common words in sentences with a colored background and an index badge.

    Each sentence is prefixed with its 1-based position ("1. ", "2. ", ...).
    Matching is case-insensitive and anchored on word boundaries. All words are
    matched in a single pass over the raw sentence text, so markup inserted for
    one word (and the numeric prefix) is never searched again for another word.
    When several words could match at the same position, the one listed first
    in ``common_words`` wins.

    The title and sentences are inserted into the HTML verbatim (no escaping).

    Args:
        common_words (list of tuples): ``(index, word)`` pairs. ``index`` is shown
            in the badge next to each highlighted occurrence. Empty words are skipped.
        sentences (list of str): Sentences to search through.
        title (str): The title for the HTML output.

    Returns:
        str: HTML string with the highlighted sentences.
    """
    # One color per distinct word, assigned in first-seen order; the hue
    # advances 60 degrees per new word and wraps around at 360.
    color_map = {}
    alternatives = []
    badges = []  # badges[i] holds (index, color) for capture group i + 1
    for index, word in common_words:
        if not word:
            # An empty word would match at every word boundary.
            continue
        if word not in color_map:
            color_map[word] = f'hsl({len(color_map) * 60 % 360}, 70%, 80%)'
        alternatives.append(rf'(\b{re.escape(word)}\b)')
        badges.append((index, color_map[word]))

    # Compile once: one capture group per word lets the replacement callback
    # identify which word matched via ``match.lastindex``.
    pattern = re.compile("|".join(alternatives), re.IGNORECASE) if alternatives else None

    def render(match):
        index, color = badges[match.lastindex - 1]
        return (
            f'<span style="background-color: {color}; font-weight: bold;'
            f' padding: 2px 4px; border-radius: 2px; position: relative;">'
            f'<span style="background-color: black; color: white; border-radius: 50%;'
            f' padding: 2px 5px; margin-right: 5px;">{index}</span>'
            f'{match.group(0)}'
            f'</span>'
        )

    highlighted_html = []
    for position, sentence in enumerate(sentences, start=1):
        text = f"{sentence}"
        if pattern is not None:
            text = pattern.sub(render, text)
        # The numeric prefix is added after highlighting so it is never matched.
        highlighted_html.append(f"{position}. {text}")

    final_html = "<br><br>".join(highlighted_html)
    return (
        "\n"
        '<div style="border: solid 1px #FFFFFF; padding: 16px; background-color: #000000; '
        'color: #FFFFFF; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 8px;">\n'
        f'<h3 style="margin-top: 0; font-size: 1em; color: #FFFFFF;">{title}</h3>\n'
        '<div style="background-color: #333333; line-height: 1.6; padding: 15px; '
        f'border-radius: 8px; color: #FFFFFF;">{final_html}</div>\n'
        "</div>\n"
    )
def highlight_common_words_dict(common_words, sentences, title):
    """
    Highlight common words in scored sentences and append each entailment score.

    Each sentence is prefixed with its 1-based position ("1. ", "2. ", ...) and
    followed by an "Entailment Score: <score>" label. Matching is
    case-insensitive and anchored on word boundaries. All words are matched in
    a single pass over the raw sentence text, so markup inserted for one word
    (and the numeric prefix) is never searched again for another word. When
    several words could match at the same position, the one listed first in
    ``common_words`` wins.

    The title, sentences and scores are inserted into the HTML verbatim (no escaping).

    Args:
        common_words (list of tuples): ``(index, word)`` pairs. ``index`` is shown
            in the badge next to each highlighted occurrence. Empty words are skipped.
        sentences (dict): Maps each sentence to its entailment score.
        title (str): The title for the HTML output.

    Returns:
        str: HTML string with the highlighted sentences and their entailment scores.
    """
    # One color per distinct word, assigned in first-seen order; the hue
    # advances 60 degrees per new word and wraps around at 360.
    color_map = {}
    alternatives = []
    badges = []  # badges[i] holds (index, color) for capture group i + 1
    for index, word in common_words:
        if not word:
            # An empty word would match at every word boundary.
            continue
        if word not in color_map:
            color_map[word] = f'hsl({len(color_map) * 60 % 360}, 70%, 80%)'
        alternatives.append(rf'(\b{re.escape(word)}\b)')
        badges.append((index, color_map[word]))

    # Compile once: one capture group per word lets the replacement callback
    # identify which word matched via ``match.lastindex``.
    pattern = re.compile("|".join(alternatives), re.IGNORECASE) if alternatives else None

    def render(match):
        index, color = badges[match.lastindex - 1]
        return (
            f'<span style="background-color: {color}; font-weight: bold;'
            f' padding: 1px 2px; border-radius: 2px; position: relative;">'
            f'<span style="background-color: black; color: white; border-radius: 50%;'
            f' padding: 1px 3px; margin-right: 3px; font-size: 0.8em;">{index}</span>'
            f'{match.group(0)}'
            f'</span>'
        )

    highlighted_html = []
    for position, (sentence, score) in enumerate(sentences.items(), start=1):
        text = f"{sentence}"
        if pattern is not None:
            text = pattern.sub(render, text)
        # Prefix and score label are added after highlighting so neither is matched.
        highlighted_html.append(
            f'<div style="margin-bottom: 5px;">'
            f'{position}. {text}'
            f'<div style="display: inline-block; margin-left: 5px; padding: 3px 5px; border-radius: 3px; '
            f'background-color: #333333; color: white; font-size: 0.9em;">'
            f'Entailment Score: {score}</div></div>'
        )

    final_html = "<br>".join(highlighted_html)
    return (
        "\n"
        '<div style="background-color: #000000; color: #FFFFFF;border: solid 1px #FFFFFF; '
        'border-radius: 8px;">\n'
        f'<h3 style="margin-top: 0; font-size: 1em; color: #FFFFFF;">{title}</h3>\n'
        '<div style="background-color: #333333; line-height: 1.6; padding: 15px; '
        f'border-radius: 8px; color: #FFFFFF;">{final_html}</div>\n'
        "</div>\n"
    )
def reparaphrased_sentences_html(sentences):
    """
    Render sentences as a numbered list inside a styled HTML container.

    Each sentence is prefixed with its 1-based position ("1. ", "2. ", ...)
    and consecutive sentences are separated by two ``<br>`` tags. Sentences
    are inserted verbatim (no escaping).

    Args:
        sentences (list of str): Sentences to format.

    Returns:
        str: HTML string with the numbered sentences.
    """
    numbered = (
        f"{position}. {text}" for position, text in enumerate(sentences, start=1)
    )
    body = "<br><br>".join(numbered)
    return (
        "\n"
        '<div style="border: solid 1px #FFFFFF; background-color: #000000; color: #FFFFFF;\n'
        'box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 8px;">\n'
        '<div style="background-color: #333333; line-height: 1.6; padding: 15px; '
        f'border-radius: 8px;">{body}</div>\n'
        "</div>\n"
    )
if __name__ == "__main__":
    # Demo: print the HTML produced by both highlighters for a small sample.
    demo_words = [(1, "highlight"), (2, "numbering")]

    # List-based highlighter
    demo_sentences = [
        "This is a test to highlight words.",
        "Numbering is important for clarity.",
    ]
    print(highlight_common_words(demo_words, demo_sentences, "Test Highlighting"))

    # Dict-based highlighter (sentence -> entailment score)
    demo_scored = {
        "Highlight words in this text.": 0.95,
        "Number sentences for clarity.": 0.8,
    }
    print(highlight_common_words_dict(demo_words, demo_scored, "Test Dict Highlighting"))
|