File size: 6,804 Bytes
060ac52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import re

def highlight_common_words(common_words, sentences, title):
    """
    Highlight common words in sentences by adding color-coded background and unique IDs.

    Each sentence is numbered from 1 ("1. ...") and the numbered sentences are
    joined with ``<br><br>`` inside a dark-themed container headed by ``title``.
    All common words are matched in a single case-insensitive, whole-word pass
    over the raw sentence text. Because of that, markup inserted for one word
    is never re-scanned while looking for another word (e.g. "color" or
    "span"), and the numbering prefix itself is never highlighted.

    Args:
        common_words (list of tuples): ``(index, word)`` pairs. ``index`` is
            shown in a badge in front of every match of ``word``. Empty words
            are ignored. Each distinct word gets its own background hue (hues
            repeat after six words). If several words could match at the same
            position, the one listed first wins.
        sentences (list of str): List of sentences to search through.
        title (str): The title for the HTML output.

    Returns:
        str: HTML string with the highlighted sentences.

    Note:
        ``title`` and the sentences are inserted into the HTML verbatim; no
        HTML escaping is performed here.
    """
    color_map = {}
    badges = []        # (badge index, color) per alternative, parallel to the regex groups
    alternatives = []  # one capturing group per usable word

    for index, word in common_words:
        if not word:
            # An empty word would match at every word boundary.
            continue
        if word not in color_map:
            color_map[word] = f'hsl({len(color_map) * 60 % 360}, 70%, 80%)'
        badges.append((index, color_map[word]))
        alternatives.append(f'({re.escape(word)})')

    # One combined whole-word pattern; group number N corresponds to badges[N - 1].
    pattern = None
    if alternatives:
        pattern = re.compile(
            r'\b(?:' + '|'.join(alternatives) + r')\b',
            flags=re.IGNORECASE,
        )

    def _render(match):
        # Exactly one alternative group participates in a match, so lastindex identifies the word.
        index, color = badges[match.lastindex - 1]
        return (
            f'<span style="background-color: {color}; font-weight: bold;'
            f' padding: 2px 4px; border-radius: 2px; position: relative;">'
            f'<span style="background-color: black; color: white; border-radius: 50%;'
            f' padding: 2px 5px; margin-right: 5px;">{index}</span>'
            f'{match.group(0)}'
            f'</span>'
        )

    highlighted_html = []
    for idx, sentence in enumerate(sentences, start=1):
        # Coerce to str so non-string items are rendered like any formatted value.
        text = str(sentence)
        if pattern is not None:
            text = pattern.sub(_render, text)
        # Number the sentence only after highlighting so the prefix is never matched.
        highlighted_html.append(f"{idx}. {text}")

    # Format the HTML output with the title
    final_html = "<br><br>".join(highlighted_html)
    return f'''
    <div style="border: solid 1px #FFFFFF; padding: 16px; background-color: #000000; color: #FFFFFF; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 8px;">
        <h3 style="margin-top: 0; font-size: 1em; color: #FFFFFF;">{title}</h3>
        <div style="background-color: #333333; line-height: 1.6; padding: 15px; border-radius: 8px; color: #FFFFFF;">{final_html}</div>
    </div>
    '''

def highlight_common_words_dict(common_words, sentences, title):
    """
    Highlight common words in sentences (from a dictionary) by adding color-coded background and unique IDs.

    Each sentence is numbered from 1 ("1. ...") and followed by an
    "Entailment Score: <score>" label; the entries are joined with ``<br>``
    inside a dark-themed container headed by ``title``. All common words are
    matched in a single case-insensitive, whole-word pass over the raw sentence
    text. Because of that, markup inserted for one word is never re-scanned
    while looking for another word (e.g. "color" or "span"), and the numbering
    prefix itself is never highlighted.

    Args:
        common_words (list of tuples): ``(index, word)`` pairs. ``index`` is
            shown in a badge in front of every match of ``word``. Empty words
            are ignored. Each distinct word gets its own background hue (hues
            repeat after six words). If several words could match at the same
            position, the one listed first wins.
        sentences (dict): A dictionary of sentences where the key is the sentence and the value is an entailment score.
        title (str): The title for the HTML output.

    Returns:
        str: HTML string with the highlighted sentences and their entailment scores.

    Note:
        ``title``, the sentences and the scores are inserted into the HTML
        verbatim; no HTML escaping is performed here.
    """
    color_map = {}
    badges = []        # (badge index, color) per alternative, parallel to the regex groups
    alternatives = []  # one capturing group per usable word

    for index, word in common_words:
        if not word:
            # An empty word would match at every word boundary.
            continue
        if word not in color_map:
            color_map[word] = f'hsl({len(color_map) * 60 % 360}, 70%, 80%)'
        badges.append((index, color_map[word]))
        alternatives.append(f'({re.escape(word)})')

    # One combined whole-word pattern; group number N corresponds to badges[N - 1].
    pattern = None
    if alternatives:
        pattern = re.compile(
            r'\b(?:' + '|'.join(alternatives) + r')\b',
            flags=re.IGNORECASE,
        )

    def _render(match):
        # Exactly one alternative group participates in a match, so lastindex identifies the word.
        index, color = badges[match.lastindex - 1]
        return (
            f'<span style="background-color: {color}; font-weight: bold;'
            f' padding: 1px 2px; border-radius: 2px; position: relative;">'
            f'<span style="background-color: black; color: white; border-radius: 50%;'
            f' padding: 1px 3px; margin-right: 3px; font-size: 0.8em;">{index}</span>'
            f'{match.group(0)}'
            f'</span>'
        )

    highlighted_html = []
    for idx, (sentence, score) in enumerate(sentences.items(), start=1):
        # Coerce to str so non-string keys are rendered like any formatted value.
        text = str(sentence)
        if pattern is not None:
            text = pattern.sub(_render, text)
        # Number the sentence only after highlighting so the prefix is never matched.
        highlighted_sentence = f"{idx}. {text}"

        # Add the entailment score
        highlighted_html.append(
            f'<div style="margin-bottom: 5px;">'
            f'{highlighted_sentence}'
            f'<div style="display: inline-block; margin-left: 5px; padding: 3px 5px; border-radius: 3px; '
            f'background-color: #333333; color: white; font-size: 0.9em;">'
            f'Entailment Score: {score}</div></div>'
        )

    # Format the HTML output with the title
    final_html = "<br>".join(highlighted_html)
    return f'''
    <div style="background-color: #000000; color: #FFFFFF;border: solid 1px #FFFFFF; border-radius: 8px;">
        <h3 style="margin-top: 0; font-size: 1em; color: #FFFFFF;">{title}</h3>
        <div style="background-color: #333333; line-height: 1.6; padding: 15px; border-radius: 8px; color: #FFFFFF;">{final_html}</div>
    </div>
    '''

def reparaphrased_sentences_html(sentences):
    """
    Render sentences as a numbered list inside a dark-themed HTML container.

    Args:
        sentences (list of str): Sentences to display; they are numbered from 1
            in iteration order.

    Returns:
        str: HTML string in which the numbered sentences are separated by ``<br><br>``.
    """
    # Number each sentence on the fly and join them in one step
    final_html = "<br><br>".join(
        f"{number}. {text}" for number, text in enumerate(sentences, start=1)
    )

    # Wrap the numbered sentences in the container markup
    return f'''
    <div style="border: solid 1px #FFFFFF; background-color: #000000; color: #FFFFFF;
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 8px;">
        <div style="background-color: #333333; line-height: 1.6; padding: 15px; border-radius: 8px;">{final_html}</div>
    </div>
    '''

if __name__ == "__main__":
    # Demo run: print the HTML produced by both highlighters for sample input
    demo_words = [(1, "highlight"), (2, "numbering")]

    # List-based variant
    print(
        highlight_common_words(
            demo_words,
            ["This is a test to highlight words.", "Numbering is important for clarity."],
            "Test Highlighting",
        )
    )

    # Dict-based variant (sentence -> entailment score)
    print(
        highlight_common_words_dict(
            demo_words,
            {"Highlight words in this text.": 0.95, "Number sentences for clarity.": 0.8},
            "Test Dict Highlighting",
        )
    )