import html
import re


def _build_highlighter(common_words):
    """
    Compile ``common_words`` into a function that highlights one sentence.

    Args:
        common_words (list of tuples): ``(index, word)`` pairs. Empty words
            and case-insensitive duplicates (first one wins) are ignored.

    Returns:
        callable: ``highlight(text) -> str``. The returned string is
        HTML-escaped text in which every whole-word, case-insensitive
        occurrence of a common word is wrapped in a colored ``<span>``
        carrying the word's index.
    """
    entries = []  # (index, word, color), in first-seen order
    seen = set()
    for index, word in common_words:
        key = word.lower()
        # An empty word would match at every position; duplicates would
        # otherwise be wrapped more than once.
        if not word or key in seen:
            continue
        seen.add(key)
        # Hues advance by 60 degrees per distinct word and wrap at 360.
        color = f'hsl({len(entries) * 60 % 360}, 70%, 80%)'
        entries.append((index, word, color))

    if not entries:
        return lambda text: html.escape(text, quote=False)

    # Longest words first so a multi-word phrase wins over a shorter word it
    # contains. ``sorted`` is stable, so ties keep their original order.
    ordered = sorted(entries, key=lambda entry: len(entry[1]), reverse=True)

    # One capturing group per word: ``match.lastindex`` then identifies which
    # word matched. Lookarounds are used instead of ``\b`` so that words that
    # start or end with punctuation (e.g. "C++") still match as whole words.
    pattern = re.compile(
        "|".join(
            rf"(?<!\w)({re.escape(word)})(?!\w)" for _, word, _ in ordered
        ),
        re.IGNORECASE,
    )

    def highlight(text):
        # Single pass over the *raw* text: inserted markup is never
        # re-scanned, so a common word such as "span" cannot corrupt it.
        pieces = []
        cursor = 0
        for match in pattern.finditer(text):
            index, _, color = ordered[match.lastindex - 1]
            pieces.append(html.escape(text[cursor:match.start()], quote=False))
            # NOTE(review): the exact tags/styles are a reconstruction; the
            # reviewed copy of this file had its markup stripped. Confirm
            # against the intended rendering.
            pieces.append(
                f'<span style="background-color: {color};">'
                f'<sup>{html.escape(str(index), quote=False)}</sup>'
                f'{html.escape(match.group(0), quote=False)}'
                f'</span>'
            )
            cursor = match.end()
        pieces.append(html.escape(text[cursor:], quote=False))
        return "".join(pieces)

    return highlight


def _wrap_with_title(title, body_html):
    """Return ``body_html`` placed under an escaped ``title`` heading."""
    return f'''
<div>
<h3>{html.escape(str(title), quote=False)}</h3>
<div>{body_html}</div>
</div>
'''


def highlight_common_words(common_words, sentences, title):
    """
    Highlight common words in sentences with a color-coded background.

    Each sentence is numbered from 1. Matching is whole-word and
    case-insensitive; the numbering prefix itself is never highlighted.
    Sentence text and title are HTML-escaped.

    Args:
        common_words (list of tuples): List of ``(index, word)`` tuples.
        sentences (list of str): Sentences to search through.
        title (str): The title for the HTML output.

    Returns:
        str: HTML string with the title and the highlighted sentences.
    """
    highlight = _build_highlighter(common_words)
    numbered = [
        f"{number}. {highlight(sentence)}"
        for number, sentence in enumerate(sentences, start=1)
    ]
    return _wrap_with_title(title, "<br><br>\n".join(numbered))


def highlight_common_words_dict(common_words, sentences, title):
    """
    Highlight common words in sentences that carry an entailment score.

    Behaves like ``highlight_common_words`` but additionally prints each
    sentence's score underneath it.

    Args:
        common_words (list of tuples): List of ``(index, word)`` tuples.
        sentences (dict): Maps each sentence to its entailment score.
        title (str): The title for the HTML output.

    Returns:
        str: HTML string with the highlighted sentences and their scores.
    """
    highlight = _build_highlighter(common_words)
    entries = [
        f'<div>'
        f'{number}. {highlight(sentence)}'
        f'<br>'
        f'Entailment Score: {html.escape(str(score), quote=False)}'
        f'</div>'
        for number, (sentence, score) in enumerate(sentences.items(), start=1)
    ]
    return _wrap_with_title(title, "<br>\n".join(entries))


def reparaphrased_sentences_html(sentences):
    """
    Create an HTML representation of sentences with numbering.

    Args:
        sentences (list of str): List of sentences to format.

    Returns:
        str: HTML string with the sentences numbered from 1, HTML-escaped
        and separated by blank lines.
    """
    numbered = [
        f"{number}. {html.escape(sentence, quote=False)}"
        for number, sentence in enumerate(sentences, start=1)
    ]
    final_html = "<br><br>\n".join(numbered)
    return f'''
<div>{final_html}</div>
'''


if __name__ == "__main__":
    # Example usage
    common_words = [(1, "highlight"), (2, "numbering")]
    sentences = [
        "This is a test to highlight words.",
        "Numbering is important for clarity.",
    ]

    # Test highlight_common_words
    highlighted_html = highlight_common_words(
        common_words, sentences, "Test Highlighting"
    )
    print(highlighted_html)

    # Test highlight_common_words_dict
    sentences_with_scores = {
        "Highlight words in this text.": 0.95,
        "Number sentences for clarity.": 0.8,
    }
    highlighted_html_dict = highlight_common_words_dict(
        common_words, sentences_with_scores, "Test Dict Highlighting"
    )
    print(highlighted_html_dict)