# Spaces: peccavi/ai-text-watermarking-model (Sleeping)
# File size: 6,804 Bytes
# 060ac52

import re

def highlight_common_words(common_words, sentences, title):
    """
    Highlight common words in sentences with color-coded backgrounds and index badges.

    Matching is case-insensitive and whole-word. All words are matched in a
    single regex pass over the raw sentence text, so markup inserted for one
    word is never re-scanned for another (a common word such as "span" or
    "color" cannot corrupt HTML that was already inserted). The "N. " numbering
    prefix is added after highlighting, so it is never treated as a match.

    Args:
        common_words (list of tuples): ``(index, word)`` pairs. ``index`` is shown in
            the badge next to every match of ``word``. Empty words are ignored. When
            several entries can match at the same position, the earliest entry wins.
        sentences (list of str): List of sentences to search through; they are
            numbered from 1 in the output.
        title (str): The title for the HTML output.

    Returns:
        str: HTML string with the highlighted sentences, joined by ``<br><br>``.

    Note:
        ``title`` and the sentence text are inserted into the HTML as-is; no
        HTML escaping is applied here.
    """
    color_map = {}
    badges = []        # (index, color) per alternative, aligned with capture groups
    alternatives = []  # one capture group per common word

    for index, word in common_words:
        if not word:
            # An empty word would match at every word boundary.
            continue
        if word not in color_map:
            # Colors are handed out in order of first appearance, 60 degrees apart.
            color_map[word] = f'hsl({len(color_map) * 60 % 360}, 70%, 80%)'
        badges.append((index, color_map[word]))
        alternatives.append(rf'(\b{re.escape(word)}\b)')

    pattern = (
        re.compile('|'.join(alternatives), flags=re.IGNORECASE)
        if alternatives
        else None
    )

    def _wrap(match):
        # Each alternative is exactly one group, so lastindex identifies the word.
        index, color = badges[match.lastindex - 1]
        return (
            f'<span style="background-color: {color}; font-weight: bold;'
            f' padding: 2px 4px; border-radius: 2px; position: relative;">'
            f'<span style="background-color: black; color: white; border-radius: 50%;'
            f' padding: 2px 5px; margin-right: 5px;">{index}</span>'
            f'{match.group(0)}'
            f'</span>'
        )

    highlighted_html = []
    for idx, sentence in enumerate(sentences, start=1):
        text = str(sentence)
        if pattern is not None:
            text = pattern.sub(_wrap, text)
        # Number the sentence only after highlighting so the prefix is never matched.
        highlighted_html.append(f"{idx}. {text}")

    # Format the HTML output with the title
    final_html = "<br><br>".join(highlighted_html)
    return f'''
    <div style="border: solid 1px #FFFFFF; padding: 16px; background-color: #000000; color: #FFFFFF; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 8px;">
        <h3 style="margin-top: 0; font-size: 1em; color: #FFFFFF;">{title}</h3>
        <div style="background-color: #333333; line-height: 1.6; padding: 15px; border-radius: 8px; color: #FFFFFF;">{final_html}</div>
    </div>
    '''

def highlight_common_words_dict(common_words, sentences, title):
    """
    Highlight common words in scored sentences and append each entailment score.

    Matching is case-insensitive and whole-word. All words are matched in a
    single regex pass over the raw sentence text, so markup inserted for one
    word is never re-scanned for another (a common word such as "span" or
    "color" cannot corrupt HTML that was already inserted). The "N. " numbering
    prefix is added after highlighting, so it is never treated as a match.

    Args:
        common_words (list of tuples): ``(index, word)`` pairs. ``index`` is shown in
            the badge next to every match of ``word``. Empty words are ignored. When
            several entries can match at the same position, the earliest entry wins.
        sentences (dict): A dictionary of sentences where the key is the sentence and
            the value is an entailment score. Sentences are numbered from 1 in
            iteration order.
        title (str): The title for the HTML output.

    Returns:
        str: HTML string with the highlighted sentences and their entailment scores,
        joined by ``<br>``.

    Note:
        ``title``, the sentence text and the scores are inserted into the HTML
        as-is; no HTML escaping is applied here.
    """
    color_map = {}
    badges = []        # (index, color) per alternative, aligned with capture groups
    alternatives = []  # one capture group per common word

    for index, word in common_words:
        if not word:
            # An empty word would match at every word boundary.
            continue
        if word not in color_map:
            # Colors are handed out in order of first appearance, 60 degrees apart.
            color_map[word] = f'hsl({len(color_map) * 60 % 360}, 70%, 80%)'
        badges.append((index, color_map[word]))
        alternatives.append(rf'(\b{re.escape(word)}\b)')

    pattern = (
        re.compile('|'.join(alternatives), flags=re.IGNORECASE)
        if alternatives
        else None
    )

    def _wrap(match):
        # Each alternative is exactly one group, so lastindex identifies the word.
        index, color = badges[match.lastindex - 1]
        return (
            f'<span style="background-color: {color}; font-weight: bold;'
            f' padding: 1px 2px; border-radius: 2px; position: relative;">'
            f'<span style="background-color: black; color: white; border-radius: 50%;'
            f' padding: 1px 3px; margin-right: 3px; font-size: 0.8em;">{index}</span>'
            f'{match.group(0)}'
            f'</span>'
        )

    highlighted_html = []
    for idx, (sentence, score) in enumerate(sentences.items(), start=1):
        text = str(sentence)
        if pattern is not None:
            text = pattern.sub(_wrap, text)
        # Number the sentence only after highlighting so the prefix is never matched.
        highlighted_sentence = f"{idx}. {text}"

        # Add the entailment score
        highlighted_html.append(
            f'<div style="margin-bottom: 5px;">'
            f'{highlighted_sentence}'
            f'<div style="display: inline-block; margin-left: 5px; padding: 3px 5px; border-radius: 3px; '
            f'background-color: #333333; color: white; font-size: 0.9em;">'
            f'Entailment Score: {score}</div></div>'
        )

    # Format the HTML output with the title
    final_html = "<br>".join(highlighted_html)
    return f'''
    <div style="background-color: #000000; color: #FFFFFF;border: solid 1px #FFFFFF; border-radius: 8px;">
        <h3 style="margin-top: 0; font-size: 1em; color: #FFFFFF;">{title}</h3>
        <div style="background-color: #333333; line-height: 1.6; padding: 15px; border-radius: 8px; color: #FFFFFF;">{final_html}</div>
    </div>
    '''

def reparaphrased_sentences_html(sentences):
    """
    Render sentences as a numbered HTML block.

    Each sentence is prefixed with its 1-based position ("1. ", "2. ", ...) and
    the numbered sentences are separated by a double line break inside a
    styled container.

    Args:
        sentences (list of str): Sentences to number and format.

    Returns:
        str: HTML string containing the numbered sentences.
    """
    # Number from 1 and join with a blank line between entries.
    final_html = "<br><br>".join(
        f"{position}. {text}" for position, text in enumerate(sentences, start=1)
    )
    return f'''
    <div style="border: solid 1px #FFFFFF; background-color: #000000; color: #FFFFFF;
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 8px;">
        <div style="background-color: #333333; line-height: 1.6; padding: 15px; border-radius: 8px;">{final_html}</div>
    </div>
    '''

if __name__ == "__main__":
    # Demo run: print the HTML produced by both highlighters for sample input.
    demo_words = [(1, "highlight"), (2, "numbering")]

    # List-based highlighter
    demo_sentences = [
        "This is a test to highlight words.",
        "Numbering is important for clarity.",
    ]
    print(highlight_common_words(demo_words, demo_sentences, "Test Highlighting"))

    # Dict-based highlighter (sentence -> entailment score)
    demo_scored = {
        "Highlight words in this text.": 0.95,
        "Number sentences for clarity.": 0.8,
    }
    print(highlight_common_words_dict(demo_words, demo_scored, "Test Dict Highlighting"))