from pandas import DataFrame from src.application.config import WORD_BREAK from src.application.formatting import ( color_text, format_entity_count, ) from src.application.image.helper import encode_image from src.application.image.image import ImageDetector from src.application.text.entity import apply_highlight from src.application.text.helper import ( extract_equal_text, replace_leading_spaces, ) from src.application.text.text import TextDetector def create_governor_table( aligned_sentences_df: DataFrame, text: TextDetector, image: ImageDetector, ): rows = [] if image.input is not None: rows.append(format_image_governor_row(image)) if text.input is not None: for _, row in aligned_sentences_df.iterrows(): if row["input"] is None: continue if row["source"] is None: equal_idx_1 = equal_idx_2 = [] else: # Get index of equal phrases in input and source sentences equal_idx_1, equal_idx_2 = extract_equal_text( row["input"], row["source"], ) text.governor_table.append( [ row, equal_idx_1, equal_idx_2, row["entities"], ], ) formatted_row = format_text_governor_row(text) rows.append(formatted_row) table = "\n".join(rows) return f"""
Input news | Source (URL in Originality) | Forensic | Originality |
---|