from pandas import DataFrame from src.application.config import WORD_BREAK from src.application.formatting import ( color_text, format_entity_count, ) from src.application.image.helper import encode_image from src.application.image.image import ImageDetector from src.application.text.entity import apply_highlight from src.application.text.helper import ( extract_equal_text, replace_leading_spaces, ) from src.application.text.text import TextDetector def create_fact_checker_table( aligned_sentences_df: DataFrame, text: TextDetector, image: ImageDetector, ): rows = [] if image.input is not None: rows.append(format_image_fact_checker_row(image)) if text.input is not None: for _, row in aligned_sentences_df.iterrows(): if row["input"] is None: continue if row["source"] is None: equal_idx_1 = equal_idx_2 = [] else: # Get index of equal phrases in input and source sentences equal_idx_1, equal_idx_2 = extract_equal_text( row["input"], row["source"], ) text.fact_checker_table.append( [ row, # aligned_sentences_df equal_idx_1, # index of equal text in input equal_idx_2, # index of equal text in source row["entities"], row["url"], ], ) previous_url = None span_row = 1 for index, row in enumerate(text.fact_checker_table): current_url = row[4] last_url_row = False # First row or URL change if index == 0 or current_url != previous_url: first_url_row = True previous_url = current_url # Increase counter "span_row" when the next url is the same while ( index + span_row < len(text.fact_checker_table) and text.fact_checker_table[index + span_row][4] == current_url ): span_row += 1 else: first_url_row = False span_row -= 1 if span_row == 1: last_url_row = True formatted_row = format_text_fact_checker_row( text, row, first_url_row, last_url_row, span_row, ) rows.append(formatted_row) table = "\n".join(rows) return f"""
Input news | Source (URL in Originality) | Forensic | Originality |
---|