Spaces:
Sleeping
Sleeping
from pandas import DataFrame | |
from src.application.config import WORD_BREAK | |
from src.application.formatting import ( | |
color_text, | |
format_entity_count, | |
) | |
from src.application.image.helper import encode_image | |
from src.application.image.image import ImageDetector | |
from src.application.text.entity import apply_highlight | |
from src.application.text.helper import ( | |
extract_equal_text, | |
replace_leading_spaces, | |
) | |
from src.application.text.text import TextDetector | |
def create_governor_table(
    aligned_sentences_df: DataFrame,
    text: TextDetector,
    image: ImageDetector,
):
    """Build the HTML table comparing the input news with its sources.

    An image row is emitted first when ``image.input`` is set. When
    ``text.input`` is set, every row of ``aligned_sentences_df`` whose
    ``"input"`` value is not ``None`` is appended to ``text.governor_table``
    and a single text row is rendered from that table.

    Args:
        aligned_sentences_df: Frame whose rows are read here through the
            ``"input"``, ``"source"`` and ``"entities"`` columns.
        text: Text detection result. Its ``governor_table`` list is
            extended in place (side effect).
        image: Image detection result, forwarded to
            ``format_image_governor_row``.

    Returns:
        An HTML string: a heading followed by a four-column table whose
        body holds the generated rows joined by newlines.
    """
    rows = []

    if image.input is not None:
        rows.append(format_image_governor_row(image))

    if text.input is not None:
        # NOTE(review): entries are appended on every call; whether
        # text.governor_table is reset between calls is not visible here.
        for _, row in aligned_sentences_df.iterrows():
            if row["input"] is None:
                continue

            if row["source"] is None:
                # Two separate lists: a chained `a = b = []` would store the
                # very same list object in both slots of the entry below.
                equal_idx_1, equal_idx_2 = [], []
            else:
                # Get index of equal phrases in input and source sentences
                equal_idx_1, equal_idx_2 = extract_equal_text(
                    row["input"],
                    row["source"],
                )

            text.governor_table.append(
                [
                    row,
                    equal_idx_1,
                    equal_idx_2,
                    row["entities"],
                ],
            )

        rows.append(format_text_governor_row(text))

    table = "\n".join(rows)

    # NOTE(review): the template ends with an opening <style> tag that is
    # never closed. It is reproduced unchanged; confirm whether it is
    # intentional or a leftover.
    return f"""
<h5>Comparison between input news and source news:</h5>
<table border="1" style="width:100%; text-align:left;">
<col style="width: 170px;">
<col style="width: 170px;">
<col style="width: 30px;">
<col style="width: 75px;">
<thead>
<tr>
<th>Input news</th>
<th>Source (URL in Originality)</th>
<th>Forensic</th>
<th>Originality</th>
</tr>
</thead>
<tbody>
{table}
</tbody>
</table>
<style>
"""
def format_text_governor_row(text):
    """Render the text comparison as a single HTML ``<tr>``.

    Walks ``text.governor_table``; each entry is indexed as
    ``[aligned_row, input_equal_idx, source_equal_idx, entities]``. Entries
    whose ``"input"`` is ``None`` are skipped. Sentences that have a source
    get entity highlighting and overlap colouring; the others are shown
    as-is next to an empty source cell.

    Args:
        text: Object exposing ``governor_table``, ``prediction_label`` and
            ``prediction_score`` (the first element of the latter two is
            displayed, the score multiplied by 100).

    Returns:
        An HTML ``<tr>`` string with four cells: input sentences, source
        sentences, prediction plus entity summary, and the distinct source
        URLs as links.
    """
    input_cells = []
    source_cells = []
    link_cells = []
    seen_urls = []
    # Seeded with two zeros so that index -2 is valid from the first sentence.
    entity_counts = [0, 0]

    for entry in text.governor_table:
        aligned = entry[0]
        raw_input = aligned["input"]
        if raw_input is None:
            continue

        raw_source = aligned["source"]
        if raw_source is None:
            # Nothing to compare against: plain input, empty source cell.
            shown_input = raw_input
            shown_source = ""
        else:
            # NOTE(review): the offset handed to apply_highlight is the count
            # recorded two sentences back, not a running total - confirm this
            # is what apply_highlight expects.
            shown_input, input_marks = apply_highlight(
                raw_input,
                entry[3],  # entities_with_colors
                "input",
                entity_counts[-2],
            )
            shown_source, source_marks = apply_highlight(
                raw_source,
                entry[3],  # entities_with_colors
                "source",
                entity_counts[-2],
            )

            # Colour the overlapping words on both sides.
            shown_input = color_text(shown_input, entry[1], input_marks)
            shown_source = color_text(shown_source, entry[2], source_marks)

            # Turn the underscore placeholders back into real markup.
            shown_input = shown_input.replace("span_style", "span style")
            shown_input = shown_input.replace("1px_4px", "1px 4px")
            shown_source = shown_source.replace("span_style", "span style")
            shown_source = shown_source.replace("1px_4px", "1px 4px")

        shown_input = replace_leading_spaces(shown_input)
        shown_source = replace_leading_spaces(shown_source)

        input_cells.append(shown_input + "<br>")
        source_cells.append(shown_source + "<br>")

        # Each distinct URL is linked once, in first-seen order.
        url = aligned["url"]
        if url not in seen_urls:
            seen_urls.append(url)
            link_cells.append(f"""<a href="{url}">{url}</a><br><br>""")

        entities = entry[3]
        if entities is not None:
            entity_counts.append(len(entities))

    input_sentences = "".join(input_cells)
    source_sentences = "".join(source_cells)
    source_text_urls = "".join(link_cells)
    entity_count_text = format_entity_count(sum(entity_counts))

    return f"""
<tr>
<td>{input_sentences}</td>
<td>{source_sentences}</td>
<td>{text.prediction_label[0]}<br>
({text.prediction_score[0] * 100:.2f}%)<br><br>
{entity_count_text}</td>
<td style="{WORD_BREAK}";>{source_text_urls}</td>
</tr>
"""
def format_image_governor_row(image: ImageDetector):
    """Render the image comparison as a single HTML ``<tr>``.

    The input image is shown as a link when ``image.input`` contains
    ``"http"``; otherwise it is passed to ``encode_image`` and embedded as a
    base64 ``<img>``. The source cells show the referent image and its link,
    or ``"Image not found"`` when ``image.referent_url`` is ``None`` or
    empty.

    Args:
        image: Object exposing ``input``, ``referent_url``,
            ``prediction_label`` and ``prediction_score``.

    Returns:
        ``""`` when ``image.input`` is ``None``; otherwise an HTML ``<tr>``
        string with four cells.
    """
    if image.input is None:
        return ""

    # Built before the referent check so the row can still be rendered when
    # no source image is available.
    if "http" in image.input:
        input_image = (
            f"""<a href="{image.input}">{image.input}</a>"""  # noqa: E501
        )
    else:
        base64_image = encode_image(image.input)
        input_image = f"""<img src="data:image/jpeg;base64,{base64_image}" width="100" height="150">"""  # noqa: E501

    # Both conditions must hold. The previous `or` was true for every value,
    # which made the "Image not found" branch unreachable.
    if image.referent_url is not None and image.referent_url != "":
        source_image_url = f"""<a href="{image.referent_url}">{image.referent_url}</a>"""  # noqa: E501
        source_image = f"""<img src="{image.referent_url}" width="100" height="150">"""  # noqa: E501
    else:
        source_image = "Image not found"
        source_image_url = ""

    # NOTE(review): the score is printed as-is with a '%' sign, whereas the
    # text row multiplies its score by 100 - confirm the expected scale.
    return f"""
<tr>
<td>{input_image}</td>
<td>{source_image}</td>
<td>{image.prediction_label}<br>({image.prediction_score:.2f}%)</td>
<td style="{WORD_BREAK}";>{source_image_url}</td>
</tr>"""