pmkhanh7890 committed on
Commit
530452f
·
1 Parent(s): bfe6692
Files changed (2) hide show
  1. src/application/content_detection.py +25 -39
  2. test.py +14 -2
src/application/content_detection.py CHANGED
@@ -507,15 +507,14 @@ class NewsVerification:
507
  score = self.text_prediction_score[0]
508
 
509
  # Format displayed url
510
-
511
- short_url = self.shorten_url(url, max_length)
512
- source_text_url = f"""<a href="{url}">{short_url}</a>"""
513
 
514
  # Format displayed entity count
515
  entity_count_text = self.get_entity_count_text(entity_count)
516
 
517
  border_top = "border-top: 1px solid transparent;"
518
  border_bottom = "border-bottom: 1px solid transparent;"
 
519
  if first_url_row is True:
520
  # First & Last the group: no transparent
521
  if last_url_row is True:
@@ -526,7 +525,7 @@ class NewsVerification:
526
  <td rowspan="{span_row}">{label}<br>
527
  ({score * 100:.2f}%)<br><br>
528
  {entity_count_text}</td>
529
- <td rowspan="{span_row}">{source_text_url}</td>
530
  </tr>
531
  """
532
  # First row of the group: transparent bottom border
@@ -537,7 +536,7 @@ class NewsVerification:
537
  <td rowspan="{span_row}">{label}<br>
538
  ({score * 100:.2f}%)<br><br>
539
  {entity_count_text}</td>
540
- <td rowspan="{span_row}">{source_text_url}</td>
541
  </tr>
542
  """
543
  else:
@@ -565,15 +564,18 @@ class NewsVerification:
565
  or self.image_referent_url != ""
566
  ):
567
  source_image = f"""<img src="{self.image_referent_url}" width="100" height="150">""" # noqa: E501
568
- short_url = self.shorten_url(self.image_referent_url, max_length)
569
- source_image_url = (
570
- f"""<a href="{self.image_referent_url}">{short_url}</a>"""
571
- )
572
  else:
573
  source_image = "Image not found"
574
  source_image_url = ""
575
 
576
- return f"""<tr><td>input image</td><td>{source_image}</td><td>{self.image_prediction_label}<br>({self.image_prediction_score:.2f}%)</td><td>{source_image_url}</td></tr>""" # noqa: E501
 
 
 
 
 
 
577
 
578
  def create_ordinary_user_table(self):
579
  rows = []
@@ -585,7 +587,7 @@ class NewsVerification:
585
  return f"""
586
  <h5>Comparison between input news and source news:</h5>
587
  <table border="1" style="width:100%; text-align:left;">
588
- <col style="width: 170px;">
589
  <col style="width: 30px;">
590
  <col style="width: 75px;">
591
  <thead>
@@ -614,15 +616,15 @@ class NewsVerification:
614
  url = row["url"]
615
  if url not in urls:
616
  urls.append(url)
617
- short_url = self.shorten_url(url, max_length)
618
- source_text_urls += f"""<a href="{url}">{short_url}</a><br>"""
619
 
 
620
  return f"""
621
  <tr>
622
  <td>{input_sentences}</td>
623
  <td>{self.text_prediction_label[0]}<br>
624
  ({self.text_prediction_score[0] * 100:.2f}%)</td>
625
- <td>{source_text_urls}</td>
626
  </tr>
627
  """
628
 
@@ -632,15 +634,12 @@ class NewsVerification:
632
  self.image_referent_url is not None
633
  or self.image_referent_url != ""
634
  ):
635
- short_url = self.shorten_url(self.image_referent_url, max_length)
636
- source_image_url = (
637
- f"""<a href="{self.image_referent_url}">{short_url}</a>"""
638
- )
639
  else:
640
- # source_image = "Image not found"
641
  source_image_url = ""
642
 
643
- return f"""<tr><td>input image</td><td>{self.image_prediction_label}<br>({self.image_prediction_score:.2f}%)</td><td>{source_image_url}</td></tr>""" # noqa: E501
 
644
 
645
  def create_governor_table(self):
646
  rows = []
@@ -764,14 +763,13 @@ class NewsVerification:
764
  url = row[0]["url"]
765
  if url not in urls:
766
  urls.append(url)
767
- short_url = self.shorten_url(url, max_length)
768
- source_text_urls += f"""<a href="{url}">{short_url}</a><br>"""
769
  sentence_count += 1
770
  if row[3] is not None:
771
  entity_count.append(len(row[3]))
772
 
773
  entity_count_text = self.get_entity_count_text(sum(entity_count))
774
-
775
  return f"""
776
  <tr>
777
  <td>{input_sentences}</td>
@@ -779,7 +777,7 @@ class NewsVerification:
779
  <td>{self.text_prediction_label[0]}<br>
780
  ({self.text_prediction_score[0] * 100:.2f}%)<br><br>
781
  {entity_count_text}</td>
782
- <td>{source_text_urls}</td>
783
  </tr>
784
  """
785
 
@@ -789,15 +787,13 @@ class NewsVerification:
789
  or self.image_referent_url != ""
790
  ):
791
  source_image = f"""<img src="{self.image_referent_url}" width="100" height="150">""" # noqa: E501
792
- short_url = self.shorten_url(self.image_referent_url, max_length)
793
- source_image_url = (
794
- f"""<a href="{self.image_referent_url}">{short_url}</a>"""
795
- )
796
  else:
797
  source_image = "Image not found"
798
  source_image_url = ""
799
 
800
- return f"""<tr><td>input image</td><td>{source_image}</td><td>{self.image_prediction_label}<br>({self.image_prediction_score:.2f}%)</td><td>{source_image_url}</td></tr>""" # noqa: E501
 
801
 
802
  def get_entity_count_text(self, entity_count):
803
  if entity_count <= 0:
@@ -808,16 +804,6 @@ class NewsVerification:
808
  entity_count_text = "with altered entities"
809
  return entity_count_text
810
 
811
- def shorten_url(self, url, max_length=30):
812
- if url is None:
813
- return ""
814
-
815
- if len(url) > max_length:
816
- short_url = url[:max_length] + "..."
817
- else:
818
- short_url = url
819
- return short_url
820
-
821
  def color_text(self, text, colored_idx, highlighted_idx):
822
  paragraph = ""
823
  words = text.split()
 
507
  score = self.text_prediction_score[0]
508
 
509
  # Format displayed url
510
+ source_text_url = f"""<a href="{url}">{url}</a>"""
 
 
511
 
512
  # Format displayed entity count
513
  entity_count_text = self.get_entity_count_text(entity_count)
514
 
515
  border_top = "border-top: 1px solid transparent;"
516
  border_bottom = "border-bottom: 1px solid transparent;"
517
+ word_break = "word-break: break-all;"
518
  if first_url_row is True:
519
  # First & Last the group: no transparent
520
  if last_url_row is True:
 
525
  <td rowspan="{span_row}">{label}<br>
526
  ({score * 100:.2f}%)<br><br>
527
  {entity_count_text}</td>
528
+ <td rowspan="{span_row}"; style="{word_break}";>{source_text_url}</td>
529
  </tr>
530
  """
531
  # First row of the group: transparent bottom border
 
536
  <td rowspan="{span_row}">{label}<br>
537
  ({score * 100:.2f}%)<br><br>
538
  {entity_count_text}</td>
539
+ <td rowspan="{span_row}"; style="{word_break}";>{source_text_url}</td>
540
  </tr>
541
  """
542
  else:
 
564
  or self.image_referent_url != ""
565
  ):
566
  source_image = f"""<img src="{self.image_referent_url}" width="100" height="150">""" # noqa: E501
567
+ source_image_url = f"""<a href="{self.image_referent_url}">{self.image_referent_url}</a>""" # noqa: E501
 
 
 
568
  else:
569
  source_image = "Image not found"
570
  source_image_url = ""
571
 
572
+ word_break = "word-break: break-all;"
573
+ return f"""
574
+ <tr>
575
+ <td>input image</td>
576
+ <td>{source_image}</td>
577
+ <td>{self.image_prediction_label}<br>({self.image_prediction_score:.2f}%)</td>
578
+ <td style="{word_break}";>{source_image_url}</td></tr>"""
579
 
580
  def create_ordinary_user_table(self):
581
  rows = []
 
587
  return f"""
588
  <h5>Comparison between input news and source news:</h5>
589
  <table border="1" style="width:100%; text-align:left;">
590
+ <col style="width: 340px;">
591
  <col style="width: 30px;">
592
  <col style="width: 75px;">
593
  <thead>
 
616
  url = row["url"]
617
  if url not in urls:
618
  urls.append(url)
619
+ source_text_urls += f"""<a href="{url}">{url}</a><br>"""
 
620
 
621
+ word_break = "word-break: break-all;"
622
  return f"""
623
  <tr>
624
  <td>{input_sentences}</td>
625
  <td>{self.text_prediction_label[0]}<br>
626
  ({self.text_prediction_score[0] * 100:.2f}%)</td>
627
+ <td style="{word_break}";>{source_text_urls}</td>
628
  </tr>
629
  """
630
 
 
634
  self.image_referent_url is not None
635
  or self.image_referent_url != ""
636
  ):
637
+ source_image_url = f"""<a href="{self.image_referent_url}">{self.image_referent_url}</a>""" # noqa: E501
 
 
 
638
  else:
 
639
  source_image_url = ""
640
 
641
+ word_break = "word-break: break-all;"
642
+ return f"""<tr><td>input image</td><td>{self.image_prediction_label}<br>({self.image_prediction_score:.2f}%)</td><td style="{word_break}";>{source_image_url}</td></tr>""" # noqa: E501
643
 
644
  def create_governor_table(self):
645
  rows = []
 
763
  url = row[0]["url"]
764
  if url not in urls:
765
  urls.append(url)
766
+ source_text_urls += f"""<a href="{url}">{url}</a><br><br>"""
 
767
  sentence_count += 1
768
  if row[3] is not None:
769
  entity_count.append(len(row[3]))
770
 
771
  entity_count_text = self.get_entity_count_text(sum(entity_count))
772
+ word_break = "word-break: break-all;"
773
  return f"""
774
  <tr>
775
  <td>{input_sentences}</td>
 
777
  <td>{self.text_prediction_label[0]}<br>
778
  ({self.text_prediction_score[0] * 100:.2f}%)<br><br>
779
  {entity_count_text}</td>
780
+ <td style="{word_break}";>{source_text_urls}</td>
781
  </tr>
782
  """
783
 
 
787
  or self.image_referent_url != ""
788
  ):
789
  source_image = f"""<img src="{self.image_referent_url}" width="100" height="150">""" # noqa: E501
790
+ source_image_url = f"""<a href="{self.image_referent_url}">{self.image_referent_url}</a>""" # noqa: E501
 
 
 
791
  else:
792
  source_image = "Image not found"
793
  source_image_url = ""
794
 
795
+ word_break = "word-break: break-all;"
796
+ return f"""<tr><td>input image</td><td>{source_image}</td><td>{self.image_prediction_label}<br>({self.image_prediction_score:.2f}%)</td><td style="{word_break}";>{source_image_url}</td></tr>""" # noqa: E501
797
 
798
  def get_entity_count_text(self, entity_count):
799
  if entity_count <= 0:
 
804
  entity_count_text = "with altered entities"
805
  return entity_count_text
806
 
 
 
 
 
 
 
 
 
 
 
807
  def color_text(self, text, colored_idx, highlighted_idx):
808
  paragraph = ""
809
  words = text.split()
test.py CHANGED
@@ -1,2 +1,14 @@
1
- my_list = [0, 0]
2
- print(my_list[-2])
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+ # Assuming your CSV file is named 'data.csv'
4
+ try:
5
+ df = pd.read_csv('data/bbc_news_4o_mini.csv')
6
+ # df = pd.read_csv('data/MAGE_4o_mini.csv')
7
+
8
+ print(df.columns) # header names
9
+ print(len(df))
10
+
11
+ except FileNotFoundError:
12
+ print("Error: data.csv not found")
13
+ except Exception as e:
14
+ print(f"An error occurred: {e}")