pmkhanh7890 committed on
Commit
b489aea
·
1 Parent(s): 006f396

revise demo

Browse files
app.py CHANGED
@@ -1,62 +1,8 @@
1
- import difflib
2
-
3
- def compare_sentences(sentence_1, sentence_2):
4
- """
5
- Compares two sentences and identifies common phrases, outputting their start and end positions.
6
-
7
- Args:
8
- sentence_1: The first sentence (string).
9
- sentence_2: The second sentence (string).
10
-
11
- Returns:
12
- A list of dictionaries, where each dictionary represents a common phrase and contains:
13
- - "phrase": The common phrase (string).
14
- - "start_1": The starting index of the phrase in sentence_1 (int).
15
- - "end_1": The ending index of the phrase in sentence_1 (int).
16
- - "start_2": The starting index of the phrase in sentence_2 (int).
17
- - "end_2": The ending index of the phrase in sentence_2 (int).
18
- Returns an empty list if no common phrases are found. Handles edge cases like empty strings.
19
- """
20
-
21
- if not sentence_1 or not sentence_2: # Handle empty strings
22
- return []
23
-
24
- s = difflib.SequenceMatcher(None, sentence_1, sentence_2)
25
- common_phrases = []
26
-
27
- for block in s.get_matching_blocks():
28
- if block.size > 0: # Ignore zero-length matches
29
- start_1 = block.a
30
- end_1 = block.a + block.size
31
- start_2 = block.b
32
- end_2 = block.b + block.size
33
-
34
- phrase = sentence_1[start_1:end_1] # Or sentence_2[start_2:end_2], they are the same
35
-
36
- common_phrases.append({
37
- "phrase": phrase,
38
- "start_1": start_1,
39
- "end_1": end_1,
40
- "start_2": start_2,
41
- "end_2": end_2
42
- })
43
-
44
- return common_phrases
45
-
46
-
47
-
48
- # Example usage:
49
- sentence_1 = "
50
- Muzzamil Hussain was in 3rd-grade school when the first bombs fell on the playground outside of his classroom in Kargil, a mountain city in India. While the violent onset of the 1998 Kargil war between Pakistan and India unfolded around him, Hussain and his family escaped south to the remote Suru Valley.
51
- After India claimed victory later that year and displaced families returned home, Hussain listened as his bedridden grandfather asked the family to visit an old property, initially built by Hussain's great-grandfather, near Kargil's bazaar to make sure it had survived the war. When Hussain's uncles cracked through an old rusty latch and peered through the hand-carved wooden doors, they discovered wooden crates stamped with names of cities worldwide. Making space on the dusty floor, the family began to lay out silks from China, silver cookware from Afghanistan, rugs from Persia, turquoise from Tibet, saddles from Mongolia, and luxury soaps and salves from London, New York, and Munich. "
52
- sentence_2 = "A quick brown fox jumps over a lazy cat."
53
-
54
- common_phrases = compare_sentences(sentence_1, sentence_2)
55
-
56
- if common_phrases:
57
- for phrase_data in common_phrases:
58
- print(phrase_data)
59
- else:
60
- print("No common phrases found.")
61
-
62
-
 
1
+ import gradio as gr
2
+ path = "T://Projects//prj-nict-ai-content-detection//example_image_input.jpg"
3
+ html_code = input_image = f"""<img src="file://{path}" width="200" height="150">"""
4
+
5
+ with gr.Blocks() as demo:
6
+ gr.HTML(html_code)
7
+
8
+ demo.launch(share=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
application.py CHANGED
@@ -43,6 +43,14 @@ def load_url(url):
43
 
44
 
45
  def generate_analysis_report(news_title:str, news_content: str, news_image: Image):
 
 
 
 
 
 
 
 
46
  news_analysis = NewsVerification()
47
  news_analysis.load_news(news_title, news_content, news_image)
48
  news_analysis.generate_analysis_report()
@@ -91,7 +99,7 @@ with gr.Blocks() as demo:
91
  with gr.Column(scale=2):
92
  with gr.Accordion("News Analysis"):
93
  detection_button = gr.Button("Verify news")
94
- detailed_analysis = gr.HTML()
95
 
96
  # Connect events
97
  load_button.click(
@@ -116,35 +124,38 @@ with gr.Blocks() as demo:
116
  #url_input.change(load_image, inputs=url_input, outputs=image_view)
117
 
118
  try:
119
- with open('sample_1.txt','r', encoding='utf-8') as file:
120
- text_sample_1 = file.read()
121
- with open('sample_2.txt','r', encoding='utf-8') as file:
122
- text_sample_2 = file.read()
123
- with open('sample_3.txt','r', encoding='utf-8') as file:
124
- text_sample_3 = file.read()
 
 
125
  except FileNotFoundError:
126
  print("File not found.")
127
  except Exception as e:
128
  print(f"An error occurred: {e}")
129
 
130
- title_1 = "The ancient discovery that put a Silk Road city back on the map"
131
- title_2 = "The modern rediscovery that erased a Silk Road city from the map"
132
 
133
- image_1 = "sample_1.jpg.webp"
134
- image_2 = "sample_2.jpg.webp"
 
135
 
136
  gr.Examples(
137
  examples=[
138
- [title_1, image_1, text_sample_1],
139
- [title_2, image_2, text_sample_2],
140
- [title_1, image_2, text_sample_3],
141
  ],
142
  inputs=[news_title, news_image, news_content],
143
  label="Examples",
144
  example_labels=[
145
  "2 real news",
146
- "2 modified news",
147
- "1 real news & 1 fake news",
148
  ],
149
  )
150
 
@@ -152,4 +163,4 @@ demo.launch(share=False)
152
 
153
 
154
  # https://www.bbc.com/travel/article/20250127-one-of-the-last-traders-on-the-silk-road
155
- # https://bbc.com/future/article/20250110-how-often-you-should-wash-your-towels-according-to-science
 
43
 
44
 
45
  def generate_analysis_report(news_title:str, news_content: str, news_image: Image):
46
+ if news_image is not None:
47
+ # Convert to PIL Image for easier saving
48
+ img = Image.open(news_image)
49
+
50
+ # Save the image (you can customize the filename)
51
+ filepath = "example_image_input.jpg" # Or use a dynamic filename
52
+ img.save(filepath)
53
+
54
  news_analysis = NewsVerification()
55
  news_analysis.load_news(news_title, news_content, news_image)
56
  news_analysis.generate_analysis_report()
 
99
  with gr.Column(scale=2):
100
  with gr.Accordion("News Analysis"):
101
  detection_button = gr.Button("Verify news")
102
+ detailed_analysis = gr.HTML("<br>"*40)
103
 
104
  # Connect events
105
  load_button.click(
 
124
  #url_input.change(load_image, inputs=url_input, outputs=image_view)
125
 
126
  try:
127
+ with open('example_text_real.txt','r', encoding='utf-8') as file:
128
+ text_real_1 = file.read()
129
+ with open('example_text_real_2.txt','r', encoding='utf-8') as file:
130
+ text_real_2 = file.read()
131
+ with open('example_text_LLM_topic.txt','r', encoding='utf-8') as file:
132
+ text_llm_topic = file.read()
133
+ with open('example_text_LLM_modification.txt','r', encoding='utf-8') as file:
134
+ text_llm_modification = file.read()
135
  except FileNotFoundError:
136
  print("File not found.")
137
  except Exception as e:
138
  print(f"An error occurred: {e}")
139
 
140
+ title_1 = "Southampton news: Leeds target striker Cameron Archer"
141
+ title_2 = "Southampton news: Leeds target striker Cameron Archer"
142
 
143
+ image_1 = "example_image_real_1.jpg.webp"
144
+ image_2 = "example_image_real_2.jpg.webp"
145
+ image_3 = "example_image_real_3.jpg"
146
 
147
  gr.Examples(
148
  examples=[
149
+ [title_1, image_1, text_real_1 + '\n\n' + text_real_2],
150
+ [title_1, image_2, text_real_1 + '\n\n' + text_llm_modification],
151
+ [title_1, image_3, text_real_1 + '\n\n' + text_llm_topic],
152
  ],
153
  inputs=[news_title, news_image, news_content],
154
  label="Examples",
155
  example_labels=[
156
  "2 real news",
157
+ "1 real news + 1 LLM modification-based news",
158
+ "1 real news + 1 LLM topic-based news",
159
  ],
160
  )
161
 
 
163
 
164
 
165
  # https://www.bbc.com/travel/article/20250127-one-of-the-last-traders-on-the-silk-road
166
+ # https://bbc.com/future/article/20250110-how-often-you-should-wash-your-towels-according-to-science
example_image_input.jpg ADDED
example_image_real_1.jpg.webp ADDED
example_image_real_2.jpg.webp ADDED
example_image_real_3.jpg ADDED
example_image_real_3.jpg.webp ADDED
example_text_LLM_modification.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Miguel Almiron has permanently rejoined Atlanta United from Newcastle United for £8m. Almiron made 223 appearances for Newcastle, scoring 30 goals, but recently struggled for a starting place under Eddie Howe. He made a substitute appearance and waved farewell to fans in Newcastle's recent win against Southampton. Almiron played a key role in Newcastle reaching the Carabao Cup final and their Premier League top-four finish in 2022-23, and scored against Paris St-Germain in the Champions League.
example_text_LLM_topic.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ The January transfer window is in full swing, with clubs across the globe scrambling to strengthen their squads for the remainder of the season.
2
+ Premier League giants Manchester City have reportedly made a substantial bid for highly-rated midfielder Enzo Fernandez.
3
+ Meanwhile, struggling Serie A side Sampdoria are looking to bolster their attack with the loan signing of veteran striker Fabio Quagliarella.
4
+ Rumors are swirling around a potential move for Brazilian wonderkid Endrick to Real Madrid.
5
+ The transfer window officially closes on January 31st, leaving clubs with limited time to finalize their deals.
6
+ Fans are eagerly awaiting to see which teams make the shrewdest moves in this crucial period.
example_text_real.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Leeds are targeting a move for Southampton striker Cameron Archer with early talks having taken place.
2
+
3
+ It is unclear whether a deal can be achieved but the 23-year-old is open to a move before deadline day.
4
+
5
+ Other options are believed to be on the table as Archer seeks a guaranteed starting role after increasingly finding himself on the bench under recently appointed Saints manager Ivan Juric.
example_text_real_2.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ After weeks of mounting pressure to step down, Justin Trudeau has announced he will resign as Canada's prime minister and as leader of the Liberal Party of Canada.
2
+
3
+ The resignation brings a long political chapter to an end. Trudeau has been in office since 2015, when he brought the Liberals back to power from the political wilderness.
4
+
5
+ Trudeau said he will remain at the helm until a new Liberal leader is selected.
6
+
7
+ But many questions remain for the party, including who will take over and how they will manage a looming federal election. So what happens next?
sample_1.jpg.webp DELETED
Binary file (293 kB)
 
sample_1.txt DELETED
@@ -1,5 +0,0 @@
1
-
2
- Muzzamil Hussain was in grade school when the first bombs fell on the playground outside of his classroom in Kargil, a mountain city in the Indian province of Ladakh. While the violent onset of the 1999 Kargil war between Pakistan and India unfolded around him, Hussain and his family escaped south to the remote Suru Valley.
3
- After India claimed victory later that year and displaced families returned home, Hussain listened as his bedridden grandfather asked the family to visit an old property, originally built by Hussain's great-grandfather, near Kargil's bazaar to make sure it had survived the war. When Hussain's uncles cracked through an old rusty latch and peered through the hand-carved wooden doors, they discovered wooden crates stamped with names of cities around the world. Making space on the dusty floor, the family began to lay out silks from China, silver cookware from Afghanistan, rugs from Persia, turquoise from Tibet, saddles from Mongolia and luxury soaps and salves from London, New York and Munich.
4
-
5
- This mysterious act of destruction is investigated in Miss Austen, a new four-part television drama based on Gill Hornby's best-selling and critically acclaimed novel of the same name. Years after Jane's death, Cassandra (Keeley Hawes) has travelled to the village of Kintbury, in Berkshire, where the Austen family's friends, the Fowles, lived. Cassandra is, ostensibly, there to help Isabella Fowle (Rose Leslie), whose father Fulwar is dying. However this is a house that holds many bitter-sweet memories for her (in real life, this is where she had been staying when Jane wrote to her about Tom Lefroy), and she has an ulterior motive. She wants to retrieve some letters written by the late Jane to their friend Eliza Fowle, Isabella's mother, which she fears might contain details damaging to the novelist's legacy. When she finds the correspondence, it revives powerful memories of the events of years ago. The series takes place in two timelines – in 1830 – with the unmarried Isabella facing eviction from her home after her father's death and Cassandra trying to protect her sister's legacy – and decades previously, with young Cassandra (Synnøve Karlsen) and Jane (Patsy Ferran) navigating romances, family problems, and the ups and downs of life.
 
 
 
 
 
 
sample_2.jpg.webp DELETED
Binary file (112 kB)
 
sample_2.txt DELETED
@@ -1,5 +0,0 @@
1
-
2
- Muzzamil Hussain was in 3rd-grade school when the first bombs fell on the playground outside of his classroom in Kargil, a mountain city in India. While the violent onset of the 1998 Kargil war between Pakistan and India unfolded around him, Hussain and his family escaped south to the remote Suru Valley.
3
- After India claimed victory later that year and displaced families returned home, Hussain listened as his bedridden grandfather asked the family to visit an old property, initially built by Hussain's great-grandfather, near Kargil's bazaar to make sure it had survived the war. When Hussain's uncles cracked through an old rusty latch and peered through the hand-carved wooden doors, they discovered wooden crates stamped with names of cities worldwide. Making space on the dusty floor, the family began to lay out silks from China, silver cookware from Afghanistan, rugs from Persia, turquoise from Tibet, saddles from Mongolia, and luxury soaps and salves from London, New York, and Munich.
4
-
5
- This mysterious act of destruction is investigated in Miss Austen, a new four-part television drama based on Gill Hornby's best-selling and critically acclaimed novel. Years after Jane's death, Cassandra (Keeley Hawes) traveled to the village of Kintbury in Berkshire, where the Austen family's friends, the Fowles, lived. Cassandra is, ostensibly, there to help Isabella Fowle (Rose Leslie), whose father, Fulwar, is dying. However, this house holds many bitter-sweet memories for her (in real life, this is where she had been staying when Jane wrote about Tom Lefroy), and she has an ulterior motive. She wants to retrieve letters written by the late Jane to their friend Eliza Fowle, Isabella's mother, which she fears might contain details damaging the novelist's legacy. When she finds the correspondence, it revives powerful memories of the events of years ago. The series takes place in two timelines – in 1830 – with the unmarried Isabella facing eviction from her home after her father's death and Cassandra trying to protect her sister's legacy – and decades previously, with young Cassandra (Synnøve Karlsen) and Jane (Patsy Ferran) navigating romances, family problems, and the ups and downs of life.
 
 
 
 
 
 
sample_3.txt DELETED
@@ -1,5 +0,0 @@
1
-
2
- Muzzamil Hussain was in grade school when the first bombs fell on the playground outside of his classroom in Kargil, a mountain city in the Indian province of Ladakh. While the violent onset of the 1999 Kargil war between Pakistan and India unfolded around him, Hussain and his family escaped south to the remote Suru Valley.
3
- After India claimed victory later that year and displaced families returned home, Hussain listened as his bedridden grandfather asked the family to visit an old property, originally built by Hussain's great-grandfather, near Kargil's bazaar to make sure it had survived the war. When Hussain's uncles cracked through an old rusty latch and peered through the hand-carved wooden doors, they discovered wooden crates stamped with names of cities around the world. Making space on the dusty floor, the family began to lay out silks from China, silver cookware from Afghanistan, rugs from Persia, turquoise from Tibet, saddles from Mongolia and luxury soaps and salves from London, New York and Munich.
4
-
5
- Local squirrels have reportedly formed a highly organized nut-hoarding syndicate, causing widespread panic among residents. Experts warn this unprecedented squirrel activity could lead to a global nut shortage. One resident claims to have witnessed squirrels using tiny backpacks to transport their loot. Authorities are investigating the claims, but so far, the squirrels remain at large. The mayor has issued a statement urging citizens to remain calm and protect their acorns.
 
 
 
 
 
 
src/application/content_detection.py CHANGED
@@ -67,7 +67,8 @@ class NewsVerification():
67
  "url": "",
68
  }
69
  else:
70
- self.found_img_url.extend(img_urls)
 
71
  text_prediction_score = aligned_sentence["similarity"]
72
  if check_human(aligned_sentence):
73
  text_prediction_label = "HUMAN"
@@ -84,6 +85,7 @@ class NewsVerification():
84
  text_url = ""
85
  aligned_sentence = {}
86
  img_urls = []
 
87
 
88
  def detect_image_origin(self):
89
  print("CHECK IMAGE:")
@@ -163,10 +165,6 @@ class NewsVerification():
163
  pair["input_sentence"],
164
  pair["matched_sentence"],
165
  )
166
- # self.compare_sentences(
167
- # pair["input_sentence"],
168
- # pair["matched_sentence"],
169
- # )
170
  )
171
  self.analyzed_table.append(
172
  (input_words, source_words, input_indexes, source_indexes),
@@ -325,7 +323,7 @@ class NewsVerification():
325
  <thead>
326
  <tr>
327
  <th>Input news</th>
328
- <th>Source content</th>
329
  <th>Forensic</th>
330
  <th>Originality</th>
331
  </tr>
@@ -353,11 +351,15 @@ class NewsVerification():
353
  return f"""<tr><td>{input_sentence}</td><td>{source_sentence}</td><td>{self.text_prediction_label[index]}<br>({self.text_prediction_score[index]*100:.2f}%)</td><td>{source_text_url}</td></tr>"""
354
 
355
  def format_image_row(self, max_length=30):
356
- # input_image = f"""<img src="{self.news_image}" width="200" height="150">"""
357
  print(f"self.news_image = {self.news_image}")
358
- source_image = f"""<img src="{self.image_referent_url}" width="200" height="150">"""
359
- short_url = self.shorten_url(self.image_referent_url, max_length)
360
- source_image_url = f"""<a href="{self.image_referent_url}">{short_url}</a>"""
 
 
 
 
361
  return f"""<tr><td>input image</td><td>{source_image}</td><td>{self.image_prediction_label}<br>({self.image_prediction_score:.2f}%)</td><td>{source_image_url}</td></tr>"""
362
 
363
  def shorten_url(self, url, max_length=30):
 
67
  "url": "",
68
  }
69
  else:
70
+ self.found_img_url.extend(img_urls) # TODO: for demo purposes
71
+ self.found_img_url.append(img_urls[0]) # TODO: for demo purposes
72
  text_prediction_score = aligned_sentence["similarity"]
73
  if check_human(aligned_sentence):
74
  text_prediction_label = "HUMAN"
 
85
  text_url = ""
86
  aligned_sentence = {}
87
  img_urls = []
88
+ self.found_img_url = list(set(self.found_img_url))
89
 
90
  def detect_image_origin(self):
91
  print("CHECK IMAGE:")
 
165
  pair["input_sentence"],
166
  pair["matched_sentence"],
167
  )
 
 
 
 
168
  )
169
  self.analyzed_table.append(
170
  (input_words, source_words, input_indexes, source_indexes),
 
323
  <thead>
324
  <tr>
325
  <th>Input news</th>
326
+ <th>Source (URL provided in Originality column correspondingly)</th>
327
  <th>Forensic</th>
328
  <th>Originality</th>
329
  </tr>
 
351
  return f"""<tr><td>{input_sentence}</td><td>{source_sentence}</td><td>{self.text_prediction_label[index]}<br>({self.text_prediction_score[index]*100:.2f}%)</td><td>{source_text_url}</td></tr>"""
352
 
353
  def format_image_row(self, max_length=30):
354
+ input_image = f"""<img src="example_image_input.jpg" width="200" height="150">"""
355
  print(f"self.news_image = {self.news_image}")
356
+
357
+ if self.image_referent_url is not None or self.image_referent_url != "":
358
+ source_image = f"""<img src="{self.image_referent_url}" width="200" height="150">"""
359
+ short_url = self.shorten_url(self.image_referent_url, max_length)
360
+ source_image_url = f"""<a href="{self.image_referent_url}">{short_url}</a>"""
361
+ else:
362
+ source_image = "Image not found"
363
  return f"""<tr><td>input image</td><td>{source_image}</td><td>{self.image_prediction_label}<br>({self.image_prediction_score:.2f}%)</td><td>{source_image_url}</td></tr>"""
364
 
365
  def shorten_url(self, url, max_length=30):
src/application/text/search.py CHANGED
@@ -160,12 +160,12 @@ def generate_search_phrases(input_text):
160
  search_phrases.append(input_text)
161
 
162
  # Method 3: Split text by chunks
163
- search_phrases.extend(get_chunk(input_text))
164
 
165
  # Method 4: Get most identities and key words
166
  entities = extract_entities(input_text)
167
  keywords = get_keywords(input_text, 16)
168
  search_phrase = " ".join(entities) + " " + " ".join(keywords)
169
- search_phrases.append(search_phrase)
170
 
171
  return search_phrases
 
160
  search_phrases.append(input_text)
161
 
162
  # Method 3: Split text by chunks
163
+ # search_phrases.extend(get_chunk(input_text)) # TODO: for demo purposes
164
 
165
  # Method 4: Get most identities and key words
166
  entities = extract_entities(input_text)
167
  keywords = get_keywords(input_text, 16)
168
  search_phrase = " ".join(entities) + " " + " ".join(keywords)
169
+ # search_phrases.append(search_phrase) # TODO: for demo purposes
170
 
171
  return search_phrases
src/application/text/search_detection.py CHANGED
@@ -45,6 +45,9 @@ def detect_text_by_relative_search(input_text, is_support_opposite = False):
45
  for url in urls[:3]:
46
  if url in checked_urls: # visited url
47
  continue
 
 
 
48
  checked_urls.add(url)
49
  print(f"\t\tChecking URL: {url}")
50
 
 
45
  for url in urls[:3]:
46
  if url in checked_urls: # visited url
47
  continue
48
+ if "bbc.com" not in url:
49
+ continue
50
+
51
  checked_urls.add(url)
52
  print(f"\t\tChecking URL: {url}")
53