Spaces:
Sleeping
Sleeping
Commit
·
b489aea
1
Parent(s):
006f396
revise demo
Browse files- app.py +8 -62
- application.py +28 -17
- example_image_input.jpg +0 -0
- example_image_real_1.jpg.webp +0 -0
- example_image_real_2.jpg.webp +0 -0
- example_image_real_3.jpg +0 -0
- example_image_real_3.jpg.webp +0 -0
- example_text_LLM_modification.txt +1 -0
- example_text_LLM_topic.txt +6 -0
- example_text_real.txt +5 -0
- example_text_real_2.txt +7 -0
- sample_1.jpg.webp +0 -0
- sample_1.txt +0 -5
- sample_2.jpg.webp +0 -0
- sample_2.txt +0 -5
- sample_3.txt +0 -5
- src/application/content_detection.py +12 -10
- src/application/text/search.py +2 -2
- src/application/text/search_detection.py +3 -0
app.py
CHANGED
@@ -1,62 +1,8 @@
|
|
1 |
-
import
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
sentence_2: The second sentence (string).
|
10 |
-
|
11 |
-
Returns:
|
12 |
-
A list of dictionaries, where each dictionary represents a common phrase and contains:
|
13 |
-
- "phrase": The common phrase (string).
|
14 |
-
- "start_1": The starting index of the phrase in sentence_1 (int).
|
15 |
-
- "end_1": The ending index of the phrase in sentence_1 (int).
|
16 |
-
- "start_2": The starting index of the phrase in sentence_2 (int).
|
17 |
-
- "end_2": The ending index of the phrase in sentence_2 (int).
|
18 |
-
Returns an empty list if no common phrases are found. Handles edge cases like empty strings.
|
19 |
-
"""
|
20 |
-
|
21 |
-
if not sentence_1 or not sentence_2: # Handle empty strings
|
22 |
-
return []
|
23 |
-
|
24 |
-
s = difflib.SequenceMatcher(None, sentence_1, sentence_2)
|
25 |
-
common_phrases = []
|
26 |
-
|
27 |
-
for block in s.get_matching_blocks():
|
28 |
-
if block.size > 0: # Ignore zero-length matches
|
29 |
-
start_1 = block.a
|
30 |
-
end_1 = block.a + block.size
|
31 |
-
start_2 = block.b
|
32 |
-
end_2 = block.b + block.size
|
33 |
-
|
34 |
-
phrase = sentence_1[start_1:end_1] # Or sentence_2[start_2:end_2], they are the same
|
35 |
-
|
36 |
-
common_phrases.append({
|
37 |
-
"phrase": phrase,
|
38 |
-
"start_1": start_1,
|
39 |
-
"end_1": end_1,
|
40 |
-
"start_2": start_2,
|
41 |
-
"end_2": end_2
|
42 |
-
})
|
43 |
-
|
44 |
-
return common_phrases
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
# Example usage:
|
49 |
-
sentence_1 = "
|
50 |
-
Muzzamil Hussain was in 3rd-grade school when the first bombs fell on the playground outside of his classroom in Kargil, a mountain city in India. While the violent onset of the 1998 Kargil war between Pakistan and India unfolded around him, Hussain and his family escaped south to the remote Suru Valley.
|
51 |
-
After India claimed victory later that year and displaced families returned home, Hussain listened as his bedridden grandfather asked the family to visit an old property, initially built by Hussain's great-grandfather, near Kargil's bazaar to make sure it had survived the war. When Hussain's uncles cracked through an old rusty latch and peered through the hand-carved wooden doors, they discovered wooden crates stamped with names of cities worldwide. Making space on the dusty floor, the family began to lay out silks from China, silver cookware from Afghanistan, rugs from Persia, turquoise from Tibet, saddles from Mongolia, and luxury soaps and salves from London, New York, and Munich. "
|
52 |
-
sentence_2 = "A quick brown fox jumps over a lazy cat."
|
53 |
-
|
54 |
-
common_phrases = compare_sentences(sentence_1, sentence_2)
|
55 |
-
|
56 |
-
if common_phrases:
|
57 |
-
for phrase_data in common_phrases:
|
58 |
-
print(phrase_data)
|
59 |
-
else:
|
60 |
-
print("No common phrases found.")
|
61 |
-
|
62 |
-
|
|
|
1 |
+
"""Minimal Gradio demo that renders the bundled example image via an HTML tag."""
from pathlib import Path

import gradio as gr

# Resolve the image relative to this script instead of a machine-specific
# absolute path, so the demo runs from any checkout location.
path = Path(__file__).resolve().parent / "example_image_input.jpg"

# Path.as_uri() yields a well-formed file URI on both POSIX and Windows.
# NOTE(review): browsers commonly refuse file:// resources on pages served
# over http(s); confirm the image actually renders in the target deployment.
input_image = f"""<img src="{path.as_uri()}" width="200" height="150">"""
html_code = input_image

with gr.Blocks() as demo:
    gr.HTML(html_code)

demo.launch(share=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
application.py
CHANGED
@@ -43,6 +43,14 @@ def load_url(url):
|
|
43 |
|
44 |
|
45 |
def generate_analysis_report(news_title:str, news_content: str, news_image: Image):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
news_analysis = NewsVerification()
|
47 |
news_analysis.load_news(news_title, news_content, news_image)
|
48 |
news_analysis.generate_analysis_report()
|
@@ -91,7 +99,7 @@ with gr.Blocks() as demo:
|
|
91 |
with gr.Column(scale=2):
|
92 |
with gr.Accordion("News Analysis"):
|
93 |
detection_button = gr.Button("Verify news")
|
94 |
-
detailed_analysis = gr.HTML()
|
95 |
|
96 |
# Connect events
|
97 |
load_button.click(
|
@@ -116,35 +124,38 @@ with gr.Blocks() as demo:
|
|
116 |
#url_input.change(load_image, inputs=url_input, outputs=image_view)
|
117 |
|
118 |
try:
|
119 |
-
with open('
|
120 |
-
|
121 |
-
with open('
|
122 |
-
|
123 |
-
with open('
|
124 |
-
|
|
|
|
|
125 |
except FileNotFoundError:
|
126 |
print("File not found.")
|
127 |
except Exception as e:
|
128 |
print(f"An error occurred: {e}")
|
129 |
|
130 |
-
title_1 = "
|
131 |
-
title_2 = "
|
132 |
|
133 |
-
image_1 = "
|
134 |
-
image_2 = "
|
|
|
135 |
|
136 |
gr.Examples(
|
137 |
examples=[
|
138 |
-
[title_1, image_1,
|
139 |
-
[
|
140 |
-
[title_1,
|
141 |
],
|
142 |
inputs=[news_title, news_image, news_content],
|
143 |
label="Examples",
|
144 |
example_labels=[
|
145 |
"2 real news",
|
146 |
-
"
|
147 |
-
"1 real news
|
148 |
],
|
149 |
)
|
150 |
|
@@ -152,4 +163,4 @@ demo.launch(share=False)
|
|
152 |
|
153 |
|
154 |
# https://www.bbc.com/travel/article/20250127-one-of-the-last-traders-on-the-silk-road
|
155 |
-
# https://bbc.com/future/article/20250110-how-often-you-should-wash-your-towels-according-to-science
|
|
|
43 |
|
44 |
|
45 |
def generate_analysis_report(news_title:str, news_content: str, news_image: Image):
|
46 |
+
if news_image is not None:
|
47 |
+
# Convert to PIL Image for easier saving
|
48 |
+
img = Image.open(news_image)
|
49 |
+
|
50 |
+
# Save the image (you can customize the filename)
|
51 |
+
filepath = "example_image_input.jpg" # Or use a dynamic filename
|
52 |
+
img.save(filepath)
|
53 |
+
|
54 |
news_analysis = NewsVerification()
|
55 |
news_analysis.load_news(news_title, news_content, news_image)
|
56 |
news_analysis.generate_analysis_report()
|
|
|
99 |
with gr.Column(scale=2):
|
100 |
with gr.Accordion("News Analysis"):
|
101 |
detection_button = gr.Button("Verify news")
|
102 |
+
detailed_analysis = gr.HTML("<br>"*40)
|
103 |
|
104 |
# Connect events
|
105 |
load_button.click(
|
|
|
124 |
#url_input.change(load_image, inputs=url_input, outputs=image_view)
|
125 |
|
126 |
try:
|
127 |
+
with open('example_text_real.txt','r', encoding='utf-8') as file:
|
128 |
+
text_real_1 = file.read()
|
129 |
+
with open('example_text_real_2.txt','r', encoding='utf-8') as file:
|
130 |
+
text_real_2 = file.read()
|
131 |
+
with open('example_text_LLM_topic.txt','r', encoding='utf-8') as file:
|
132 |
+
text_llm_topic = file.read()
|
133 |
+
with open('example_text_LLM_modification.txt','r', encoding='utf-8') as file:
|
134 |
+
text_llm_modification = file.read()
|
135 |
except FileNotFoundError:
|
136 |
print("File not found.")
|
137 |
except Exception as e:
|
138 |
print(f"An error occurred: {e}")
|
139 |
|
140 |
+
title_1 = "Southampton news: Leeds target striker Cameron Archer"
|
141 |
+
title_2 = "Southampton news: Leeds target striker Cameron Archer"
|
142 |
|
143 |
+
image_1 = "example_image_real_1.jpg.webp"
|
144 |
+
image_2 = "example_image_real_2.jpg.webp"
|
145 |
+
image_3 = "example_image_real_3.jpg"
|
146 |
|
147 |
gr.Examples(
|
148 |
examples=[
|
149 |
+
[title_1, image_1, text_real_1 + '\n\n' + text_real_2],
|
150 |
+
[title_1, image_2, text_real_1 + '\n\n' + text_llm_modification],
|
151 |
+
[title_1, image_3, text_real_1 + '\n\n' + text_llm_topic],
|
152 |
],
|
153 |
inputs=[news_title, news_image, news_content],
|
154 |
label="Examples",
|
155 |
example_labels=[
|
156 |
"2 real news",
|
157 |
+
"1 real news + 1 LLM modification-based news",
|
158 |
+
"1 real news + 1 LLM topic-based news",
|
159 |
],
|
160 |
)
|
161 |
|
|
|
163 |
|
164 |
|
165 |
# https://www.bbc.com/travel/article/20250127-one-of-the-last-traders-on-the-silk-road
|
166 |
+
# https://bbc.com/future/article/20250110-how-often-you-should-wash-your-towels-according-to-science
|
example_image_input.jpg
ADDED
![]() |
example_image_real_1.jpg.webp
ADDED
![]() |
example_image_real_2.jpg.webp
ADDED
![]() |
example_image_real_3.jpg
ADDED
![]() |
example_image_real_3.jpg.webp
ADDED
![]() |
example_text_LLM_modification.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Miguel Almiron has permanently rejoined Atlanta United from Newcastle United for £8m. Almiron made 223 appearances for Newcastle, scoring 30 goals, but recently struggled for a starting place under Eddie Howe. He made a substitute appearance and waved farewell to fans in Newcastle's recent win against Southampton. Almiron played a key role in Newcastle reaching the Carabao Cup final and their Premier League top-four finish in 2022-23, and scored against Paris St-Germain in the Champions League.
|
example_text_LLM_topic.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
The January transfer window is in full swing, with clubs across the globe scrambling to strengthen their squads for the remainder of the season.
|
2 |
+
Premier League giants Manchester City have reportedly made a substantial bid for highly-rated midfielder Enzo Fernandez.
|
3 |
+
Meanwhile, struggling Serie A side Sampdoria are looking to bolster their attack with the loan signing of veteran striker Fabio Quagliarella.
|
4 |
+
Rumors are swirling around a potential move for Brazilian wonderkid Endrick to Real Madrid.
|
5 |
+
The transfer window officially closes on January 31st, leaving clubs with limited time to finalize their deals.
|
6 |
+
Fans are eagerly awaiting to see which teams make the shrewdest moves in this crucial period.
|
example_text_real.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Leeds are targeting a move for Southampton striker Cameron Archer with early talks having taken place.
|
2 |
+
|
3 |
+
It is unclear whether a deal can be achieved but the 23-year-old is open to a move before deadline day.
|
4 |
+
|
5 |
+
Other options are believed to be on the table as Archer seeks a guaranteed starting role after increasingly finding himself on the bench under recently appointed Saints manager Ivan Juric.
|
example_text_real_2.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
After weeks of mounting pressure to step down, Justin Trudeau has announced he will resign as Canada's prime minister and as leader of the Liberal Party of Canada.
|
2 |
+
|
3 |
+
The resignation brings a long political chapter to an end. Trudeau has been in office since 2015, when he brought the Liberals back to power from the political wilderness.
|
4 |
+
|
5 |
+
Trudeau said he will remain at the helm until a new Liberal leader is selected.
|
6 |
+
|
7 |
+
But many questions remain for the party, including who will take over and how they will manage a looming federal election. So what happens next?
|
sample_1.jpg.webp
DELETED
Binary file (293 kB)
|
|
sample_1.txt
DELETED
@@ -1,5 +0,0 @@
|
|
1 |
-
|
2 |
-
Muzzamil Hussain was in grade school when the first bombs fell on the playground outside of his classroom in Kargil, a mountain city in the Indian province of Ladakh. While the violent onset of the 1999 Kargil war between Pakistan and India unfolded around him, Hussain and his family escaped south to the remote Suru Valley.
|
3 |
-
After India claimed victory later that year and displaced families returned home, Hussain listened as his bedridden grandfather asked the family to visit an old property, originally built by Hussain's great-grandfather, near Kargil's bazaar to make sure it had survived the war. When Hussain's uncles cracked through an old rusty latch and peered through the hand-carved wooden doors, they discovered wooden crates stamped with names of cities around the world. Making space on the dusty floor, the family began to lay out silks from China, silver cookware from Afghanistan, rugs from Persia, turquoise from Tibet, saddles from Mongolia and luxury soaps and salves from London, New York and Munich.
|
4 |
-
|
5 |
-
This mysterious act of destruction is investigated in Miss Austen, a new four-part television drama based on Gill Hornby's best-selling and critically acclaimed novel of the same name. Years after Jane's death, Cassandra (Keeley Hawes) has travelled to the village of Kintbury, in Berkshire, where the Austen family's friends, the Fowles, lived. Cassandra is, ostensibly, there to help Isabella Fowle (Rose Leslie), whose father Fulwar is dying. However this is a house that holds many bitter-sweet memories for her (in real life, this is where she had been staying when Jane wrote to her about Tom Lefroy), and she has an ulterior motive. She wants to retrieve some letters written by the late Jane to their friend Eliza Fowle, Isabella's mother, which she fears might contain details damaging to the novelist's legacy. When she finds the correspondence, it revives powerful memories of the events of years ago. The series takes place in two timelines – in 1830 – with the unmarried Isabella facing eviction from her home after her father's death and Cassandra trying to protect her sister's legacy – and decades previously, with young Cassandra (Synnøve Karlsen) and Jane (Patsy Ferran) navigating romances, family problems, and the ups and downs of life.
|
|
|
|
|
|
|
|
|
|
|
|
sample_2.jpg.webp
DELETED
Binary file (112 kB)
|
|
sample_2.txt
DELETED
@@ -1,5 +0,0 @@
|
|
1 |
-
|
2 |
-
Muzzamil Hussain was in 3rd-grade school when the first bombs fell on the playground outside of his classroom in Kargil, a mountain city in India. While the violent onset of the 1998 Kargil war between Pakistan and India unfolded around him, Hussain and his family escaped south to the remote Suru Valley.
|
3 |
-
After India claimed victory later that year and displaced families returned home, Hussain listened as his bedridden grandfather asked the family to visit an old property, initially built by Hussain's great-grandfather, near Kargil's bazaar to make sure it had survived the war. When Hussain's uncles cracked through an old rusty latch and peered through the hand-carved wooden doors, they discovered wooden crates stamped with names of cities worldwide. Making space on the dusty floor, the family began to lay out silks from China, silver cookware from Afghanistan, rugs from Persia, turquoise from Tibet, saddles from Mongolia, and luxury soaps and salves from London, New York, and Munich.
|
4 |
-
|
5 |
-
This mysterious act of destruction is investigated in Miss Austen, a new four-part television drama based on Gill Hornby's best-selling and critically acclaimed novel. Years after Jane's death, Cassandra (Keeley Hawes) traveled to the village of Kintbury in Berkshire, where the Austen family's friends, the Fowles, lived. Cassandra is, ostensibly, there to help Isabella Fowle (Rose Leslie), whose father, Fulwar, is dying. However, this house holds many bitter-sweet memories for her (in real life, this is where she had been staying when Jane wrote about Tom Lefroy), and she has an ulterior motive. She wants to retrieve letters written by the late Jane to their friend Eliza Fowle, Isabella's mother, which she fears might contain details damaging the novelist's legacy. When she finds the correspondence, it revives powerful memories of the events of years ago. The series takes place in two timelines – in 1830 – with the unmarried Isabella facing eviction from her home after her father's death and Cassandra trying to protect her sister's legacy – and decades previously, with young Cassandra (Synnøve Karlsen) and Jane (Patsy Ferran) navigating romances, family problems, and the ups and downs of life.
|
|
|
|
|
|
|
|
|
|
|
|
sample_3.txt
DELETED
@@ -1,5 +0,0 @@
|
|
1 |
-
|
2 |
-
Muzzamil Hussain was in grade school when the first bombs fell on the playground outside of his classroom in Kargil, a mountain city in the Indian province of Ladakh. While the violent onset of the 1999 Kargil war between Pakistan and India unfolded around him, Hussain and his family escaped south to the remote Suru Valley.
|
3 |
-
After India claimed victory later that year and displaced families returned home, Hussain listened as his bedridden grandfather asked the family to visit an old property, originally built by Hussain's great-grandfather, near Kargil's bazaar to make sure it had survived the war. When Hussain's uncles cracked through an old rusty latch and peered through the hand-carved wooden doors, they discovered wooden crates stamped with names of cities around the world. Making space on the dusty floor, the family began to lay out silks from China, silver cookware from Afghanistan, rugs from Persia, turquoise from Tibet, saddles from Mongolia and luxury soaps and salves from London, New York and Munich.
|
4 |
-
|
5 |
-
Local squirrels have reportedly formed a highly organized nut-hoarding syndicate, causing widespread panic among residents. Experts warn this unprecedented squirrel activity could lead to a global nut shortage. One resident claims to have witnessed squirrels using tiny backpacks to transport their loot. Authorities are investigating the claims, but so far, the squirrels remain at large. The mayor has issued a statement urging citizens to remain calm and protect their acorns.
|
|
|
|
|
|
|
|
|
|
|
|
src/application/content_detection.py
CHANGED
@@ -67,7 +67,8 @@ class NewsVerification():
|
|
67 |
"url": "",
|
68 |
}
|
69 |
else:
|
70 |
-
self.found_img_url.extend(img_urls)
|
|
|
71 |
text_prediction_score = aligned_sentence["similarity"]
|
72 |
if check_human(aligned_sentence):
|
73 |
text_prediction_label = "HUMAN"
|
@@ -84,6 +85,7 @@ class NewsVerification():
|
|
84 |
text_url = ""
|
85 |
aligned_sentence = {}
|
86 |
img_urls = []
|
|
|
87 |
|
88 |
def detect_image_origin(self):
|
89 |
print("CHECK IMAGE:")
|
@@ -163,10 +165,6 @@ class NewsVerification():
|
|
163 |
pair["input_sentence"],
|
164 |
pair["matched_sentence"],
|
165 |
)
|
166 |
-
# self.compare_sentences(
|
167 |
-
# pair["input_sentence"],
|
168 |
-
# pair["matched_sentence"],
|
169 |
-
# )
|
170 |
)
|
171 |
self.analyzed_table.append(
|
172 |
(input_words, source_words, input_indexes, source_indexes),
|
@@ -325,7 +323,7 @@ class NewsVerification():
|
|
325 |
<thead>
|
326 |
<tr>
|
327 |
<th>Input news</th>
|
328 |
-
<th>Source
|
329 |
<th>Forensic</th>
|
330 |
<th>Originality</th>
|
331 |
</tr>
|
@@ -353,11 +351,15 @@ class NewsVerification():
|
|
353 |
return f"""<tr><td>{input_sentence}</td><td>{source_sentence}</td><td>{self.text_prediction_label[index]}<br>({self.text_prediction_score[index]*100:.2f}%)</td><td>{source_text_url}</td></tr>"""
|
354 |
|
355 |
def format_image_row(self, max_length=30):
|
356 |
-
|
357 |
print(f"self.news_image = {self.news_image}")
|
358 |
-
|
359 |
-
|
360 |
-
|
|
|
|
|
|
|
|
|
361 |
return f"""<tr><td>input image</td><td>{source_image}</td><td>{self.image_prediction_label}<br>({self.image_prediction_score:.2f}%)</td><td>{source_image_url}</td></tr>"""
|
362 |
|
363 |
def shorten_url(self, url, max_length=30):
|
|
|
67 |
"url": "",
|
68 |
}
|
69 |
else:
|
70 |
+
self.found_img_url.extend(img_urls) # TODO: for demo purposes
|
71 |
+
self.found_img_url.append(img_urls[0]) # TODO: for demo purposes
|
72 |
text_prediction_score = aligned_sentence["similarity"]
|
73 |
if check_human(aligned_sentence):
|
74 |
text_prediction_label = "HUMAN"
|
|
|
85 |
text_url = ""
|
86 |
aligned_sentence = {}
|
87 |
img_urls = []
|
88 |
+
self.found_img_url = list(set(self.found_img_url))
|
89 |
|
90 |
def detect_image_origin(self):
|
91 |
print("CHECK IMAGE:")
|
|
|
165 |
pair["input_sentence"],
|
166 |
pair["matched_sentence"],
|
167 |
)
|
|
|
|
|
|
|
|
|
168 |
)
|
169 |
self.analyzed_table.append(
|
170 |
(input_words, source_words, input_indexes, source_indexes),
|
|
|
323 |
<thead>
|
324 |
<tr>
|
325 |
<th>Input news</th>
|
326 |
+
<th>Source (URL provided in Originality column correspondingly)</th>
|
327 |
<th>Forensic</th>
|
328 |
<th>Originality</th>
|
329 |
</tr>
|
|
|
351 |
return f"""<tr><td>{input_sentence}</td><td>{source_sentence}</td><td>{self.text_prediction_label[index]}<br>({self.text_prediction_score[index]*100:.2f}%)</td><td>{source_text_url}</td></tr>"""
|
352 |
|
353 |
def format_image_row(self, max_length=30):
    """Build the HTML table row that summarises the image analysis.

    Args:
        max_length: Forwarded to ``self.shorten_url`` as the length limit
            for the visible link text.

    Returns:
        A ``<tr>...</tr>`` string with four cells: the literal label
        "input image", the referent image (or "Image not found"), the
        prediction label with its score, and a link to the referent image
        (empty when no referent URL is available).
    """
    print(f"self.news_image = {self.news_image}")

    referent_url = self.image_referent_url
    # Truthiness covers both None and "". The previous
    # `is not None or != ""` test was always true, so a missing URL was
    # rendered as <img src="None"> and passed on to shorten_url.
    if referent_url:
        source_image = f"""<img src="{referent_url}" width="200" height="150">"""
        short_url = self.shorten_url(referent_url, max_length)
        source_image_url = f"""<a href="{referent_url}">{short_url}</a>"""
    else:
        source_image = "Image not found"
        # Must be defined on this branch too; the return below formats it.
        source_image_url = ""

    return f"""<tr><td>input image</td><td>{source_image}</td><td>{self.image_prediction_label}<br>({self.image_prediction_score:.2f}%)</td><td>{source_image_url}</td></tr>"""
|
364 |
|
365 |
def shorten_url(self, url, max_length=30):
|
src/application/text/search.py
CHANGED
@@ -160,12 +160,12 @@ def generate_search_phrases(input_text):
|
|
160 |
search_phrases.append(input_text)
|
161 |
|
162 |
# Method 3: Split text by chunks
|
163 |
-
search_phrases.extend(get_chunk(input_text))
|
164 |
|
165 |
# Method 4: Get most identities and key words
|
166 |
entities = extract_entities(input_text)
|
167 |
keywords = get_keywords(input_text, 16)
|
168 |
search_phrase = " ".join(entities) + " " + " ".join(keywords)
|
169 |
-
search_phrases.append(search_phrase)
|
170 |
|
171 |
return search_phrases
|
|
|
160 |
search_phrases.append(input_text)
|
161 |
|
162 |
# Method 3: Split text by chunks
|
163 |
+
# search_phrases.extend(get_chunk(input_text)) # TODO: for demo purposes
|
164 |
|
165 |
# Method 4: Get most identities and key words
|
166 |
entities = extract_entities(input_text)
|
167 |
keywords = get_keywords(input_text, 16)
|
168 |
search_phrase = " ".join(entities) + " " + " ".join(keywords)
|
169 |
+
# search_phrases.append(search_phrase) # TODO: for demo purposes
|
170 |
|
171 |
return search_phrases
|
src/application/text/search_detection.py
CHANGED
@@ -45,6 +45,9 @@ def detect_text_by_relative_search(input_text, is_support_opposite = False):
|
|
45 |
for url in urls[:3]:
|
46 |
if url in checked_urls: # visited url
|
47 |
continue
|
|
|
|
|
|
|
48 |
checked_urls.add(url)
|
49 |
print(f"\t\tChecking URL: {url}")
|
50 |
|
|
|
45 |
for url in urls[:3]:
|
46 |
if url in checked_urls: # visited url
|
47 |
continue
|
48 |
+
if "bbc.com" not in url:
|
49 |
+
continue
|
50 |
+
|
51 |
checked_urls.add(url)
|
52 |
print(f"\t\tChecking URL: {url}")
|
53 |
|