Fix: Improved OUI calculation and report in English, added article scoring, and optimized article selection
Browse files
app.py
CHANGED
@@ -89,22 +89,14 @@ def fetch_pubmed_details(article_id):
|
|
89 |
response.raise_for_status()
|
90 |
import xml.etree.ElementTree as ET
|
91 |
root = ET.fromstring(response.text)
|
92 |
-
|
93 |
-
|
94 |
-
abstract_elem = root.find(".//AbstractText")
|
95 |
-
|
96 |
-
# If the title or abstract is missing, report it in the logs
|
97 |
-
if title_elem is None or abstract_elem is None:
|
98 |
-
print(f"Article {article_id} does not contain title or abstract.")
|
99 |
-
|
100 |
-
title = title_elem.text if title_elem is not None else "No Title"
|
101 |
-
abstract = abstract_elem.text if abstract_elem is not None else "No Abstract"
|
102 |
keywords = root.findall(".//Keyword")
|
103 |
keyword_text = " ".join([kw.text for kw in keywords if kw.text]) if keywords else ""
|
104 |
return title, f"{abstract} {keyword_text}"
|
105 |
except Exception as e:
|
106 |
print(f"Errore recupero abstract: {e}")
|
107 |
-
return None
|
108 |
|
109 |
def fetch_pubmed(query, year_start, year_end, max_results=10):
|
110 |
base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
@@ -113,16 +105,12 @@ def fetch_pubmed(query, year_start, year_end, max_results=10):
|
|
113 |
"term": f"{query} AND ({year_start}[PDAT] : {year_end}[PDAT])",
|
114 |
"retmax": max_results,
|
115 |
"retmode": "json",
|
116 |
-
"sort": "relevance" # <--
|
117 |
}
|
118 |
try:
|
119 |
response = requests.get(base_url, params=params)
|
120 |
response.raise_for_status()
|
121 |
id_list = response.json().get("esearchresult", {}).get("idlist", [])
|
122 |
-
|
123 |
-
# Add a log entry showing how many articles were found
|
124 |
-
print(f"Found {len(id_list)} articles in PubMed.")
|
125 |
-
|
126 |
return id_list
|
127 |
except Exception as e:
|
128 |
print(f"Errore fetch PubMed: {e}")
|
@@ -166,12 +154,18 @@ def validate():
|
|
166 |
pubmed_ids = fetch_pubmed(query, year_start, year_end, num_articles)
|
167 |
|
168 |
if not pubmed_ids:
|
169 |
-
flash("Nessun articolo trovato su PubMed per questa ricerca.", "error")
|
170 |
return redirect(url_for("index"))
|
171 |
|
172 |
-
pubmed_results = [
|
173 |
-
|
174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
|
176 |
results = validate_document(pdf_path, pubmed_texts, method="pubmed", titles=pubmed_titles)
|
177 |
|
@@ -226,3 +220,4 @@ def download_report():
|
|
226 |
|
227 |
if __name__ == "__main__":
|
228 |
app.run(debug=True, host="0.0.0.0", port=7860)
|
|
|
|
89 |
response.raise_for_status()
|
90 |
import xml.etree.ElementTree as ET
|
91 |
root = ET.fromstring(response.text)
|
92 |
+
title = root.find(".//ArticleTitle").text if root.find(".//ArticleTitle") is not None else "No Title"
|
93 |
+
abstract = root.find(".//AbstractText").text if root.find(".//AbstractText") is not None else "No Abstract"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
keywords = root.findall(".//Keyword")
|
95 |
keyword_text = " ".join([kw.text for kw in keywords if kw.text]) if keywords else ""
|
96 |
return title, f"{abstract} {keyword_text}"
|
97 |
except Exception as e:
|
98 |
print(f"Errore recupero abstract: {e}")
|
99 |
+
return None # Restituisci None se si verifica un errore
|
100 |
|
101 |
def fetch_pubmed(query, year_start, year_end, max_results=10):
|
102 |
base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
|
|
105 |
"term": f"{query} AND ({year_start}[PDAT] : {year_end}[PDAT])",
|
106 |
"retmax": max_results,
|
107 |
"retmode": "json",
|
108 |
+
"sort": "relevance" # <-- Qui abbiamo ordinato per rilevanza
|
109 |
}
|
110 |
try:
|
111 |
response = requests.get(base_url, params=params)
|
112 |
response.raise_for_status()
|
113 |
id_list = response.json().get("esearchresult", {}).get("idlist", [])
|
|
|
|
|
|
|
|
|
114 |
return id_list
|
115 |
except Exception as e:
|
116 |
print(f"Errore fetch PubMed: {e}")
|
|
|
154 |
pubmed_ids = fetch_pubmed(query, year_start, year_end, num_articles)
|
155 |
|
156 |
if not pubmed_ids:
|
157 |
+
flash("❌ Nessun articolo trovato su PubMed per questa ricerca.", "error")
|
158 |
return redirect(url_for("index"))
|
159 |
|
160 |
+
pubmed_results = []
|
161 |
+
for id_ in pubmed_ids:
|
162 |
+
result = fetch_pubmed_details(id_)
|
163 |
+
if result: # Aggiungi solo se il risultato non è None
|
164 |
+
pubmed_results.append(result)
|
165 |
+
|
166 |
+
# pubmed_results can now be accessed without errors
|
167 |
+
pubmed_texts = [r[1] for r in pubmed_results] # Estrai i testi
|
168 |
+
pubmed_titles = [r[0] for r in pubmed_results] # Estrai i titoli
|
169 |
|
170 |
results = validate_document(pdf_path, pubmed_texts, method="pubmed", titles=pubmed_titles)
|
171 |
|
|
|
220 |
|
221 |
if __name__ == "__main__":
|
222 |
app.run(debug=True, host="0.0.0.0", port=7860)
|
223 |
+
|