Update app.py: skip PubMed articles with a missing title or abstract, improve robustness
Browse files
app.py
CHANGED
@@ -89,14 +89,22 @@ def fetch_pubmed_details(article_id):
|
|
89 |
response.raise_for_status()
|
90 |
import xml.etree.ElementTree as ET
|
91 |
root = ET.fromstring(response.text)
|
92 |
-
|
93 |
-
|
|
|
|
|
|
|
|
|
94 |
keywords = root.findall(".//Keyword")
|
95 |
keyword_text = " ".join([kw.text for kw in keywords if kw.text]) if keywords else ""
|
|
|
|
|
|
|
|
|
96 |
return title, f"{abstract} {keyword_text}"
|
97 |
except Exception as e:
|
98 |
print(f"Errore recupero abstract: {e}")
|
99 |
-
return
|
100 |
|
101 |
def fetch_pubmed(query, year_start, year_end, max_results=10):
|
102 |
base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
@@ -105,13 +113,12 @@ def fetch_pubmed(query, year_start, year_end, max_results=10):
|
|
105 |
"term": f"{query} AND ({year_start}[PDAT] : {year_end}[PDAT])",
|
106 |
"retmax": max_results,
|
107 |
"retmode": "json",
|
108 |
-
"sort": "relevance"
|
109 |
}
|
110 |
try:
|
111 |
response = requests.get(base_url, params=params)
|
112 |
response.raise_for_status()
|
113 |
-
|
114 |
-
return id_list
|
115 |
except Exception as e:
|
116 |
print(f"Errore fetch PubMed: {e}")
|
117 |
return []
|
@@ -152,16 +159,14 @@ def validate():
|
|
152 |
year_end = request.form.get("year_end", "2025")
|
153 |
num_articles = int(request.form.get("num_articles", "10"))
|
154 |
pubmed_ids = fetch_pubmed(query, year_start, year_end, num_articles)
|
|
|
|
|
155 |
|
156 |
-
if not
|
157 |
-
flash("Nessun articolo trovato
|
158 |
return redirect(url_for("index"))
|
159 |
|
160 |
-
pubmed_results = [
|
161 |
-
pubmed_texts = [r[1] for r in pubmed_results]
|
162 |
-
pubmed_titles = [r[0] for r in pubmed_results]
|
163 |
-
|
164 |
-
results = validate_document(pdf_path, pubmed_texts, method="pubmed", titles=pubmed_titles)
|
165 |
|
166 |
return render_template("NORUS.html", results=results, keywords=last_common_keywords)
|
167 |
|
@@ -210,7 +215,7 @@ def download_report():
|
|
210 |
output_path = os.path.join(app.config["UPLOAD_FOLDER"], "NORUS_Report.pdf")
|
211 |
pdf.output(output_path, 'F')
|
212 |
|
213 |
-
return send_file(output_path, as_attachment=True)
|
214 |
|
215 |
if __name__ == "__main__":
|
216 |
-
app.run(debug=True, host="0.0.0.0", port=7860)
|
|
|
89 |
response.raise_for_status()
|
90 |
import xml.etree.ElementTree as ET
|
91 |
root = ET.fromstring(response.text)
|
92 |
+
title_element = root.find(".//ArticleTitle")
|
93 |
+
abstract_element = root.find(".//AbstractText")
|
94 |
+
|
95 |
+
title = title_element.text.strip() if title_element is not None and title_element.text else "No Title"
|
96 |
+
abstract = abstract_element.text.strip() if abstract_element is not None and abstract_element.text else "No Abstract"
|
97 |
+
|
98 |
keywords = root.findall(".//Keyword")
|
99 |
keyword_text = " ".join([kw.text for kw in keywords if kw.text]) if keywords else ""
|
100 |
+
|
101 |
+
if title == "No Title" or abstract == "No Abstract":
|
102 |
+
return None # Se titolo o abstract mancano, scarta questo articolo
|
103 |
+
|
104 |
return title, f"{abstract} {keyword_text}"
|
105 |
except Exception as e:
|
106 |
print(f"Errore recupero abstract: {e}")
|
107 |
+
return None
|
108 |
|
109 |
def fetch_pubmed(query, year_start, year_end, max_results=10):
|
110 |
base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
|
|
113 |
"term": f"{query} AND ({year_start}[PDAT] : {year_end}[PDAT])",
|
114 |
"retmax": max_results,
|
115 |
"retmode": "json",
|
116 |
+
"sort": "relevance"
|
117 |
}
|
118 |
try:
|
119 |
response = requests.get(base_url, params=params)
|
120 |
response.raise_for_status()
|
121 |
+
return response.json().get("esearchresult", {}).get("idlist", [])
|
|
|
122 |
except Exception as e:
|
123 |
print(f"Errore fetch PubMed: {e}")
|
124 |
return []
|
|
|
159 |
year_end = request.form.get("year_end", "2025")
|
160 |
num_articles = int(request.form.get("num_articles", "10"))
|
161 |
pubmed_ids = fetch_pubmed(query, year_start, year_end, num_articles)
|
162 |
+
pubmed_results = [fetch_pubmed_details(id_) for id_ in pubmed_ids]
|
163 |
+
pubmed_results = [r for r in pubmed_results if r is not None]
|
164 |
|
165 |
+
if not pubmed_results:
|
166 |
+
flash("Nessun articolo PubMed valido trovato. Modifica la query o il range di anni.", "error")
|
167 |
return redirect(url_for("index"))
|
168 |
|
169 |
+
results = validate_document(pdf_path, [r[1] for r in pubmed_results], method="pubmed", titles=[r[0] for r in pubmed_results])
|
|
|
|
|
|
|
|
|
170 |
|
171 |
return render_template("NORUS.html", results=results, keywords=last_common_keywords)
|
172 |
|
|
|
215 |
output_path = os.path.join(app.config["UPLOAD_FOLDER"], "NORUS_Report.pdf")
|
216 |
pdf.output(output_path, 'F')
|
217 |
|
218 |
+
return send_file(output_path, as_attachment=True)
|
219 |
|
220 |
if __name__ == "__main__":
|
221 |
+
app.run(debug=True, host="0.0.0.0", port=7860)
|