mabil committed on
Commit
db5493b
·
1 Parent(s): af53f00

Update app.py: skip PubMed empty articles, improve robustness

Browse files
Files changed (1) hide show
  1. app.py +20 -15
app.py CHANGED
@@ -89,14 +89,22 @@ def fetch_pubmed_details(article_id):
89
  response.raise_for_status()
90
  import xml.etree.ElementTree as ET
91
  root = ET.fromstring(response.text)
92
- title = root.find(".//ArticleTitle").text if root.find(".//ArticleTitle") is not None else "No Title"
93
- abstract = root.find(".//AbstractText").text if root.find(".//AbstractText") is not None else "No Abstract"
 
 
 
 
94
  keywords = root.findall(".//Keyword")
95
  keyword_text = " ".join([kw.text for kw in keywords if kw.text]) if keywords else ""
 
 
 
 
96
  return title, f"{abstract} {keyword_text}"
97
  except Exception as e:
98
  print(f"Errore recupero abstract: {e}")
99
- return "No Title", "No Abstract"
100
 
101
  def fetch_pubmed(query, year_start, year_end, max_results=10):
102
  base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
@@ -105,13 +113,12 @@ def fetch_pubmed(query, year_start, year_end, max_results=10):
105
  "term": f"{query} AND ({year_start}[PDAT] : {year_end}[PDAT])",
106
  "retmax": max_results,
107
  "retmode": "json",
108
- "sort": "relevance" # <-- Qui abbiamo ordinato per rilevanza
109
  }
110
  try:
111
  response = requests.get(base_url, params=params)
112
  response.raise_for_status()
113
- id_list = response.json().get("esearchresult", {}).get("idlist", [])
114
- return id_list
115
  except Exception as e:
116
  print(f"Errore fetch PubMed: {e}")
117
  return []
@@ -152,16 +159,14 @@ def validate():
152
  year_end = request.form.get("year_end", "2025")
153
  num_articles = int(request.form.get("num_articles", "10"))
154
  pubmed_ids = fetch_pubmed(query, year_start, year_end, num_articles)
 
 
155
 
156
- if not pubmed_ids:
157
- flash("Nessun articolo trovato su PubMed per questa ricerca.", "error")
158
  return redirect(url_for("index"))
159
 
160
- pubmed_results = [fetch_pubmed_details(id_) for id_ in pubmed_ids]
161
- pubmed_texts = [r[1] for r in pubmed_results]
162
- pubmed_titles = [r[0] for r in pubmed_results]
163
-
164
- results = validate_document(pdf_path, pubmed_texts, method="pubmed", titles=pubmed_titles)
165
 
166
  return render_template("NORUS.html", results=results, keywords=last_common_keywords)
167
 
@@ -210,7 +215,7 @@ def download_report():
210
  output_path = os.path.join(app.config["UPLOAD_FOLDER"], "NORUS_Report.pdf")
211
  pdf.output(output_path, 'F')
212
 
213
- return send_file(output_path, as_attachment=True)
214
 
215
  if __name__ == "__main__":
216
- app.run(debug=True, host="0.0.0.0", port=7860)
 
89
  response.raise_for_status()
90
  import xml.etree.ElementTree as ET
91
  root = ET.fromstring(response.text)
92
+ title_element = root.find(".//ArticleTitle")
93
+ abstract_element = root.find(".//AbstractText")
94
+
95
+ title = title_element.text.strip() if title_element is not None and title_element.text else "No Title"
96
+ abstract = abstract_element.text.strip() if abstract_element is not None and abstract_element.text else "No Abstract"
97
+
98
  keywords = root.findall(".//Keyword")
99
  keyword_text = " ".join([kw.text for kw in keywords if kw.text]) if keywords else ""
100
+
101
+ if title == "No Title" or abstract == "No Abstract":
102
+ return None # Se titolo o abstract mancano, scarta questo articolo
103
+
104
  return title, f"{abstract} {keyword_text}"
105
  except Exception as e:
106
  print(f"Errore recupero abstract: {e}")
107
+ return None
108
 
109
  def fetch_pubmed(query, year_start, year_end, max_results=10):
110
  base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
 
113
  "term": f"{query} AND ({year_start}[PDAT] : {year_end}[PDAT])",
114
  "retmax": max_results,
115
  "retmode": "json",
116
+ "sort": "relevance"
117
  }
118
  try:
119
  response = requests.get(base_url, params=params)
120
  response.raise_for_status()
121
+ return response.json().get("esearchresult", {}).get("idlist", [])
 
122
  except Exception as e:
123
  print(f"Errore fetch PubMed: {e}")
124
  return []
 
159
  year_end = request.form.get("year_end", "2025")
160
  num_articles = int(request.form.get("num_articles", "10"))
161
  pubmed_ids = fetch_pubmed(query, year_start, year_end, num_articles)
162
+ pubmed_results = [fetch_pubmed_details(id_) for id_ in pubmed_ids]
163
+ pubmed_results = [r for r in pubmed_results if r is not None]
164
 
165
+ if not pubmed_results:
166
+ flash("Nessun articolo PubMed valido trovato. Modifica la query o il range di anni.", "error")
167
  return redirect(url_for("index"))
168
 
169
+ results = validate_document(pdf_path, [r[1] for r in pubmed_results], method="pubmed", titles=[r[0] for r in pubmed_results])
 
 
 
 
170
 
171
  return render_template("NORUS.html", results=results, keywords=last_common_keywords)
172
 
 
215
  output_path = os.path.join(app.config["UPLOAD_FOLDER"], "NORUS_Report.pdf")
216
  pdf.output(output_path, 'F')
217
 
218
+ return send_file(output_path, as_attachment=True)
219
 
220
  if __name__ == "__main__":
221
+ app.run(debug=True, host="0.0.0.0", port=7860)