mabil committed on
Commit
e615d1f
·
1 Parent(s): 5649bec

Fix: Improved OUI calculation and report in English, added article scoring, and optimized article selection

Browse files
Files changed (1) hide show
  1. app.py +15 -20
app.py CHANGED
@@ -89,22 +89,14 @@ def fetch_pubmed_details(article_id):
89
  response.raise_for_status()
90
  import xml.etree.ElementTree as ET
91
  root = ET.fromstring(response.text)
92
-
93
- title_elem = root.find(".//ArticleTitle")
94
- abstract_elem = root.find(".//AbstractText")
95
-
96
- # Se non ci sono titolo o abstract, lo segnaliamo nei log
97
- if title_elem is None or abstract_elem is None:
98
- print(f"Article {article_id} does not contain title or abstract.")
99
-
100
- title = title_elem.text if title_elem is not None else "No Title"
101
- abstract = abstract_elem.text if abstract_elem is not None else "No Abstract"
102
  keywords = root.findall(".//Keyword")
103
  keyword_text = " ".join([kw.text for kw in keywords if kw.text]) if keywords else ""
104
  return title, f"{abstract} {keyword_text}"
105
  except Exception as e:
106
  print(f"Errore recupero abstract: {e}")
107
- return None
108
 
109
  def fetch_pubmed(query, year_start, year_end, max_results=10):
110
  base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
@@ -113,16 +105,12 @@ def fetch_pubmed(query, year_start, year_end, max_results=10):
113
  "term": f"{query} AND ({year_start}[PDAT] : {year_end}[PDAT])",
114
  "retmax": max_results,
115
  "retmode": "json",
116
- "sort": "relevance" # <-- Ordinato per rilevanza
117
  }
118
  try:
119
  response = requests.get(base_url, params=params)
120
  response.raise_for_status()
121
  id_list = response.json().get("esearchresult", {}).get("idlist", [])
122
-
123
- # Aggiungiamo un log per capire quanti articoli sono stati trovati
124
- print(f"Found {len(id_list)} articles in PubMed.")
125
-
126
  return id_list
127
  except Exception as e:
128
  print(f"Errore fetch PubMed: {e}")
@@ -166,12 +154,18 @@ def validate():
166
  pubmed_ids = fetch_pubmed(query, year_start, year_end, num_articles)
167
 
168
  if not pubmed_ids:
169
- flash("Nessun articolo trovato su PubMed per questa ricerca.", "error")
170
  return redirect(url_for("index"))
171
 
172
- pubmed_results = [fetch_pubmed_details(id_) for id_ in pubmed_ids]
173
- pubmed_texts = [r[1] for r in pubmed_results]
174
- pubmed_titles = [r[0] for r in pubmed_results]
 
 
 
 
 
 
175
 
176
  results = validate_document(pdf_path, pubmed_texts, method="pubmed", titles=pubmed_titles)
177
 
@@ -226,3 +220,4 @@ def download_report():
226
 
227
  if __name__ == "__main__":
228
  app.run(debug=True, host="0.0.0.0", port=7860)
 
 
89
  response.raise_for_status()
90
  import xml.etree.ElementTree as ET
91
  root = ET.fromstring(response.text)
92
+ title = root.find(".//ArticleTitle").text if root.find(".//ArticleTitle") is not None else "No Title"
93
+ abstract = root.find(".//AbstractText").text if root.find(".//AbstractText") is not None else "No Abstract"
 
 
 
 
 
 
 
 
94
  keywords = root.findall(".//Keyword")
95
  keyword_text = " ".join([kw.text for kw in keywords if kw.text]) if keywords else ""
96
  return title, f"{abstract} {keyword_text}"
97
  except Exception as e:
98
  print(f"Errore recupero abstract: {e}")
99
+ return None # Restituisci None se si verifica un errore
100
 
101
  def fetch_pubmed(query, year_start, year_end, max_results=10):
102
  base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
 
105
  "term": f"{query} AND ({year_start}[PDAT] : {year_end}[PDAT])",
106
  "retmax": max_results,
107
  "retmode": "json",
108
+ "sort": "relevance" # <-- Qui abbiamo ordinato per rilevanza
109
  }
110
  try:
111
  response = requests.get(base_url, params=params)
112
  response.raise_for_status()
113
  id_list = response.json().get("esearchresult", {}).get("idlist", [])
 
 
 
 
114
  return id_list
115
  except Exception as e:
116
  print(f"Errore fetch PubMed: {e}")
 
154
  pubmed_ids = fetch_pubmed(query, year_start, year_end, num_articles)
155
 
156
  if not pubmed_ids:
157
+ flash("Nessun articolo trovato su PubMed per questa ricerca.", "error")
158
  return redirect(url_for("index"))
159
 
160
+ pubmed_results = []
161
+ for id_ in pubmed_ids:
162
+ result = fetch_pubmed_details(id_)
163
+ if result: # Aggiungi solo se il risultato non è None
164
+ pubmed_results.append(result)
165
+
166
+ # Ora puoi accedere a pubmed_results senza errori
167
+ pubmed_texts = [r[1] for r in pubmed_results] # Estrai i testi
168
+ pubmed_titles = [r[0] for r in pubmed_results] # Estrai i titoli
169
 
170
  results = validate_document(pdf_path, pubmed_texts, method="pubmed", titles=pubmed_titles)
171
 
 
220
 
221
  if __name__ == "__main__":
222
  app.run(debug=True, host="0.0.0.0", port=7860)
223
+