mabil committed on
Commit
20f9b9b
·
1 Parent(s): af7100d

✅ Fix analisi locale con upload multiplo dei PDF

Browse files
Files changed (1) hide show
  1. app.py +37 -11
app.py CHANGED
@@ -63,7 +63,12 @@ def validate_document(pdf_path, comparison_sources, method="local", titles=None)
63
  token_overlap = calculate_token_overlap(pdf_text, doc_text)
64
  oui = calculate_oui(similarity, token_overlap)
65
  title = titles[i] if titles and i < len(titles) else os.path.basename(doc) if method == "local" else "Unknown Title"
66
- results.append({"title": title, "similarity": round(similarity, 2), "token_overlap": round(token_overlap, 2), "oui": round(oui, 2)})
 
 
 
 
 
67
  return results
68
 
69
  def fetch_pubmed_details(article_id):
@@ -86,7 +91,12 @@ def fetch_pubmed_details(article_id):
86
 
87
  def fetch_pubmed(query, year_start, year_end, max_results=10):
88
  base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
89
- params = {"db": "pubmed", "term": f"{query} AND ({year_start}[PDAT] : {year_end}[PDAT])", "retmax": max_results, "retmode": "json"}
 
 
 
 
 
90
  try:
91
  response = requests.get(base_url, params=params)
92
  response.raise_for_status()
@@ -103,32 +113,48 @@ def index():
103
  def validate():
104
  pdf_file = request.files.get("pdf_file")
105
  analysis_type = request.form.get("analysis_type")
106
- local_dir = request.form.get("local_directory", "").strip()
107
  query = request.form.get("query", "").strip()
 
108
  if not pdf_file:
109
  flash("Carica un file PDF valido.", "error")
110
  return redirect(url_for("index"))
 
111
  filename = secure_filename(pdf_file.filename)
112
  pdf_path = os.path.join(app.config["UPLOAD_FOLDER"], filename)
113
  pdf_file.save(pdf_path)
 
114
  results = []
 
115
  if analysis_type == "local":
116
- if not os.path.isdir(local_dir):
117
- flash("Seleziona una directory valida.", "error")
118
- return redirect(url_for("index"))
119
- comparison_files = [os.path.join(local_dir, f) for f in os.listdir(local_dir) if f.endswith(".pdf")]
120
  if not comparison_files:
121
- flash("La directory non contiene PDF.", "error")
122
  return redirect(url_for("index"))
123
- results = validate_document(pdf_path, comparison_files, method="local")
 
 
 
 
 
 
 
 
 
 
124
  elif analysis_type == "pubmed":
125
  year_start = request.form.get("year_start", "2000")
126
  year_end = request.form.get("year_end", "2025")
127
  num_articles = int(request.form.get("num_articles", "10"))
128
  pubmed_ids = fetch_pubmed(query, year_start, year_end, num_articles)
129
  pubmed_results = [fetch_pubmed_details(article_id) for article_id in pubmed_ids]
130
- results = validate_document(pdf_path, [result[1] for result in pubmed_results], method="pubmed", titles=[result[0] for result in pubmed_results])
 
 
 
 
 
 
131
  return render_template("NORUS.html", results=results)
132
 
133
  if __name__ == "__main__":
134
- app.run(debug=True, host="0.0.0.0", port=7860)
 
63
  token_overlap = calculate_token_overlap(pdf_text, doc_text)
64
  oui = calculate_oui(similarity, token_overlap)
65
  title = titles[i] if titles and i < len(titles) else os.path.basename(doc) if method == "local" else "Unknown Title"
66
+ results.append({
67
+ "title": title,
68
+ "similarity": round(similarity, 2),
69
+ "token_overlap": round(token_overlap, 2),
70
+ "oui": round(oui, 2)
71
+ })
72
  return results
73
 
74
  def fetch_pubmed_details(article_id):
 
91
 
92
  def fetch_pubmed(query, year_start, year_end, max_results=10):
93
  base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
94
+ params = {
95
+ "db": "pubmed",
96
+ "term": f"{query} AND ({year_start}[PDAT] : {year_end}[PDAT])",
97
+ "retmax": max_results,
98
+ "retmode": "json"
99
+ }
100
  try:
101
  response = requests.get(base_url, params=params)
102
  response.raise_for_status()
 
113
  def validate():
114
  pdf_file = request.files.get("pdf_file")
115
  analysis_type = request.form.get("analysis_type")
 
116
  query = request.form.get("query", "").strip()
117
+
118
  if not pdf_file:
119
  flash("Carica un file PDF valido.", "error")
120
  return redirect(url_for("index"))
121
+
122
  filename = secure_filename(pdf_file.filename)
123
  pdf_path = os.path.join(app.config["UPLOAD_FOLDER"], filename)
124
  pdf_file.save(pdf_path)
125
+
126
  results = []
127
+
128
  if analysis_type == "local":
129
+ comparison_files = request.files.getlist("comparison_files")
 
 
 
130
  if not comparison_files:
131
+ flash("Carica almeno un file di confronto.", "error")
132
  return redirect(url_for("index"))
133
+
134
+ saved_paths = []
135
+ for file in comparison_files:
136
+ if file.filename.endswith(".pdf"):
137
+ filename = secure_filename(file.filename)
138
+ file_path = os.path.join(app.config["UPLOAD_FOLDER"], filename)
139
+ file.save(file_path)
140
+ saved_paths.append(file_path)
141
+
142
+ results = validate_document(pdf_path, saved_paths, method="local")
143
+
144
  elif analysis_type == "pubmed":
145
  year_start = request.form.get("year_start", "2000")
146
  year_end = request.form.get("year_end", "2025")
147
  num_articles = int(request.form.get("num_articles", "10"))
148
  pubmed_ids = fetch_pubmed(query, year_start, year_end, num_articles)
149
  pubmed_results = [fetch_pubmed_details(article_id) for article_id in pubmed_ids]
150
+ results = validate_document(
151
+ pdf_path,
152
+ [result[1] for result in pubmed_results],
153
+ method="pubmed",
154
+ titles=[result[0] for result in pubmed_results]
155
+ )
156
+
157
  return render_template("NORUS.html", results=results)
158
 
159
  if __name__ == "__main__":
160
+ app.run(debug=True, host="0.0.0.0", port=7860)