Spaces:
Sleeping
Sleeping
✅ Fix analisi locale con upload multiplo dei PDF
Browse files
app.py
CHANGED
@@ -63,7 +63,12 @@ def validate_document(pdf_path, comparison_sources, method="local", titles=None)
|
|
63 |
token_overlap = calculate_token_overlap(pdf_text, doc_text)
|
64 |
oui = calculate_oui(similarity, token_overlap)
|
65 |
title = titles[i] if titles and i < len(titles) else os.path.basename(doc) if method == "local" else "Unknown Title"
|
66 |
-
results.append({
|
|
|
|
|
|
|
|
|
|
|
67 |
return results
|
68 |
|
69 |
def fetch_pubmed_details(article_id):
|
@@ -86,7 +91,12 @@ def fetch_pubmed_details(article_id):
|
|
86 |
|
87 |
def fetch_pubmed(query, year_start, year_end, max_results=10):
|
88 |
base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
89 |
-
params = {
|
|
|
|
|
|
|
|
|
|
|
90 |
try:
|
91 |
response = requests.get(base_url, params=params)
|
92 |
response.raise_for_status()
|
@@ -103,32 +113,48 @@ def index():
|
|
103 |
def validate():
|
104 |
pdf_file = request.files.get("pdf_file")
|
105 |
analysis_type = request.form.get("analysis_type")
|
106 |
-
local_dir = request.form.get("local_directory", "").strip()
|
107 |
query = request.form.get("query", "").strip()
|
|
|
108 |
if not pdf_file:
|
109 |
flash("Carica un file PDF valido.", "error")
|
110 |
return redirect(url_for("index"))
|
|
|
111 |
filename = secure_filename(pdf_file.filename)
|
112 |
pdf_path = os.path.join(app.config["UPLOAD_FOLDER"], filename)
|
113 |
pdf_file.save(pdf_path)
|
|
|
114 |
results = []
|
|
|
115 |
if analysis_type == "local":
|
116 |
-
|
117 |
-
flash("Seleziona una directory valida.", "error")
|
118 |
-
return redirect(url_for("index"))
|
119 |
-
comparison_files = [os.path.join(local_dir, f) for f in os.listdir(local_dir) if f.endswith(".pdf")]
|
120 |
if not comparison_files:
|
121 |
-
flash("
|
122 |
return redirect(url_for("index"))
|
123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
elif analysis_type == "pubmed":
|
125 |
year_start = request.form.get("year_start", "2000")
|
126 |
year_end = request.form.get("year_end", "2025")
|
127 |
num_articles = int(request.form.get("num_articles", "10"))
|
128 |
pubmed_ids = fetch_pubmed(query, year_start, year_end, num_articles)
|
129 |
pubmed_results = [fetch_pubmed_details(article_id) for article_id in pubmed_ids]
|
130 |
-
results = validate_document(
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
return render_template("NORUS.html", results=results)
|
132 |
|
133 |
if __name__ == "__main__":
|
134 |
-
app.run(debug=True, host="0.0.0.0", port=7860)
|
|
|
63 |
token_overlap = calculate_token_overlap(pdf_text, doc_text)
|
64 |
oui = calculate_oui(similarity, token_overlap)
|
65 |
title = titles[i] if titles and i < len(titles) else os.path.basename(doc) if method == "local" else "Unknown Title"
|
66 |
+
results.append({
|
67 |
+
"title": title,
|
68 |
+
"similarity": round(similarity, 2),
|
69 |
+
"token_overlap": round(token_overlap, 2),
|
70 |
+
"oui": round(oui, 2)
|
71 |
+
})
|
72 |
return results
|
73 |
|
74 |
def fetch_pubmed_details(article_id):
|
|
|
91 |
|
92 |
def fetch_pubmed(query, year_start, year_end, max_results=10):
|
93 |
base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
94 |
+
params = {
|
95 |
+
"db": "pubmed",
|
96 |
+
"term": f"{query} AND ({year_start}[PDAT] : {year_end}[PDAT])",
|
97 |
+
"retmax": max_results,
|
98 |
+
"retmode": "json"
|
99 |
+
}
|
100 |
try:
|
101 |
response = requests.get(base_url, params=params)
|
102 |
response.raise_for_status()
|
|
|
113 |
def validate():
    """Handle the validation form and render the comparison results.

    The uploaded ``pdf_file`` is saved into ``app.config["UPLOAD_FOLDER"]``.
    What happens next depends on the ``analysis_type`` form field:

    * ``"local"``  -- every uploaded ``comparison_files`` entry whose name
      ends in ``.pdf`` (case-insensitive) is saved and handed to
      ``validate_document`` together with the target PDF.
    * ``"pubmed"`` -- PubMed is searched with ``query`` and the requested
      year range, and the fetched article details are handed to
      ``validate_document``.

    Any other ``analysis_type`` renders the template with an empty result
    list.

    Returns:
        The rendered ``NORUS.html`` template, or a redirect to ``index``
        (with a flashed error message) when the submitted form is invalid.
    """
    pdf_file = request.files.get("pdf_file")
    analysis_type = request.form.get("analysis_type")
    query = request.form.get("query", "").strip()

    # secure_filename() can reduce a name to "" (e.g. "../"); joining that
    # with the upload folder would point at the folder itself, so treat it
    # exactly like a missing upload.
    filename = secure_filename(pdf_file.filename or "") if pdf_file else ""
    if not filename:
        flash("Carica un file PDF valido.", "error")
        return redirect(url_for("index"))

    upload_dir = app.config["UPLOAD_FOLDER"]
    pdf_path = os.path.join(upload_dir, filename)
    pdf_file.save(pdf_path)

    results = []

    if analysis_type == "local":
        saved_paths = []
        titles = []
        uploads = request.files.getlist("comparison_files")
        for position, file in enumerate(uploads):
            safe_name = secure_filename(file.filename or "")
            # Browsers send one empty file part when nothing was selected;
            # it fails this check too. Lower-casing accepts "REPORT.PDF".
            if not safe_name.lower().endswith(".pdf"):
                continue
            # Store under a prefixed name so a comparison file that shares
            # its name with the target PDF (or with another comparison
            # file) does not overwrite it on disk.
            file_path = os.path.join(upload_dir, f"cmp_{position}_{safe_name}")
            file.save(file_path)
            saved_paths.append(file_path)
            # Keep the un-prefixed name as the title shown in the results.
            titles.append(safe_name)

        # Checked after filtering: the raw list may be non-empty and still
        # contain no usable PDF.
        if not saved_paths:
            flash("Carica almeno un file di confronto.", "error")
            return redirect(url_for("index"))

        results = validate_document(
            pdf_path,
            saved_paths,
            method="local",
            titles=titles,
        )

    elif analysis_type == "pubmed":
        year_start = request.form.get("year_start", "2000")
        year_end = request.form.get("year_end", "2025")
        # An empty or non-numeric field must produce a form error, not an
        # unhandled ValueError (HTTP 500).
        try:
            num_articles = int(request.form.get("num_articles", "10"))
        except ValueError:
            flash("Numero di articoli non valido.", "error")
            return redirect(url_for("index"))

        pubmed_ids = fetch_pubmed(query, year_start, year_end, num_articles)
        pubmed_results = [fetch_pubmed_details(article_id) for article_id in pubmed_ids]
        # Each detail record is indexed as [0] -> title, [1] -> text to compare.
        results = validate_document(
            pdf_path,
            [result[1] for result in pubmed_results],
            method="pubmed",
            titles=[result[0] for result in pubmed_results],
        )

    return render_template("NORUS.html", results=results)
|
158 |
|
159 |
if __name__ == "__main__":
    # The server listens on every interface (0.0.0.0), and Flask's debug
    # mode exposes the Werkzeug interactive debugger, which lets any client
    # run arbitrary Python. Debug is therefore opt-in: set FLASK_DEBUG=1
    # (or "true") for local development only.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true")
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)
|