mabil commited on
Commit
af53f00
ยท
1 Parent(s): 0767e59

Uploaded NORUS app files

Browse files
.DS_Store ADDED
Binary file (10.2 kB). View file
 
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9

# 1. Create the non-root user (required by Hugging Face Spaces) and make sure
#    the working directory belongs to it: app.py creates uploads/ and writes
#    the generated PDF report there at runtime.
RUN useradd -m -u 1000 user \
    && mkdir -p /app \
    && chown user:user /app
USER user
ENV PATH="/home/user/.local/bin:$PATH"

# 2. Set the working directory
WORKDIR /app

# 3. Copy the requirements and install the packages
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# 4. Copy and run the NLTK setup script
COPY --chown=user app_setup.sh .
RUN chmod +x app_setup.sh && ./app_setup.sh

# 5. Copy the rest of the app
COPY --chown=user . .

# 6. Point NLTK at the data downloaded by app_setup.sh
ENV NLTK_DATA="/home/user/nltk_data"

# 7. Start the app
CMD ["python", "app.py"]
27
+
README.md CHANGED
@@ -1,11 +1,84 @@
1
  ---
2
- title: NORUS2
3
- emoji: ๐Ÿ†
4
- colorFrom: gray
5
- colorTo: red
6
- sdk: static
7
- pinned: false
8
- short_description: Neural ORiginality Understanding System
9
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Norus Tool
3
+ emoji: 🔥
4
+ colorFrom: green
5
+ colorTo: purple
6
+ sdk: docker
7
+ app_file: app.py
8
+ pinned: true
9
  ---
10
+ # NORUS Tool 🧠📄
11
+
12
+ [![🤗 Hugging Face Spaces](https://img.shields.io/badge/🤗-HuggingFace_Spaces-blue)](https://huggingface.co/spaces/mabil/norus-tool)
13
+
14
+ **NORUS** (Novelty and Originality Recognition Utility System) è uno strumento basato su intelligenza artificiale che consente l'analisi semantica di articoli scientifici in formato PDF, confrontandoli con articoli locali o pubblicati su PubMed. Il tool restituisce misure di **similarità semantica**, **token overlap** e un indice composito chiamato **OUI (Originality & Uniqueness Index)**.
15
+
16
+ ## 🚀 Funzionalità principali
17
+
18
+ - ✅ Caricamento PDF da analizzare
19
+ - 📂 Confronto con PDF locali o articoli da PubMed
20
+ - 🤖 Estrazione di embedding semantici tramite SciBERT
21
+ - 📊 Calcolo di:
22
+   - Similarità semantica (cosine similarity)
23
+   - Sovrapposizione testuale (token overlap)
24
+   - Indice OUI (originalità e novità)
25
+ - 📈 Visualizzazione interattiva dei risultati via Chart.js
26
+
27
+ ## 🧪 OUI - Originality & Uniqueness Index
28
+
29
+ ```text
30
+ OUI = 1 - (α × semantic_similarity + β × token_overlap)
31
+ ```
32
+
33
+ - α = 0.7 → penalizza la somiglianza semantica
34
+ - β = 0.3 → penalizza la ripetizione letterale
35
+ - L'OUI misura **quanto un documento è originale**, sia nel contenuto che nella forma.
36
+
37
+ ## 🧱 Architettura
38
+
39
+ - `Flask` come backend web
40
+ - `pdfplumber` per l'estrazione del testo dai PDF
41
+ - `nltk` per preprocessing linguistico
42
+ - `sentence-transformers` con modello `allenai/scibert_scivocab_uncased`
43
+ - `requests` per l'interfaccia con PubMed
44
+
45
+ ## 📂 Struttura del progetto
46
+
47
+ ```
48
+ .
49
+ ├── app.py
50
+ ├── Dockerfile
51
+ ├── requirements.txt
52
+ ├── static/
53
+ ├── templates/
54
+ ├── uploads/
55
+ └── README.md
56
+ ```
57
+
58
+ ## ▶️ Esecuzione locale
59
+
60
+ Per eseguire localmente:
61
+
62
+ 1. Assicurati di avere Python 3.9+
63
+ 2. Installa le dipendenze:
64
+
65
+ ```bash
66
+ pip install -r requirements.txt
67
+ ```
68
+
69
+ 3. Avvia l'app:
70
+
71
+ ```bash
72
+ python app.py
73
+ ```
74
+
75
+ Apri il browser su `http://localhost:7860`
76
+
77
+ ## 📡 Deploy su Hugging Face Spaces
78
+
79
+ Puoi caricare questo progetto come Space Docker-based su Hugging Face. Il `Dockerfile` è già configurato.
80
+
81
+ ---
82
+ ---
83
+ 🧠 Developed by Marina Bilotta – Computational Chemistry & AI Research
84
 
 
app.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import pdfplumber
4
+ from flask import Flask, render_template, request, redirect, url_for, flash, send_file
5
+ from werkzeug.utils import secure_filename
6
+ from sentence_transformers import SentenceTransformer, util
7
+ from transformers import AutoTokenizer
8
+ from fpdf import FPDF
9
+ from collections import Counter
10
+ from io import BytesIO
11
+
12
# WordPiece tokenizer used by preprocess_text() for keyword and
# token-overlap extraction.
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

app = Flask(__name__)
# Never fall back to a key that is published in the repository: anyone who
# knows it can forge session cookies. Without SECRET_KEY a random per-process
# key is generated instead (sessions then do not survive a restart).
app.secret_key = os.environ.get("SECRET_KEY") or os.urandom(32).hex()
app.config["UPLOAD_FOLDER"] = "uploads"
os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)

# Sentence-embedding model used for the semantic similarity score.
model = SentenceTransformer("allenai/scibert_scivocab_uncased")

# Results of the most recent analysis; written by validate_document() and
# read by the /validate and /download_report views.
# NOTE(review): this is process-wide state shared by every client.
last_results = []
last_common_keywords = []
23
+
24
def extract_pdf_text(pdf_path):
    """Return the lower-cased text of the PDF at *pdf_path*.

    Pages are joined with a newline so the last word of one page is not fused
    with the first word of the next, which would corrupt the token-based
    metrics computed downstream.

    Extraction is best effort: on any error the message is printed and the
    text read so far (possibly the empty string) is returned.
    """
    page_texts = []
    try:
        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                # extract_text() may return None for pages without a text layer.
                page_texts.append(page.extract_text() or "")
    except Exception as e:
        print(f"Errore estrazione testo: {e}")
    return "\n".join(page_texts).lower().strip()
33
+
34
def preprocess_text(text):
    """Tokenize *text* and keep the alphabetic tokens longer than three characters.

    The text is lower-cased and split with the module-level ``tokenizer``;
    the surviving tokens are returned as a list, in order of appearance.
    """
    return [
        piece
        for piece in tokenizer.tokenize(text.lower())
        if piece.isalpha() and len(piece) > 3
    ]
38
+
39
def calculate_token_overlap(text1, text2):
    """Return the percentage of distinct tokens of *text1* also found in *text2*.

    Both arguments are whitespace-separated token strings. The measure is
    relative to *text1* only (it is not symmetric) and is rounded to two
    decimals; an empty *text1* yields 0.0.
    """
    reference = set(text1.split())
    if not reference:
        return 0.0
    shared = reference.intersection(text2.split())
    return round(len(shared) / len(reference) * 100, 2)
44
+
45
def calculate_oui(similarity, token_overlap, alpha=0.7, beta=0.3):
    """Return the Originality & Uniqueness Index as a percentage.

    OUI = alpha * (1 - similarity / 100) + beta * (1 - token_overlap / 100),
    scaled to 0-100 and rounded to two decimals. Lower values mean the two
    documents are more alike.

    Args:
        similarity: semantic similarity in percent.
        token_overlap: token overlap in percent.
        alpha: weight of the semantic term.
        beta: weight of the lexical term.

    Both inputs are clamped to [0, 100] first: a cosine similarity can be
    negative or exceed 100 by a rounding error, which would otherwise push
    the index outside its documented range.
    """
    similarity = min(max(similarity, 0.0), 100.0)
    token_overlap = min(max(token_overlap, 0.0), 100.0)
    oui = alpha * (1 - similarity / 100) + beta * (1 - token_overlap / 100)
    result = round(oui * 100, 2)
    # Normalize a negative zero so it is never displayed as "-0.0".
    return 0.0 if result == 0 else result
49
+
50
def validate_document(pdf_path, comparison_sources, method="local", titles=None):
    """Compare one PDF against a list of documents and score each pair.

    Args:
        pdf_path: path of the PDF under analysis.
        comparison_sources: PDF paths when *method* is "local", otherwise
            the raw texts to compare against.
        method: "local" to read each source with extract_pdf_text(); any
            other value treats each source as text.
        titles: optional display titles, matched to the sources by index.

    Returns:
        A list of dicts with the keys "title", "similarity",
        "token_overlap" and "oui", one per comparison source.

    Side effects:
        Stores the results and the ten most frequent shared keywords in the
        module-level ``last_results`` / ``last_common_keywords``.
    """
    global last_results, last_common_keywords

    pdf_text = extract_pdf_text(pdf_path)
    pdf_tokens = preprocess_text(pdf_text)

    # Everything derived from the main document is loop-invariant; encoding
    # it once avoids one model forward pass per comparison document.
    pdf_embedding = model.encode(pdf_text, convert_to_tensor=True)
    pdf_token_set = set(pdf_tokens)
    pdf_token_text = " ".join(pdf_tokens)

    results = []
    all_keywords = []

    for i, doc in enumerate(comparison_sources):
        doc_text = extract_pdf_text(doc) if method == "local" else doc
        doc_tokens = preprocess_text(doc_text)

        similarity = util.pytorch_cos_sim(
            pdf_embedding,
            model.encode(doc_text, convert_to_tensor=True),
        ).item() * 100

        token_overlap = calculate_token_overlap(pdf_token_text, " ".join(doc_tokens))
        oui = calculate_oui(similarity, token_overlap)

        if titles and i < len(titles):
            title = titles[i]
        elif method == "local":
            title = os.path.basename(doc)
        else:
            title = "Unknown Title"

        # Sorting makes the five reported keywords reproducible; slicing a
        # bare set picks an arbitrary subset that changes between runs.
        common_keywords = sorted(pdf_token_set & set(doc_tokens))[:5]
        all_keywords.extend(common_keywords)

        results.append({
            "title": title,
            "similarity": round(similarity, 2),
            "token_overlap": round(token_overlap, 2),
            "oui": round(oui, 2),
        })

    last_results = results
    last_common_keywords = Counter(all_keywords).most_common(10)
    return results
83
+
84
def fetch_pubmed_details(article_id):
    """Fetch the title and the abstract (plus keywords) of a PubMed article.

    Args:
        article_id: PubMed identifier passed to the EFetch endpoint.

    Returns:
        A ``(title, text)`` tuple where *text* is the abstract followed by a
        space and the space-joined keywords. On any failure the error is
        printed and ``("No Title", "No Abstract")`` is returned.
    """
    import xml.etree.ElementTree as ET

    def node_text(node):
        # itertext() also collects text nested in inline markup such as
        # <i>/<sub>, where the element's own .text can be None or partial.
        return "".join(node.itertext()).strip()

    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    params = {"db": "pubmed", "id": article_id, "retmode": "xml"}
    try:
        # Without a timeout a stalled connection would block the request
        # handler indefinitely.
        response = requests.get(base_url, params=params, timeout=15)
        response.raise_for_status()
        root = ET.fromstring(response.text)

        title_node = root.find(".//ArticleTitle")
        title = node_text(title_node) if title_node is not None else ""

        # Structured abstracts are split into several AbstractText sections;
        # use all of them, not just the first.
        abstract = " ".join(
            text for text in map(node_text, root.findall(".//AbstractText")) if text
        )
        keyword_text = " ".join(
            text for text in map(node_text, root.findall(".//Keyword")) if text
        )
        return title or "No Title", f"{abstract or 'No Abstract'} {keyword_text}"
    except Exception as e:
        print(f"Errore recupero abstract: {e}")
        return "No Title", "No Abstract"
100
+
101
def fetch_pubmed(query, year_start, year_end, max_results=10):
    """Search PubMed and return the matching article ids.

    Args:
        query: search term.
        year_start: first publication year of the date range.
        year_end: last publication year of the date range.
        max_results: maximum number of ids to request.

    Returns:
        The list of ids reported by ESearch, sorted by relevance, or an
        empty list when the request fails (the error is printed).
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    params = {
        "db": "pubmed",
        "term": f"{query} AND ({year_start}[PDAT] : {year_end}[PDAT])",
        "retmax": max_results,
        "retmode": "json",
        "sort": "relevance",  # results are ordered by relevance
    }
    try:
        # Without a timeout a stalled connection would block the request
        # handler indefinitely.
        response = requests.get(base_url, params=params, timeout=15)
        response.raise_for_status()
        return response.json().get("esearchresult", {}).get("idlist", [])
    except Exception as e:
        print(f"Errore fetch PubMed: {e}")
        return []
118
+
119
@app.route("/")
def index():
    """Render the landing page containing the analysis form."""
    landing_page = render_template("NORUS.html")
    return landing_page
122
+
123
@app.route("/validate", methods=["POST"])
def validate():
    """Handle the analysis form: store the uploads and run the comparison.

    With ``analysis_type == "local"`` the main PDF is compared with the
    uploaded comparison PDFs; any other value runs a PubMed search and
    compares against the abstracts found. Invalid input is reported with a
    flashed message and a redirect to the index page.
    """
    pdf_file = request.files.get("pdf_file")
    analysis_type = request.form.get("analysis_type")
    query = request.form.get("query", "").strip()

    # secure_filename() can reduce a name to "", which would make the target
    # path the upload directory itself.
    filename = secure_filename(pdf_file.filename) if pdf_file else ""
    if not filename:
        flash("Carica un file PDF valido.", "error")
        return redirect(url_for("index"))

    upload_dir = app.config["UPLOAD_FOLDER"]
    pdf_path = os.path.join(upload_dir, filename)
    pdf_file.save(pdf_path)

    if analysis_type == "local":
        saved_paths = []
        display_names = []
        for idx, file in enumerate(request.files.getlist("comparison_files")):
            # Accept ".PDF" as well as ".pdf".
            if not (file and file.filename.lower().endswith(".pdf")):
                continue
            fname = secure_filename(file.filename)
            # The index prefix keeps same-named uploads from overwriting each
            # other or the main PDF; the plain name is kept for display.
            path = os.path.join(upload_dir, f"cmp{idx}_{fname}")
            file.save(path)
            saved_paths.append(path)
            display_names.append(fname)
        if not saved_paths:
            flash("Nessun file di confronto caricato.", "error")
            return redirect(url_for("index"))
        results = validate_document(
            pdf_path, saved_paths, method="local", titles=display_names
        )
    else:
        if not query:
            flash("Inserisci una query per la ricerca su PubMed.", "error")
            return redirect(url_for("index"))

        year_start = request.form.get("year_start", "2000")
        year_end = request.form.get("year_end", "2025")
        # A non-numeric value must not turn into an unhandled ValueError.
        try:
            num_articles = max(1, int(request.form.get("num_articles", "10")))
        except ValueError:
            num_articles = 10
        pubmed_ids = fetch_pubmed(query, year_start, year_end, num_articles)

        if not pubmed_ids:
            flash("Nessun articolo trovato su PubMed per questa ricerca.", "error")
            return redirect(url_for("index"))

        pubmed_results = [fetch_pubmed_details(id_) for id_ in pubmed_ids]
        pubmed_titles = [title for title, _ in pubmed_results]
        pubmed_texts = [text for _, text in pubmed_results]

        results = validate_document(
            pdf_path, pubmed_texts, method="pubmed", titles=pubmed_titles
        )

    return render_template("NORUS.html", results=results, keywords=last_common_keywords)
167
+
168
def _latin1_safe(text):
    """Return *text* as a string with non-Latin-1 characters replaced by "?"."""
    return str(text).encode("latin-1", "replace").decode("latin-1")


@app.route("/download_report", methods=["POST"])
def download_report():
    """Export the most recent analysis as a PDF report and send it.

    Reads the module-level ``last_results`` / ``last_common_keywords``; when
    no results are available an error is flashed and the client is redirected
    to the index page.

    The built-in "Arial" font only covers Latin-1, so dynamic text (titles,
    keywords) is sanitized first; otherwise a single Greek letter or
    typographic dash in a PubMed title would abort the export.
    """
    if not last_results:
        flash("Nessun risultato da esportare.", "error")
        return redirect(url_for("index"))

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", "B", 16)
    pdf.cell(0, 10, "NORUS Tool - Report Analisi", ln=True, align="C")
    pdf.ln(10)
    pdf.set_font('Arial', '', 12)
    pdf.multi_cell(
        0,
        10,
        "Indice OUI = alpha(1 - sim/100) + beta(1 - overlap/100), con alpha = 0.7 e beta = 0.3.\n"
        "Valori più bassi di OUI indicano maggiore similarità semantica e testuale.",
    )
    pdf.ln(5)

    # Table header.
    pdf.set_font("Arial", "B", 12)
    pdf.cell(90, 10, "Titolo", 1)
    pdf.cell(30, 10, "Sim %", 1)
    pdf.cell(30, 10, "Overlap %", 1)
    pdf.cell(30, 10, "OUI", 1)
    pdf.ln()

    # One row per compared document; long titles are truncated to fit.
    pdf.set_font("Arial", "", 11)
    for res in last_results:
        title = _latin1_safe(res["title"] or "No Title")
        if len(title) > 43:
            title = title[:40] + "..."
        pdf.cell(90, 10, title, 1)
        pdf.cell(30, 10, str(res["similarity"]), 1)
        pdf.cell(30, 10, str(res["token_overlap"]), 1)
        pdf.cell(30, 10, str(res["oui"]), 1)
        pdf.ln()

    if last_common_keywords:
        pdf.ln(6)
        pdf.set_font("Arial", "B", 12)
        pdf.cell(0, 10, "Parole chiave comuni:", ln=True)
        pdf.set_font("Arial", "", 11)
        for kw, count in last_common_keywords:
            pdf.cell(0, 10, _latin1_safe(f"- {kw} ({count})"), ln=True)

    # Footer, 20 units above the bottom edge of the page.
    pdf.set_y(-20)
    pdf.set_font("Arial", "I", 9)
    pdf.cell(0, 10, "© 2025 NORUS Tool", 0, 0, "C")

    # NOTE(review): the report is written to a fixed path shared by all
    # clients; concurrent exports can overwrite each other.
    output_path = os.path.join(app.config["UPLOAD_FOLDER"], "NORUS_Report.pdf")
    pdf.output(output_path, 'F')

    return send_file(output_path, as_attachment=True)
214
+
215
if __name__ == "__main__":
    # The server listens on every interface, so the interactive debugger
    # (which lets a client run arbitrary Python) must stay off unless it is
    # explicitly requested for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)
app_setup.sh ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
#!/bin/bash
# Download the NLTK resources into the image at build time.
# Stop at the first failing command so a broken setup fails the build.
set -e

NLTK_DIR="/home/user/nltk_data"

echo ">>> Setup NLTK resources..."
mkdir -p "$NLTK_DIR"
python3 -m nltk.downloader -d "$NLTK_DIR" punkt stopwords wordnet
6
+
index.html DELETED
@@ -1,19 +0,0 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/.DS_Store ADDED
Binary file (6.15 kB). View file
 
models/__pycache__/similarity_model.cpython-313.pyc ADDED
Binary file (762 Bytes). View file
 
models/similarity_model.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.feature_extraction.text import TfidfVectorizer
2
+ from sklearn.metrics.pairwise import cosine_similarity
3
+
4
def compute_similarity(text1, text2):
    """Compute the cosine similarity of two texts using TF-IDF.

    Args:
        text1 (str): first text.
        text2 (str): second text.

    Returns:
        float | None: similarity value, or None when either text is empty
        or the computation fails (the error is printed).
    """
    try:
        # Empty input is an expected case: report it and return directly
        # instead of raising an exception only to catch it below.
        if not text1.strip() or not text2.strip():
            print("Errore durante il calcolo della similarità: Uno o entrambi i testi sono vuoti.")
            return None

        # TF-IDF vectorization of both texts over a shared vocabulary.
        vectorizer = TfidfVectorizer(stop_words='english')
        tfidf_matrix = vectorizer.fit_transform([text1, text2])

        # Cosine similarity between the two document vectors.
        similarity_matrix = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])

        # Return a plain Python float, as documented, rather than a NumPy scalar.
        return float(similarity_matrix[0][0])

    except Exception as e:
        print(f"Errore durante il calcolo della similarità: {e}")
        return None
32
+
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fpdf
2
+ flask
3
+ pdfplumber
4
+ nltk
5
+ sentence-transformers
6
+ scikit-learn
7
+ pandas
8
+ reportlab
9
+ matplotlib
10
+ requests
11
+ keybert
12
+ torch
13
+ transformers
14
+ spacy
start_local.sh ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Create a virtual environment, install the dependencies and start the app.
# Stop at the first failing command so packages are never installed outside
# the virtual environment when its creation or activation fails.
set -e

echo "⚙️ Avvio dell'ambiente NORUS..."
python3 -m venv venv
source venv/bin/activate
pip install --upgrade pip
pip install -r requirements.txt
echo "✅ Ambiente pronto. Avvio del server Flask..."
python app.py
static/.DS_Store ADDED
Binary file (8.2 kB). View file
 
static/css/.DS_Store ADDED
Binary file (6.15 kB). View file
 
static/css/style.css ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Reset base */
2
+ html, body {
3
+ height: 100%;
4
+ margin: 0;
5
+ padding: 0;
6
+ overflow-y: auto;
7
+ }
8
+
9
+ /* Corpo */
10
+ body {
11
+ font-family: Arial, sans-serif;
12
+ background-color: #f8f8f8;
13
+ color: #333;
14
+ display: flex;
15
+ flex-direction: column;
16
+ min-height: 100vh;
17
+ }
18
+
19
+ /* Header */
20
+ header {
21
+ background-color: rgba(42, 77, 111, 0.9);
22
+ color: #fff;
23
+ padding: 20px;
24
+ text-align: center;
25
+ }
26
+
27
+ header h1 {
28
+ margin-bottom: 10px;
29
+ font-size: 2.2em;
30
+ }
31
+
32
+ header p {
33
+ font-size: 1.2em;
34
+ }
35
+
36
+ /* Logo */
37
+ #logo {
38
+ display: block;
39
+ margin: 0 auto;
40
+ max-width: 200px;
41
+ height: auto;
42
+ cursor: pointer;
43
+ transition: transform 0.3s ease;
44
+ }
45
+
46
+ #logo:hover {
47
+ transform: scale(1.2);
48
+ }
49
+
50
+ /* Form principale */
51
+ form {
52
+ margin: 20px auto;
53
+ width: 90%;
54
+ max-width: 800px;
55
+ padding: 25px;
56
+ background-color: #fff;
57
+ border-radius: 12px;
58
+ box-shadow: 0 5px 15px rgba(0, 0, 0, 0.1);
59
+ }
60
+
61
+ label {
62
+ display: block;
63
+ font-size: 1.05em;
64
+ margin: 12px 0 5px;
65
+ font-weight: bold;
66
+ color: #2a4d6f;
67
+ }
68
+
69
+ input[type="file"],
70
+ input[type="text"],
71
+ input[type="number"],
72
+ select {
73
+ width: 100%;
74
+ padding: 10px;
75
+ margin-bottom: 15px;
76
+ border: 1px solid #ccc;
77
+ border-radius: 6px;
78
+ font-size: 1em;
79
+ box-sizing: border-box;
80
+ }
81
+
82
+ /* Input file */
83
+ input[type="file"]::file-selector-button {
84
+ padding: 6px 12px;
85
+ margin-right: 10px;
86
+ background-color: #2a4d6f;
87
+ color: white;
88
+ border: none;
89
+ border-radius: 5px;
90
+ cursor: pointer;
91
+ transition: background-color 0.3s;
92
+ }
93
+
94
+ input[type="file"]::file-selector-button:hover {
95
+ background-color: #1a3d56;
96
+ }
97
+
98
+ /* Pulsanti */
99
+ button {
100
+ width: 100%;
101
+ background-color: #2a4d6f;
102
+ color: #fff;
103
+ padding: 12px;
104
+ border: none;
105
+ border-radius: 6px;
106
+ font-size: 1.1em;
107
+ cursor: pointer;
108
+ transition: background-color 0.3s, transform 0.2s;
109
+ margin-top: 10px;
110
+ }
111
+
112
+ button:hover {
113
+ background-color: #1a3d56;
114
+ transform: scale(1.02);
115
+ }
116
+
117
+ /* Risultati */
118
+ .results {
119
+ padding: 25px;
120
+ background-color: #fff;
121
+ margin: 30px auto;
122
+ border-radius: 12px;
123
+ box-shadow: 0 5px 15px rgba(0, 0, 0, 0.1);
124
+ max-width: 1000px;
125
+ overflow-x: auto;
126
+ }
127
+
128
+ /* Tabelle */
129
+ table {
130
+ width: 100%;
131
+ border-collapse: collapse;
132
+ margin-top: 25px;
133
+ }
134
+
135
+ th {
136
+ background-color: #2a4d6f;
137
+ color: #fff;
138
+ padding: 12px;
139
+ text-align: left;
140
+ }
141
+
142
td {
  padding: 12px;
  border-bottom: 1px solid #ddd;
  background-color: #f9f9f9;
}

/* The cells paint their own opaque background, so a colour set on the row
   would stay hidden behind them: apply the hover colour to the cells. */
table tr:hover td {
  background-color: #eef3f7;
}
151
+
152
+ table th, table td {
153
+ font-size: 1em;
154
+ word-wrap: break-word;
155
+ }
156
+
157
+ /* Grafico */
158
+ #chart-container {
159
+ width: 100%;
160
+ max-width: 1000px;
161
+ height: 500px;
162
+ margin: 40px auto;
163
+ }
164
+
165
+ canvas {
166
+ width: 100% !important;
167
+ height: 100% !important;
168
+ display: block;
169
+ }
170
+
171
+ /* Barra di caricamento */
172
+ #progress-container {
173
+ width: 100%;
174
+ background-color: #e0e0e0;
175
+ border-radius: 20px;
176
+ overflow: hidden;
177
+ margin-top: 20px;
178
+ }
179
+
180
+ #progress-bar {
181
+ height: 20px;
182
+ width: 0;
183
+ background: linear-gradient(90deg, #4caf50 0%, #8bc34a 100%);
184
+ text-align: center;
185
+ line-height: 20px;
186
+ color: white;
187
+ font-weight: bold;
188
+ transition: width 0.4s ease;
189
+ }
190
+
191
+ /* Quando al 100%, barra diventa blu */
192
+ #progress-bar.complete {
193
+ background: linear-gradient(90deg, #2196f3 0%, #21cbf3 100%);
194
+ }
195
+
196
+ /* Footer */
197
footer {
  background-color: #2a4d6f;
  color: #fff;
  text-align: center;
  padding: 15px;
  width: 100%;
  /* Include the padding in the 100% width; otherwise the footer is 30px
     wider than the page and causes a horizontal scrollbar. */
  box-sizing: border-box;
  font-size: 1em;
  margin-top: auto;
}
206
+
207
+ /* Responsive layout */
208
+ @media screen and (max-width: 600px) {
209
+ form, .results {
210
+ width: 95%;
211
+ padding: 15px;
212
+ }
213
+
214
+ header h1 {
215
+ font-size: 1.5em;
216
+ }
217
+
218
+ header p {
219
+ font-size: 1em;
220
+ }
221
+ }
static/js/.DS_Store ADDED
Binary file (6.15 kB). View file
 
static/js/script.js ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
document.addEventListener("DOMContentLoaded", function () {
  // Logo: short "pulse" animation on click.
  const logoLink = document.getElementById("logo-link");
  if (logoLink) {
    logoLink.addEventListener("click", function () {
      const logo = document.getElementById("logo");
      if (!logo) return;
      logo.style.transform = "scale(1.5)";
      setTimeout(() => {
        logo.style.transform = "scale(1)";
      }, 500);
    });
  }

  // Simulated progress bar shown while the server runs the analysis.
  const TOTAL_TIME_MS = 180000; // 3 minutes
  let progressInterval = null;
  let progressFallback = null;

  function startProgress() {
    // The form may trigger this twice per submit (inline onsubmit attribute
    // plus the listener below); a second timer would double the speed.
    if (progressInterval !== null) return;

    const progressBar = document.getElementById("progress-bar");
    const progressContainer = document.getElementById("progress-container");
    const analyzeBtn = document.querySelector("button[type='submit']");
    if (!progressBar || !progressContainer || !analyzeBtn) return;

    // Restore the idle state and cancel every pending timer.
    function resetProgress() {
      clearInterval(progressInterval);
      clearTimeout(progressFallback);
      progressInterval = null;
      progressFallback = null;
      progressContainer.style.display = "none";
      progressBar.classList.remove("complete");
      progressBar.style.width = "0%";
      progressBar.textContent = "0%";
      analyzeBtn.disabled = false;
      analyzeBtn.textContent = "Analyze";
    }

    progressContainer.style.display = "block";
    analyzeBtn.disabled = true;
    analyzeBtn.textContent = "⏳ Analysis in progress...";

    let width = 0;
    progressInterval = setInterval(() => {
      if (width >= 100) {
        clearInterval(progressInterval);
        progressBar.textContent = "100%";
        // The stylesheet defines a distinct look for the completed bar.
        progressBar.classList.add("complete");
        setTimeout(resetProgress, 1000);
      } else {
        width += 1;
        progressBar.style.width = width + "%";
        progressBar.textContent = width + "%";
      }
    }, TOTAL_TIME_MS / 100);

    // Safety fallback: re-enable the form even if the interval never completes.
    progressFallback = setTimeout(resetProgress, TOTAL_TIME_MS + 3000);
  }

  window.startProgress = startProgress;

  const analysisForm = document.getElementById("analysisForm");
  if (analysisForm) {
    analysisForm.addEventListener("submit", function () {
      startProgress();
    });
  }

  // Show only the option group that matches the selected analysis type.
  const analysisType = document.getElementById("analysis_type");
  if (analysisType) {
    analysisType.addEventListener("change", function () {
      const pubmedOptions = document.getElementById("pubmed-options");
      const localOptions = document.getElementById("local-options");
      if (pubmedOptions) {
        pubmedOptions.style.display = this.value === "pubmed" ? "block" : "none";
      }
      if (localOptions) {
        localOptions.style.display = this.value === "local" ? "block" : "none";
      }
    });
    analysisType.dispatchEvent(new Event("change"));
  }

  // Reflect the chosen files in the corresponding labels.
  const fileInput = document.getElementById("pdf_file");
  if (fileInput) {
    fileInput.addEventListener("change", function () {
      const fileLabel = document.querySelector('label[for="pdf_file"]');
      if (fileInput.files.length > 0 && fileLabel) {
        fileLabel.textContent = `Main PDF selected: ${fileInput.files[0].name}`;
      }
    });
  }

  const comparisonInput = document.getElementById("comparison_files");
  if (comparisonInput) {
    comparisonInput.addEventListener("change", function () {
      const label = document.querySelector('label[for="comparison_files"]');
      if (comparisonInput.files.length > 0 && label) {
        label.textContent = `${comparisonInput.files.length} comparison files selected`;
      }
    });
  }

  // Error messages disappear on their own after five seconds.
  const flashMessages = document.querySelectorAll(".error");
  if (flashMessages.length > 0) {
    setTimeout(() => {
      flashMessages.forEach(message => message.remove());
    }, 5000);
  }
});
style.css DELETED
@@ -1,28 +0,0 @@
1
- body {
2
- padding: 2rem;
3
- font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
4
- }
5
-
6
- h1 {
7
- font-size: 16px;
8
- margin-top: 0;
9
- }
10
-
11
- p {
12
- color: rgb(107, 114, 128);
13
- font-size: 15px;
14
- margin-bottom: 10px;
15
- margin-top: 5px;
16
- }
17
-
18
- .card {
19
- max-width: 620px;
20
- margin: 0 auto;
21
- padding: 16px;
22
- border: 1px solid lightgray;
23
- border-radius: 16px;
24
- }
25
-
26
- .card p:last-child {
27
- margin-bottom: 0;
28
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
templates/.DS_Store ADDED
Binary file (6.15 kB). View file
 
templates/NORUS.html ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
6
+ <title>NORUS Tool</title>
7
+ <link rel="stylesheet" href="{{ url_for('static', filename='css/style.css') }}">
8
+ <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
9
+ <script src="{{ url_for('static', filename='js/script.js') }}"></script>
10
+ </head>
11
+ <body>
12
+ <header>
13
+ <div style="text-align: center; margin-top: 20px;">
14
+ <a href="#" id="logo-link">
15
+ <img id="logo" src="https://i.imgur.com/MT5Sl9h.png" alt="NORUS Logo" style="width: 150px;" />
16
+ </a>
17
+ </div>
18
+ <h1>NORUS Tool</h1>
19
+ <p>Analyze your PDF and discover originality and similarity</p>
20
+ </header>
21
+
22
+ <main>
23
{# Messages flashed by the Flask views (category "error"). #}
{% with messages = get_flashed_messages(with_categories=true) %}
  {% for category, message in messages %}
    <p class="{{ category }}" role="alert" style="text-align: center;">{{ message }}</p>
  {% endfor %}
{% endwith %}

{# The progress bar is started by the submit listener in static/js/script.js;
   an inline onsubmit handler here would start it a second time. #}
<form id="analysisForm" action="/validate" method="POST" enctype="multipart/form-data">
  <label for="analysis_type">Choose Analysis Type:</label>
  <select name="analysis_type" id="analysis_type" required>
    <option value="local">Local Comparison</option>
    <option value="pubmed">PubMed Search</option>
  </select>

  <div id="pubmed-options" style="display: none;">
    <label for="query">PubMed Query:</label>
    <input type="text" name="query" id="query" />

    <label for="year_start">Start Year:</label>
    <input type="number" name="year_start" id="year_start" min="1900" max="2025" value="2000" />

    <label for="year_end">End Year:</label>
    <input type="number" name="year_end" id="year_end" min="1900" max="2025" value="2025" />

    <label for="num_articles">Number of Articles:</label>
    <input type="number" name="num_articles" id="num_articles" min="1" value="10" />
  </div>

  <div id="local-options" style="display: none;">
    <label for="comparison_files">Upload comparison PDFs (select multiple):</label>
    <input type="file" name="comparison_files" id="comparison_files" accept="application/pdf,.pdf" multiple />
  </div>

  <label for="pdf_file">Upload your main PDF:</label>
  <input type="file" name="pdf_file" id="pdf_file" accept="application/pdf,.pdf" required />

  <button type="submit">Analyze</button>
</form>

<div id="progress-container" style="display: none;">
  <p style="text-align: center;">⏳ Analysis in progress...</p>
  <div id="progress-bar">0%</div>
</div>
59
+
60
+ {% if results %}
61
+ <section>
62
+ <h2>Analysis Results</h2>
63
+ <table>
64
+ <thead>
65
+ <tr>
66
+ <th>Title</th>
67
+ <th>Semantic Similarity (%)</th>
68
+ <th>Token Overlap (%)</th>
69
+ <th>OUI (Originality & Uniqueness Index)</th>
70
+ </tr>
71
+ </thead>
72
+ <tbody>
73
+ {% for result in results %}
74
+ <tr>
75
+ <td style="max-width: 400px; word-wrap: break-word;">{{ result.title }}</td>
76
+ <td>{{ "%.2f"|format(result.similarity) }}</td>
77
+ <td>{{ "%.2f"|format(result.token_overlap) }}</td>
78
+ <td>{{ "%.2f"|format(result.oui) }}</td>
79
+ </tr>
80
+ {% endfor %}
81
+ </tbody>
82
+ </table>
83
+
84
+ {% if keywords %}
85
+ <div class="results" style="text-align: center; margin-top: 30px;">
86
+ <h3>🔑 Common Keywords</h3>
87
+ <p>
88
+ {% for kw, count in keywords %}
89
+ <span style="margin: 5px; font-weight: bold;">{{ kw }} ({{ count }})</span>
90
+ {% endfor %}
91
+ </p>
92
+ </div>
93
+ {% endif %}
94
+
95
+ <form action="/download_report" method="post" style="text-align: center; margin-top: 30px;">
96
+ <button type="submit">📄 Download PDF Report</button>
97
+ </form>
98
+
99
+ <div id="chart-container" style="margin-top: 50px;">
100
+ <canvas id="similarityChart"></canvas>
101
+ </div>
102
+ </section>
103
+ {% endif %}
104
+ </main>
105
+
106
+ <footer><p>&copy; 2025 NORUS Tool. All rights reserved.</p></footer>
107
+
108
+ <script>
109
+ document.addEventListener("DOMContentLoaded", function() {
110
+ const analysisType = document.getElementById("analysis_type");
111
+ const pubmedOptions = document.getElementById("pubmed-options");
112
+ const localOptions = document.getElementById("local-options");
113
+
114
+ function toggleOptions() {
115
+ if (analysisType.value === "pubmed") {
116
+ pubmedOptions.style.display = "block";
117
+ localOptions.style.display = "none";
118
+ } else {
119
+ pubmedOptions.style.display = "none";
120
+ localOptions.style.display = "block";
121
+ }
122
+ }
123
+
124
+ analysisType.addEventListener("change", toggleOptions);
125
+ toggleOptions();
126
+ });
127
+ </script>
128
+
129
+ {% if results %}
130
<script>
  // The data is serialized with "tojson": it emits valid JavaScript literals
  // that are safe inside a <script> element. Printing the Python list repr
  // instead breaks on titles containing quotes and lets a crafted title
  // (PubMed record or uploaded file name) inject markup.
  new Chart(document.getElementById('similarityChart'), {
    type: 'bar',
    data: {
      labels: {{ results | map(attribute='title') | list | tojson }},
      datasets: [
        {
          label: 'Semantic Similarity (%)',
          data: {{ results | map(attribute='similarity') | list | tojson }},
          backgroundColor: 'rgba(54, 162, 235, 0.7)',
          borderColor: 'rgba(54, 162, 235, 1)',
          borderWidth: 1
        },
        {
          label: 'Token Overlap (%)',
          data: {{ results | map(attribute='token_overlap') | list | tojson }},
          backgroundColor: 'rgba(255, 159, 64, 0.7)',
          borderColor: 'rgba(255, 159, 64, 1)',
          borderWidth: 1
        },
        {
          label: 'OUI (%)',
          data: {{ results | map(attribute='oui') | list | tojson }},
          backgroundColor: 'rgba(153, 102, 255, 0.7)',
          borderColor: 'rgba(153, 102, 255, 1)',
          borderWidth: 1
        }
      ]
    },
    options: {
      responsive: true,
      plugins: {
        legend: { position: 'top' },
        tooltip: { mode: 'index', intersect: false }
      },
      scales: {
        y: { beginAtZero: true },
        x: {
          ticks: {
            autoSkip: false,
            maxRotation: 45,
            minRotation: 45
          }
        }
      }
    }
  });
</script>
178
+ {% endif %}
179
+ </body>
180
+ </html>
templates/app.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import pdfplumber
4
+ import numpy as np
5
+ from flask import Flask, render_template, request, redirect, url_for, flash
6
+ from werkzeug.utils import secure_filename
7
+ from sentence_transformers import SentenceTransformer, util
8
+ import nltk
9
+ from nltk.stem import WordNetLemmatizer, PorterStemmer
10
+ from nltk.tokenize import word_tokenize
11
+ from nltk.corpus import stopwords
12
+
13
# Request the NLTK resources used below: the tokenizer model, the WordNet
# data for the lemmatizer, and the stop-word lists.
for _resource in ("punkt", "wordnet", "stopwords"):
    nltk.download(_resource)

# Shared text-normalisation helpers, built once at import time.
lemmatizer = WordNetLemmatizer()
stemmer = PorterStemmer()
# A set gives constant-time membership tests when filtering tokens.
stop_words = set(stopwords.words("english"))
20
+
21
app = Flask(__name__)
app.config["UPLOAD_FOLDER"] = "uploads"
# flash() stores its messages in the signed session cookie, which Flask
# refuses to create without a secret key (it raises RuntimeError instead).
# Prefer a key from the environment; the random fallback keeps the app
# working but invalidates pending flash messages on every restart.
app.secret_key = os.environ.get("SECRET_KEY") or os.urandom(32)
os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)

# Sentence-embedding model used for the semantic-similarity score; loaded
# once at import time and shared by every request.
model = SentenceTransformer("allenai/scibert_scivocab_uncased")
26
+
27
def extract_pdf_text(pdf_path):
    """Return the lower-cased text of a PDF, as far as it could be read.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all readable pages, lower-cased and stripped of leading
        and trailing whitespace. If opening or reading the file fails, the
        error is printed and whatever was extracted before the failure is
        returned (an empty string when nothing was read).
    """
    page_texts = []
    try:
        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                # extract_text() can yield a falsy value for a page without
                # extractable text; treat that as an empty page.
                page_texts.append(page.extract_text() or "")
    except Exception as e:
        # Deliberate best effort: an unreadable PDF must not abort the
        # whole comparison run.
        print(f"Errore estrazione testo: {e}")
    # Join pages with a newline so the last word of one page is not fused
    # with the first word of the next (which would distort token overlap).
    return "\n".join(page_texts).lower().strip()
36
+
37
def preprocess_text(text):
    """Normalise text into a space-separated string of stemmed lemmas.

    The text is lower-cased and tokenized; tokens that are not purely
    alphanumeric or that appear in ``stop_words`` are dropped, and every
    remaining token is lemmatized and then stemmed.

    Args:
        text: Raw input text.

    Returns:
        The normalised tokens joined by single spaces.
    """
    kept = []
    for token in word_tokenize(text.lower()):
        if not token.isalnum() or token in stop_words:
            continue
        kept.append(stemmer.stem(lemmatizer.lemmatize(token)))
    return " ".join(kept)
42
+
43
def calculate_token_overlap(text1, text2):
    """Return the share of text1's distinct tokens that also occur in text2.

    Tokens are whitespace-separated and compared as sets, so repetitions do
    not count. The measure is relative to ``text1`` only and is therefore
    not symmetric.

    Args:
        text1: Reference text whose vocabulary is the denominator.
        text2: Text compared against the reference.

    Returns:
        A percentage rounded to two decimals; 0.0 when ``text1`` has no
        tokens.
    """
    reference = set(text1.split())
    shared = reference.intersection(text2.split())
    # Guard against division by zero for an empty reference text.
    denominator = max(len(reference), 1)
    return round((len(shared) / denominator) * 100, 2)
48
+
49
def calculate_oui(similarity, token_overlap, alpha=0.7, beta=0.3):
    """Combine both similarity measures into a single originality score.

    Each input percentage is turned into its complement (100 % similar
    means 0 novelty) and the two complements are blended with the given
    weights.

    Args:
        similarity: Semantic similarity as a percentage.
        token_overlap: Token overlap as a percentage.
        alpha: Weight of the semantic component.
        beta: Weight of the token-overlap component.

    Returns:
        The weighted score as a percentage, clamped to the range 0-100 and
        rounded to two decimals.
    """
    semantic_novelty = 1 - similarity / 100
    lexical_novelty = 1 - token_overlap / 100
    score = (alpha * semantic_novelty + beta * lexical_novelty) * 100
    # Inputs outside 0-100 (e.g. a negative cosine similarity) could push
    # the score out of range, hence the clamp.
    clamped = max(0, min(score, 100))
    return round(clamped, 2)
52
+
53
def validate_document(pdf_path, comparison_sources, method="local", titles=None):
    """Score an uploaded PDF against every comparison source.

    Args:
        pdf_path: Path of the PDF under evaluation.
        comparison_sources: PDF paths when ``method`` is ``"local"``;
            otherwise each item is used directly as the comparison text.
        method: ``"local"`` to read the sources from PDF files; any other
            value treats the sources as plain text.
        titles: Optional labels, matched to the sources by position.

    Returns:
        One dict per source with the keys ``title``, ``similarity``,
        ``token_overlap`` and ``oui``; the numeric values are percentages
        rounded to two decimals.
    """
    pdf_text = extract_pdf_text(pdf_path)
    # The uploaded document is identical for every comparison, so embed it
    # once instead of re-running the model on each loop iteration.
    pdf_embedding = model.encode(pdf_text, convert_to_tensor=True)
    is_local = method == "local"

    results = []
    for i, doc in enumerate(comparison_sources):
        doc_text = extract_pdf_text(doc) if is_local else doc
        doc_embedding = model.encode(doc_text, convert_to_tensor=True)
        similarity = util.pytorch_cos_sim(pdf_embedding, doc_embedding).item() * 100
        token_overlap = calculate_token_overlap(pdf_text, doc_text)
        oui = calculate_oui(similarity, token_overlap)

        # Label priority: explicit title, then the file name for local
        # sources, then a generic placeholder.
        if titles and i < len(titles):
            title = titles[i]
        elif is_local:
            title = os.path.basename(doc)
        else:
            title = "Unknown Title"

        results.append({
            "title": title,
            "similarity": round(similarity, 2),
            "token_overlap": round(token_overlap, 2),
            "oui": round(oui, 2),
        })
    return results
67
+
68
def fetch_pubmed_details(article_id, timeout=10):
    """Fetch the title and the abstract plus keywords of a PubMed article.

    Args:
        article_id: PubMed identifier passed to the efetch endpoint.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A ``(title, text)`` tuple where ``text`` is the abstract followed by
        the keywords. Missing parts are replaced by the placeholders
        ``"No Title"``, ``"No Abstract"`` and ``"No Keywords"``. When the
        request fails or the response is not valid XML, the error is printed
        and ``("No Title", "No Abstract")`` is returned.
    """
    import xml.etree.ElementTree as ET

    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    params = {"db": "pubmed", "id": article_id, "retmode": "xml"}
    try:
        # Without a timeout a stalled connection would block the request
        # handler indefinitely.
        response = requests.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()
        root = ET.fromstring(response.text)
    except (requests.exceptions.RequestException, ET.ParseError) as e:
        print(f"Errore recupero abstract: {e}")
        return "No Title", "No Abstract"

    def _full_text(element):
        # Element.text stops at the first child tag (and is None when the
        # element starts with one); itertext() also collects text nested in
        # inline markup.
        return "".join(element.itertext()).strip()

    title_element = root.find(".//ArticleTitle")
    title = (_full_text(title_element) if title_element is not None else "") or "No Title"

    # An abstract may be split into several AbstractText sections; keep all
    # of them rather than only the first.
    abstract_parts = [_full_text(el) for el in root.findall(".//AbstractText")]
    abstract = " ".join(part for part in abstract_parts if part) or "No Abstract"

    keyword_parts = [_full_text(kw) for kw in root.findall(".//Keyword")]
    keyword_text = " ".join(part for part in keyword_parts if part) or "No Keywords"

    print(f"\n๐Ÿ” ARTICOLO RECUPERATO\n๐Ÿ“– Titolo: {title}\n๐Ÿ“ Abstract: {abstract[:500]}...\n๐Ÿ”‘ Keywords: {keyword_text}\n")
    return title, f"{abstract} {keyword_text}"
85
+
86
def fetch_pubmed(query, year_start, year_end, max_results=10, timeout=10):
    """Search PubMed and return the identifiers of the matching articles.

    Args:
        query: Search term.
        year_start: First publication year of the date filter.
        year_end: Last publication year of the date filter.
        max_results: Maximum number of identifiers to request.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The list of article identifiers from the esearch response, or an
        empty list when the request fails or the response cannot be decoded
        (the error is printed).
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    params = {
        "db": "pubmed",
        "term": f"{query} AND ({year_start}[PDAT] : {year_end}[PDAT])",
        "retmax": max_results,
        "retmode": "json",
    }
    try:
        # Without a timeout a stalled connection would block the request
        # handler indefinitely.
        response = requests.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()
        return response.json().get("esearchresult", {}).get("idlist", [])
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Errore recupero articoli PubMed: {e}")
    return []
97
+
98
@app.route("/")
def index():
    """Serve the analysis form, rendered without any results."""
    page = render_template("NORUS.html")
    return page
101
+
102
@app.route("/validate", methods=["POST"])
def validate():
    """Handle the analysis form and render the comparison results.

    Saves the uploaded PDF into the upload folder, then compares it either
    with the PDFs of a server-side directory (``analysis_type == "local"``)
    or with the articles returned by a PubMed search
    (``analysis_type == "pubmed"``). Invalid input flashes an error message
    and redirects back to the form.
    """
    pdf_file = request.files.get("pdf_file")
    analysis_type = request.form.get("analysis_type")
    local_dir = request.form.get("local_directory", "").strip()
    query = request.form.get("query", "").strip()

    if not pdf_file:
        flash("Carica un file PDF valido.", "error")
        return redirect(url_for("index"))

    # secure_filename() may strip a name down to "" (e.g. a name made only
    # of unsafe characters); the save target would then be the upload
    # directory itself, so fall back to a fixed name.
    filename = secure_filename(pdf_file.filename) or "upload.pdf"
    pdf_path = os.path.join(app.config["UPLOAD_FOLDER"], filename)
    pdf_file.save(pdf_path)

    results = []
    if analysis_type == "local":
        # NOTE(review): local_dir is a server-side path supplied by the
        # client; consider restricting it to an allowed base directory.
        if not os.path.isdir(local_dir):
            flash("Seleziona una directory valida.", "error")
            return redirect(url_for("index"))
        # Match the extension case-insensitively so "paper.PDF" is included.
        comparison_files = [
            os.path.join(local_dir, f)
            for f in os.listdir(local_dir)
            if f.lower().endswith(".pdf")
        ]
        if not comparison_files:
            flash("La directory non contiene PDF.", "error")
            return redirect(url_for("index"))
        results = validate_document(pdf_path, comparison_files, method="local")
    elif analysis_type == "pubmed":
        year_start = request.form.get("year_start", "2000")
        year_end = request.form.get("year_end", "2025")
        # A blank or non-numeric field must not turn into a server error;
        # fall back to the form's default and require at least one article.
        try:
            num_articles = int(request.form.get("num_articles", "10"))
        except ValueError:
            num_articles = 10
        num_articles = max(1, num_articles)
        pubmed_ids = fetch_pubmed(query, year_start, year_end, num_articles)
        pubmed_results = [fetch_pubmed_details(article_id) for article_id in pubmed_ids]
        results = validate_document(
            pdf_path,
            [text for _, text in pubmed_results],
            method="pubmed",
            titles=[title for title, _ in pubmed_results],
        )
    return render_template("NORUS.html", results=results)
132
+
133
if __name__ == "__main__":
    # Listen on all interfaces: the app is started with "python app.py"
    # inside a container, where Flask's default loopback-only binding is not
    # reachable from outside.
    # Debug mode enables the interactive debugger, which must not be exposed
    # publicly, so it is opt-in through the FLASK_DEBUG environment variable.
    app.run(host="0.0.0.0", port=7860, debug=os.environ.get("FLASK_DEBUG") == "1")