Fix: Improved OUI calculation and report in English, added article scoring, and optimized article selection
Browse files
app.py
CHANGED
@@ -80,7 +80,9 @@ def calculate_token_overlap(text1, text2):
|
|
80 |
def calculate_oui(similarity, token_overlap, alpha=0.7, beta=0.3):
|
81 |
oui = alpha * (1 - similarity / 100) + beta * (1 - token_overlap / 100)
|
82 |
result = round(oui * 100, 2)
|
83 |
-
|
|
|
|
|
84 |
|
85 |
# Funzione di scoring degli articoli
|
86 |
def score_article(article, pdf_text):
|
@@ -118,7 +120,8 @@ def validate_document(pdf_path, comparison_sources, method="local", titles=None,
|
|
118 |
'text': doc_text
|
119 |
}
|
120 |
score = score_article(article, pdf_text)
|
121 |
-
|
|
|
122 |
|
123 |
# Ordina gli articoli in base al punteggio
|
124 |
scored_articles.sort(reverse=True, key=lambda x: x[0])
|
@@ -263,13 +266,13 @@ def download_report():
|
|
263 |
pdf = FPDF()
|
264 |
pdf.add_page()
|
265 |
pdf.set_font("Arial", "B", 16)
|
266 |
-
pdf.cell(0, 10, "NORUS Tool - Report
|
267 |
pdf.ln(10)
|
268 |
pdf.set_font('Arial', '', 12)
|
269 |
-
pdf.multi_cell(0, 10, "
|
270 |
pdf.ln(5)
|
271 |
pdf.set_font("Arial", "B", 12)
|
272 |
-
pdf.cell(90, 10, "
|
273 |
pdf.cell(30, 10, "Sim %", 1)
|
274 |
pdf.cell(30, 10, "Overlap %", 1)
|
275 |
pdf.cell(30, 10, "OUI", 1)
|
@@ -287,7 +290,7 @@ def download_report():
|
|
287 |
if last_common_keywords:
|
288 |
pdf.ln(6)
|
289 |
pdf.set_font("Arial", "B", 12)
|
290 |
-
pdf.cell(0, 10, "
|
291 |
pdf.set_font("Arial", "", 11)
|
292 |
for kw, count in last_common_keywords:
|
293 |
pdf.cell(0, 10, f"- {kw} ({count})", ln=True)
|
@@ -302,4 +305,4 @@ def download_report():
|
|
302 |
return send_file(output_path, as_attachment=True)
|
303 |
|
304 |
if __name__ == "__main__":
|
305 |
-
app.run(debug=True, host="0.0.0.0", port=7860)
|
|
|
80 |
def calculate_oui(similarity, token_overlap, alpha=0.7, beta=0.3):
    """Return the OUI index for a similarity / token-overlap pair.

    The index is
    ``alpha * (1 - similarity / 100) + beta * (1 - token_overlap / 100)``,
    scaled by 100 and rounded to two decimals.

    Args:
        similarity: Similarity score; divided by 100 before use.
        token_overlap: Token-overlap score; divided by 100 before use.
        alpha: Weight applied to the similarity term.
        beta: Weight applied to the token-overlap term.

    Returns:
        The rounded index as a float; negative results are clamped to 0.0.
    """
    oui = alpha * (1 - similarity / 100) + beta * (1 - token_overlap / 100)
    result = round(oui * 100, 2)
    # Inputs above 100 push the weighted sum below zero; clamp it. The floor
    # is a float so every path returns the same type.
    return 0.0 if result < 0 else result
|
86 |
|
87 |
# Funzione di scoring degli articoli
|
88 |
def score_article(article, pdf_text):
|
|
|
120 |
'text': doc_text
|
121 |
}
|
122 |
score = score_article(article, pdf_text)
|
123 |
+
if score <= 100: # Esclude articoli con punteggi anomali
|
124 |
+
scored_articles.append((score, article))
|
125 |
|
126 |
# Ordina gli articoli in base al punteggio
|
127 |
scored_articles.sort(reverse=True, key=lambda x: x[0])
|
|
|
266 |
pdf = FPDF()
|
267 |
pdf.add_page()
|
268 |
pdf.set_font("Arial", "B", 16)
|
269 |
+
pdf.cell(0, 10, "NORUS Tool - Report Analysis", ln=True, align="C")
|
270 |
pdf.ln(10)
|
271 |
pdf.set_font('Arial', '', 12)
|
272 |
+
pdf.multi_cell(0, 10, "OUI Index = alpha(1 - similarity/100) + beta(1 - overlap/100), with alpha = 0.7 and beta = 0.3.\nLower OUI values indicate higher semantic and textual similarity.")
|
273 |
pdf.ln(5)
|
274 |
pdf.set_font("Arial", "B", 12)
|
275 |
+
pdf.cell(90, 10, "Title", 1)
|
276 |
pdf.cell(30, 10, "Sim %", 1)
|
277 |
pdf.cell(30, 10, "Overlap %", 1)
|
278 |
pdf.cell(30, 10, "OUI", 1)
|
|
|
290 |
if last_common_keywords:
|
291 |
pdf.ln(6)
|
292 |
pdf.set_font("Arial", "B", 12)
|
293 |
+
pdf.cell(0, 10, "Common Keywords:", ln=True)
|
294 |
pdf.set_font("Arial", "", 11)
|
295 |
for kw, count in last_common_keywords:
|
296 |
pdf.cell(0, 10, f"- {kw} ({count})", ln=True)
|
|
|
305 |
return send_file(output_path, as_attachment=True)
|
306 |
|
307 |
if __name__ == "__main__":
    import os

    # The server binds to every interface (0.0.0.0). With debug mode on, the
    # interactive debugger would be reachable by remote clients, so debug is
    # opt-in: set FLASK_DEBUG=1 (or true/yes/on) to enable it for local work.
    # NOTE(review): assumes `app` is a Flask application -- confirm.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)
|