mabil committed on
Commit
94bfde7
·
1 Parent(s): 6505fd8

Fix: Improved OUI calculation and report in English, added article scoring, and optimized article selection

Browse files
Files changed (1) hide show
  1. app.py +10 -7
app.py CHANGED
@@ -80,7 +80,9 @@ def calculate_token_overlap(text1, text2):
80
  def calculate_oui(similarity, token_overlap, alpha=0.7, beta=0.3):
81
  oui = alpha * (1 - similarity / 100) + beta * (1 - token_overlap / 100)
82
  result = round(oui * 100, 2)
83
- return 0.0 if result == -0.0 else result
 
 
84
 
85
  # Funzione di scoring degli articoli
86
  def score_article(article, pdf_text):
@@ -118,7 +120,8 @@ def validate_document(pdf_path, comparison_sources, method="local", titles=None,
118
  'text': doc_text
119
  }
120
  score = score_article(article, pdf_text)
121
- scored_articles.append((score, article))
 
122
 
123
  # Ordina gli articoli in base al punteggio
124
  scored_articles.sort(reverse=True, key=lambda x: x[0])
@@ -263,13 +266,13 @@ def download_report():
263
  pdf = FPDF()
264
  pdf.add_page()
265
  pdf.set_font("Arial", "B", 16)
266
- pdf.cell(0, 10, "NORUS Tool - Report Analisi", ln=True, align="C")
267
  pdf.ln(10)
268
  pdf.set_font('Arial', '', 12)
269
- pdf.multi_cell(0, 10, "Indice OUI = alpha(1 - sim/100) + beta(1 - overlap/100), con alpha = 0.7 e beta = 0.3.\nValori più bassi di OUI indicano maggiore similarità semantica e testuale.")
270
  pdf.ln(5)
271
  pdf.set_font("Arial", "B", 12)
272
- pdf.cell(90, 10, "Titolo", 1)
273
  pdf.cell(30, 10, "Sim %", 1)
274
  pdf.cell(30, 10, "Overlap %", 1)
275
  pdf.cell(30, 10, "OUI", 1)
@@ -287,7 +290,7 @@ def download_report():
287
  if last_common_keywords:
288
  pdf.ln(6)
289
  pdf.set_font("Arial", "B", 12)
290
- pdf.cell(0, 10, "Parole chiave comuni:", ln=True)
291
  pdf.set_font("Arial", "", 11)
292
  for kw, count in last_common_keywords:
293
  pdf.cell(0, 10, f"- {kw} ({count})", ln=True)
@@ -302,4 +305,4 @@ def download_report():
302
  return send_file(output_path, as_attachment=True)
303
 
304
  if __name__ == "__main__":
305
- app.run(debug=True, host="0.0.0.0", port=7860)
 
80
  def calculate_oui(similarity, token_overlap, alpha=0.7, beta=0.3):
81
  oui = alpha * (1 - similarity / 100) + beta * (1 - token_overlap / 100)
82
  result = round(oui * 100, 2)
83
+ if result < 0:
84
+ result = 0 # Limita il valore a 0 se diventa negativo
85
+ return result
86
 
87
  # Funzione di scoring degli articoli
88
  def score_article(article, pdf_text):
 
120
  'text': doc_text
121
  }
122
  score = score_article(article, pdf_text)
123
+ if score <= 100: # Esclude articoli con punteggi anomali
124
+ scored_articles.append((score, article))
125
 
126
  # Ordina gli articoli in base al punteggio
127
  scored_articles.sort(reverse=True, key=lambda x: x[0])
 
266
  pdf = FPDF()
267
  pdf.add_page()
268
  pdf.set_font("Arial", "B", 16)
269
+ pdf.cell(0, 10, "NORUS Tool - Report Analysis", ln=True, align="C")
270
  pdf.ln(10)
271
  pdf.set_font('Arial', '', 12)
272
+ pdf.multi_cell(0, 10, "OUI Index = alpha(1 - similarity/100) + beta(1 - overlap/100), with alpha = 0.7 and beta = 0.3.\nLower OUI values indicate higher semantic and textual similarity.")
273
  pdf.ln(5)
274
  pdf.set_font("Arial", "B", 12)
275
+ pdf.cell(90, 10, "Title", 1)
276
  pdf.cell(30, 10, "Sim %", 1)
277
  pdf.cell(30, 10, "Overlap %", 1)
278
  pdf.cell(30, 10, "OUI", 1)
 
290
  if last_common_keywords:
291
  pdf.ln(6)
292
  pdf.set_font("Arial", "B", 12)
293
+ pdf.cell(0, 10, "Common Keywords:", ln=True)
294
  pdf.set_font("Arial", "", 11)
295
  for kw, count in last_common_keywords:
296
  pdf.cell(0, 10, f"- {kw} ({count})", ln=True)
 
305
  return send_file(output_path, as_attachment=True)
306
 
307
  if __name__ == "__main__":
308
+ app.run(debug=True, host="0.0.0.0", port=7860)