pentarosarium committed on
Commit
b4b8d2a
·
1 Parent(s): 7e00fac

progress more 20..

Browse files
Files changed (1) hide show
  1. app.py +3 -1
app.py CHANGED
@@ -108,12 +108,14 @@ def fuzzy_deduplicate(df, column, threshold=65):
108
  def process_file(uploaded_file):
109
  df = pd.read_excel(uploaded_file, sheet_name='Публикации')
110
 
 
 
111
  # Apply fuzzy deduplication
112
  df = df.groupby('Объект').apply(
113
  lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65)
114
  ).reset_index(drop=True)
115
 
116
- original_news_count = len(pre_df)
117
  remaining_news_count = len(df)
118
  duplicates_removed = original_news_count - remaining_news_count
119
 
 
108
  def process_file(uploaded_file):
109
  df = pd.read_excel(uploaded_file, sheet_name='Публикации')
110
 
111
+ original_news_count = len(df)
112
+
113
  # Apply fuzzy deduplication
114
  df = df.groupby('Объект').apply(
115
  lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65)
116
  ).reset_index(drop=True)
117
 
118
+
119
  remaining_news_count = len(df)
120
  duplicates_removed = original_news_count - remaining_news_count
121