pentarosarium committed on
Commit
8a76f7a
·
1 Parent(s): b719885
Files changed (1) hide show
  1. app.py +16 -4
app.py CHANGED
@@ -321,19 +321,26 @@ def create_download_link(df: pd.DataFrame, filename: str) -> str:
321
  return f'<a href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}" download="{filename}">Download {filename}</a>'
322
 
323
  def main():
324
- st.title("кластеризуем новости v.1.9")
325
  st.write("Upload Excel file with columns: company, datetime, text")
326
 
327
  uploaded_file = st.file_uploader("Choose Excel file", type=['xlsx'])
328
 
329
  if uploaded_file:
330
  try:
331
- # Read all columns from original sheet
332
  df_original = pd.read_excel(uploaded_file, sheet_name='Публикации')
 
333
 
334
  # Create working copy with required columns
335
  df = df_original.copy()
336
- df = df.iloc[:, [0,3,5,6]] # columns for company, datetime, title, text
 
 
 
 
 
 
337
  df.columns = ['company', 'datetime', 'title', 'text']
338
 
339
  st.success(f'Loaded {len(df)} records')
@@ -370,7 +377,7 @@ def main():
370
  # Get original indices from cluster
371
  cluster_indices = cluster_rows.index
372
  # Find the index with longest text
373
- text_lengths = df_original.iloc[cluster_indices]['text'].str.len()
374
  longest_text_idx = cluster_indices[text_lengths.argmax()]
375
  indices_to_keep.append(longest_text_idx)
376
 
@@ -406,11 +413,16 @@ def main():
406
 
407
  except Exception as e:
408
  st.error(f"Error: {str(e)}")
 
 
 
409
  finally:
410
  progress_bar.empty()
411
 
412
  except Exception as e:
413
  st.error(f"Error reading file: {str(e)}")
 
 
414
 
415
  if __name__ == "__main__":
416
  main()
 
321
  return f'<a href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}" download="{filename}">Download {filename}</a>'
322
 
323
  def main():
324
+ st.title("кластеризуем новости v.1.10")
325
  st.write("Upload Excel file with columns: company, datetime, text")
326
 
327
  uploaded_file = st.file_uploader("Choose Excel file", type=['xlsx'])
328
 
329
  if uploaded_file:
330
  try:
331
+ # First, let's look at the columns in the file
332
  df_original = pd.read_excel(uploaded_file, sheet_name='Публикации')
333
+ st.write("Available columns:", df_original.columns.tolist())
334
 
335
  # Create working copy with required columns
336
  df = df_original.copy()
337
+ # Assuming the order is fixed in the Excel file, adjust indices if needed
338
+ text_column = df_original.columns[6] # Adjust if needed
339
+ title_column = df_original.columns[5] # Adjust if needed
340
+ datetime_column = df_original.columns[3] # Adjust if needed
341
+ company_column = df_original.columns[0] # Adjust if needed
342
+
343
+ df = df_original[[company_column, datetime_column, title_column, text_column]].copy()
344
  df.columns = ['company', 'datetime', 'title', 'text']
345
 
346
  st.success(f'Loaded {len(df)} records')
 
377
  # Get original indices from cluster
378
  cluster_indices = cluster_rows.index
379
  # Find the index with longest text
380
+ text_lengths = df_original.iloc[cluster_indices][text_column].str.len()
381
  longest_text_idx = cluster_indices[text_lengths.argmax()]
382
  indices_to_keep.append(longest_text_idx)
383
 
 
413
 
414
  except Exception as e:
415
  st.error(f"Error: {str(e)}")
416
+ # Print more detailed error information
417
+ import traceback
418
+ st.error(traceback.format_exc())
419
  finally:
420
  progress_bar.empty()
421
 
422
  except Exception as e:
423
  st.error(f"Error reading file: {str(e)}")
424
+ import traceback
425
+ st.error(traceback.format_exc())
426
 
427
  if __name__ == "__main__":
428
  main()