Spaces:

pentarosarium
/

clusters

Sleeping

App Files Community

pentarosarium committed on Nov 29, 2024

Commit

8a76f7a

1 Parent(s): b719885

1.10

Browse files

Files changed (1) hide show

app.py +16 -4

app.py CHANGED Viewed

@@ -321,19 +321,26 @@ def create_download_link(df: pd.DataFrame, filename: str) -> str:
     return f'<a href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}" download="{filename}">Download {filename}</a>'
 def main():
-    st.title("кластеризуем новости v.1.9")
     st.write("Upload Excel file with columns: company, datetime, text")
     uploaded_file = st.file_uploader("Choose Excel file", type=['xlsx'])
     if uploaded_file:
         try:
-            # Read all columns from original sheet
             df_original = pd.read_excel(uploaded_file, sheet_name='Публикации')
             # Create working copy with required columns
             df = df_original.copy()
-            df = df.iloc[:, [0,3,5,6]]  # columns for company, datetime, title, text
             df.columns = ['company', 'datetime', 'title', 'text']
             st.success(f'Loaded {len(df)} records')
@@ -370,7 +377,7 @@ def main():
                         # Get original indices from cluster
                         cluster_indices = cluster_rows.index
                         # Find the index with longest text
-                        text_lengths = df_original.iloc[cluster_indices]['text'].str.len()
                         longest_text_idx = cluster_indices[text_lengths.argmax()]
                         indices_to_keep.append(longest_text_idx)
@@ -406,11 +413,16 @@ def main():
                 except Exception as e:
                     st.error(f"Error: {str(e)}")
                 finally:
                     progress_bar.empty()
         except Exception as e:
             st.error(f"Error reading file: {str(e)}")
 if __name__ == "__main__":
     main()

     return f'<a href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}" download="{filename}">Download {filename}</a>'
 def main():
+    st.title("кластеризуем новости v.1.10")
     st.write("Upload Excel file with columns: company, datetime, text")
     uploaded_file = st.file_uploader("Choose Excel file", type=['xlsx'])
     if uploaded_file:
         try:
+            # First, let's look at the columns in the file
             df_original = pd.read_excel(uploaded_file, sheet_name='Публикации')
+            st.write("Available columns:", df_original.columns.tolist())
             # Create working copy with required columns
             df = df_original.copy()
+            # Assuming the order is fixed in the Excel file, adjust indices if needed
+            text_column = df_original.columns[6]  # Adjust if needed
+            title_column = df_original.columns[5]  # Adjust if needed
+            datetime_column = df_original.columns[3]  # Adjust if needed
+            company_column = df_original.columns[0]  # Adjust if needed
+            df = df_original[[company_column, datetime_column, title_column, text_column]].copy()
             df.columns = ['company', 'datetime', 'title', 'text']
             st.success(f'Loaded {len(df)} records')
                         # Get original indices from cluster
                         cluster_indices = cluster_rows.index
                         # Find the index with longest text
+                        text_lengths = df_original.iloc[cluster_indices][text_column].str.len()
                         longest_text_idx = cluster_indices[text_lengths.argmax()]
                         indices_to_keep.append(longest_text_idx)
                 except Exception as e:
                     st.error(f"Error: {str(e)}")
+                    # Print more detailed error information
+                    import traceback
+                    st.error(traceback.format_exc())
                 finally:
                     progress_bar.empty()
         except Exception as e:
             st.error(f"Error reading file: {str(e)}")
+            import traceback
+            st.error(traceback.format_exc())
 if __name__ == "__main__":
     main()