Spaces:
Sleeping
Sleeping
Commit
·
8a76f7a
1
Parent(s):
b719885
1.10
Browse files
app.py
CHANGED
@@ -321,19 +321,26 @@ def create_download_link(df: pd.DataFrame, filename: str) -> str:
|
|
321 |
return f'<a href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}" download="{filename}">Download {filename}</a>'
|
322 |
|
323 |
def main():
|
324 |
-
st.title("кластеризуем новости v.1.
|
325 |
st.write("Upload Excel file with columns: company, datetime, text")
|
326 |
|
327 |
uploaded_file = st.file_uploader("Choose Excel file", type=['xlsx'])
|
328 |
|
329 |
if uploaded_file:
|
330 |
try:
|
331 |
-
#
|
332 |
df_original = pd.read_excel(uploaded_file, sheet_name='Публикации')
|
|
|
333 |
|
334 |
# Create working copy with required columns
|
335 |
df = df_original.copy()
|
336 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
337 |
df.columns = ['company', 'datetime', 'title', 'text']
|
338 |
|
339 |
st.success(f'Loaded {len(df)} records')
|
@@ -370,7 +377,7 @@ def main():
|
|
370 |
# Get original indices from cluster
|
371 |
cluster_indices = cluster_rows.index
|
372 |
# Find the index with longest text
|
373 |
-
text_lengths = df_original.iloc[cluster_indices][
|
374 |
longest_text_idx = cluster_indices[text_lengths.argmax()]
|
375 |
indices_to_keep.append(longest_text_idx)
|
376 |
|
@@ -406,11 +413,16 @@ def main():
|
|
406 |
|
407 |
except Exception as e:
|
408 |
st.error(f"Error: {str(e)}")
|
|
|
|
|
|
|
409 |
finally:
|
410 |
progress_bar.empty()
|
411 |
|
412 |
except Exception as e:
|
413 |
st.error(f"Error reading file: {str(e)}")
|
|
|
|
|
414 |
|
415 |
if __name__ == "__main__":
|
416 |
main()
|
|
|
321 |
return f'<a href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}" download="{filename}">Download {filename}</a>'
|
322 |
|
323 |
def main():
|
324 |
+
st.title("кластеризуем новости v.1.10")
|
325 |
st.write("Upload Excel file with columns: company, datetime, text")
|
326 |
|
327 |
uploaded_file = st.file_uploader("Choose Excel file", type=['xlsx'])
|
328 |
|
329 |
if uploaded_file:
|
330 |
try:
|
331 |
+
# First, let's look at the columns in the file
|
332 |
df_original = pd.read_excel(uploaded_file, sheet_name='Публикации')
|
333 |
+
st.write("Available columns:", df_original.columns.tolist())
|
334 |
|
335 |
# Create working copy with required columns
|
336 |
df = df_original.copy()
|
337 |
+
# Assuming the order is fixed in the Excel file, adjust indices if needed
|
338 |
+
text_column = df_original.columns[6] # Adjust if needed
|
339 |
+
title_column = df_original.columns[5] # Adjust if needed
|
340 |
+
datetime_column = df_original.columns[3] # Adjust if needed
|
341 |
+
company_column = df_original.columns[0] # Adjust if needed
|
342 |
+
|
343 |
+
df = df_original[[company_column, datetime_column, title_column, text_column]].copy()
|
344 |
df.columns = ['company', 'datetime', 'title', 'text']
|
345 |
|
346 |
st.success(f'Loaded {len(df)} records')
|
|
|
377 |
# Get original indices from cluster
|
378 |
cluster_indices = cluster_rows.index
|
379 |
# Find the index with longest text
|
380 |
+
text_lengths = df_original.iloc[cluster_indices][text_column].str.len()
|
381 |
longest_text_idx = cluster_indices[text_lengths.argmax()]
|
382 |
indices_to_keep.append(longest_text_idx)
|
383 |
|
|
|
413 |
|
414 |
except Exception as e:
|
415 |
st.error(f"Error: {str(e)}")
|
416 |
+
# Print more detailed error information
|
417 |
+
import traceback
|
418 |
+
st.error(traceback.format_exc())
|
419 |
finally:
|
420 |
progress_bar.empty()
|
421 |
|
422 |
except Exception as e:
|
423 |
st.error(f"Error reading file: {str(e)}")
|
424 |
+
import traceback
|
425 |
+
st.error(traceback.format_exc())
|
426 |
|
427 |
if __name__ == "__main__":
|
428 |
main()
|