tferhan committed on
Commit
52cefa9
·
verified ·
1 Parent(s): 70879f1

Update document_scrapped.py

Browse files
Files changed (1) hide show
  1. document_scrapped.py +2 -1
document_scrapped.py CHANGED
@@ -5,6 +5,7 @@ import json
5
  import io
6
  import fitz
7
  from pptx import Presentation
 
8
  import chardet
9
  from docx import Document
10
  import pandas as pd
@@ -96,7 +97,7 @@ def excel(link : str) -> str:
96
  response = requests.get(link)
97
  if response.status_code == 200:
98
  file_content = response.content
99
- df = pd.read_excel(file_content)
100
  if df.shape[0] > 50:
101
  sample_size = 50
102
  sample_df = df.sample(n=sample_size, random_state=42)
 
5
  import io
6
  import fitz
7
  from pptx import Presentation
8
+ from io import BytesIO
9
  import chardet
10
  from docx import Document
11
  import pandas as pd
 
97
  response = requests.get(link)
98
  if response.status_code == 200:
99
  file_content = response.content
100
+ df = pd.read_excel(BytesIO(file_content))
101
  if df.shape[0] > 50:
102
  sample_size = 50
103
  sample_df = df.sample(n=sample_size, random_state=42)