Spaces:
Sleeping
Sleeping
Update document_scrapped.py
Browse files- document_scrapped.py +2 -1
document_scrapped.py
CHANGED
@@ -5,6 +5,7 @@ import json
|
|
5 |
import io
|
6 |
import fitz
|
7 |
from pptx import Presentation
|
|
|
8 |
import chardet
|
9 |
from docx import Document
|
10 |
import pandas as pd
|
@@ -96,7 +97,7 @@ def excel(link : str) -> str:
|
|
96 |
response = requests.get(link)
|
97 |
if response.status_code == 200:
|
98 |
file_content = response.content
|
99 |
-
df = pd.read_excel(file_content)
|
100 |
if df.shape[0] > 50:
|
101 |
sample_size = 50
|
102 |
sample_df = df.sample(n=sample_size, random_state=42)
|
|
|
5 |
import io
|
6 |
import fitz
|
7 |
from pptx import Presentation
|
8 |
+
from io import BytesIO
|
9 |
import chardet
|
10 |
from docx import Document
|
11 |
import pandas as pd
|
|
|
97 |
response = requests.get(link)
|
98 |
if response.status_code == 200:
|
99 |
file_content = response.content
|
100 |
+
df = pd.read_excel(BytesIO(file_content))
|
101 |
if df.shape[0] > 50:
|
102 |
sample_size = 50
|
103 |
sample_df = df.sample(n=sample_size, random_state=42)
|