Spaces:
Sleeping
Sleeping
Update document_scrapped.py
Browse files- document_scrapped.py +4 -4
document_scrapped.py
CHANGED
@@ -104,7 +104,7 @@ def excel(link : str) -> str:
|
|
104 |
sample_df = df
|
105 |
json_data = sample_df.to_json(orient='records')
|
106 |
js = json.loads(json_data)
|
107 |
-
rs =
|
108 |
return rs
|
109 |
else:
|
110 |
print("Failed to download file")
|
@@ -134,7 +134,7 @@ def csv(link : str) -> str:
|
|
134 |
|
135 |
json_data = sample_df.to_json(orient='records')
|
136 |
js = json.loads(json_data)
|
137 |
-
rs =
|
138 |
return rs
|
139 |
|
140 |
except Exception as e:
|
@@ -156,7 +156,7 @@ def docx(url : str) -> str:
|
|
156 |
full_text.append(para.text)
|
157 |
|
158 |
f = "\n".join(full_text)
|
159 |
-
n = select_words_until_char_limit(f,
|
160 |
return n
|
161 |
except Exception as e:
|
162 |
print(f"An error occurred: {e}")
|
@@ -182,7 +182,7 @@ def pptx(url : str) -> str:
|
|
182 |
full_text.append(shape.text)
|
183 |
|
184 |
g = "\n".join(full_text)
|
185 |
-
c = select_words_until_char_limit(g,
|
186 |
return c
|
187 |
except Exception as e:
|
188 |
print(f"An error occurred: {e}")
|
|
|
104 |
sample_df = df
|
105 |
json_data = sample_df.to_json(orient='records')
|
106 |
js = json.loads(json_data)
|
107 |
+
rs = select_words_until_char_limit(f"{js}", 32000)
|
108 |
return rs
|
109 |
else:
|
110 |
print("Failed to download file")
|
|
|
134 |
|
135 |
json_data = sample_df.to_json(orient='records')
|
136 |
js = json.loads(json_data)
|
137 |
+
rs = select_words_until_char_limit(f"{js}", 32000)
|
138 |
return rs
|
139 |
|
140 |
except Exception as e:
|
|
|
156 |
full_text.append(para.text)
|
157 |
|
158 |
f = "\n".join(full_text)
|
159 |
+
n = select_words_until_char_limit(f, 32000)
|
160 |
return n
|
161 |
except Exception as e:
|
162 |
print(f"An error occurred: {e}")
|
|
|
182 |
full_text.append(shape.text)
|
183 |
|
184 |
g = "\n".join(full_text)
|
185 |
+
c = select_words_until_char_limit(g, 32000)
|
186 |
return c
|
187 |
except Exception as e:
|
188 |
print(f"An error occurred: {e}")
|