tferhan committed on
Commit
70879f1
·
verified ·
1 Parent(s): 5d0e0a6

Update document_scrapped.py

Browse files
Files changed (1) hide show
  1. document_scrapped.py +4 -4
document_scrapped.py CHANGED
@@ -104,7 +104,7 @@ def excel(link : str) -> str:
104
  sample_df = df
105
  json_data = sample_df.to_json(orient='records')
106
  js = json.loads(json_data)
107
- rs = trim_input_words(f"{js}")
108
  return rs
109
  else:
110
  print("Failed to download file")
@@ -134,7 +134,7 @@ def csv(link : str) -> str:
134
 
135
  json_data = sample_df.to_json(orient='records')
136
  js = json.loads(json_data)
137
- rs = trim_input_words(f"{js}")
138
  return rs
139
 
140
  except Exception as e:
@@ -156,7 +156,7 @@ def docx(url : str) -> str:
156
  full_text.append(para.text)
157
 
158
  f = "\n".join(full_text)
159
- n = select_words_until_char_limit(f, 30000)
160
  return n
161
  except Exception as e:
162
  print(f"An error occurred: {e}")
@@ -182,7 +182,7 @@ def pptx(url : str) -> str:
182
  full_text.append(shape.text)
183
 
184
  g = "\n".join(full_text)
185
- c = select_words_until_char_limit(g, 30000)
186
  return c
187
  except Exception as e:
188
  print(f"An error occurred: {e}")
 
104
  sample_df = df
105
  json_data = sample_df.to_json(orient='records')
106
  js = json.loads(json_data)
107
+ rs = select_words_until_char_limit(f"{js}", 32000)
108
  return rs
109
  else:
110
  print("Failed to download file")
 
134
 
135
  json_data = sample_df.to_json(orient='records')
136
  js = json.loads(json_data)
137
+ rs = select_words_until_char_limit(f"{js}", 32000)
138
  return rs
139
 
140
  except Exception as e:
 
156
  full_text.append(para.text)
157
 
158
  f = "\n".join(full_text)
159
+ n = select_words_until_char_limit(f, 32000)
160
  return n
161
  except Exception as e:
162
  print(f"An error occurred: {e}")
 
182
  full_text.append(shape.text)
183
 
184
  g = "\n".join(full_text)
185
+ c = select_words_until_char_limit(g, 32000)
186
  return c
187
  except Exception as e:
188
  print(f"An error occurred: {e}")