DrishtiSharma committed on
Commit
a4abe5f
·
verified ·
1 Parent(s): 0f83ec1

Update interim/app.py

Browse files
Files changed (1) hide show
  1. interim/app.py +11 -1
interim/app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import sys
2
  import os
3
  import re
@@ -61,7 +62,16 @@ def load_docs(document_path):
61
  )
62
  documents = loader.load()
63
  text_splitter = NLTKTextSplitter(chunk_size=1000)
64
- return text_splitter.split_documents(documents)
 
 
 
 
 
 
 
 
 
65
  except Exception as e:
66
  st.error(f"Failed to load and process PDF: {e}")
67
  st.stop()
 
1
+ # to-do: Enable downloading multiple patent PDFs via corresponding links
2
  import sys
3
  import os
4
  import re
 
62
  )
63
  documents = loader.load()
64
  text_splitter = NLTKTextSplitter(chunk_size=1000)
65
+ split_docs = text_splitter.split_documents(documents)
66
+
67
+ # Filter metadata to only include str, int, float, or bool
68
+ for doc in split_docs:
69
+ if hasattr(doc, "metadata") and isinstance(doc.metadata, dict):
70
+ doc.metadata = {
71
+ k: v for k, v in doc.metadata.items()
72
+ if isinstance(v, (str, int, float, bool))
73
+ }
74
+ return split_docs
75
  except Exception as e:
76
  st.error(f"Failed to load and process PDF: {e}")
77
  st.stop()