Spaces:

DrishtiSharma
/

chat-w-google-patents

Running

DrishtiSharma committed on Dec 20, 2024

Commit

a4abe5f

verified ·

1 Parent(s): 0f83ec1

Update interim/app.py

Files changed (1) hide show

interim/app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import sys
 import os
 import re
@@ -61,7 +62,16 @@ def load_docs(document_path):
         )
         documents = loader.load()
         text_splitter = NLTKTextSplitter(chunk_size=1000)
-        return text_splitter.split_documents(documents)
     except Exception as e:
         st.error(f"Failed to load and process PDF: {e}")
         st.stop()

+# to-do: Enable downloading multiple patent PDFs via corresponding links
 import sys
 import os
 import re
         )
         documents = loader.load()
         text_splitter = NLTKTextSplitter(chunk_size=1000)
+        split_docs = text_splitter.split_documents(documents)
+        # Filter metadata to only include str, int, float, or bool
+        for doc in split_docs:
+            if hasattr(doc, "metadata") and isinstance(doc.metadata, dict):
+                doc.metadata = {
+                    k: v for k, v in doc.metadata.items()
+                    if isinstance(v, (str, int, float, bool))
+                }
+        return split_docs
     except Exception as e:
         st.error(f"Failed to load and process PDF: {e}")
         st.stop()