from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS


def setup_vector_db(pdf_path):
    """Build a FAISS vector database from a PDF file.

    Args:
        pdf_path: Path to the PDF file to index.

    Returns:
        A FAISS vector store containing the embedded document chunks.
    """
    # Load the PDF and split it into overlapping chunks so that
    # similarity search can retrieve focused passages.
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=50,
    )
    chunks = text_splitter.split_documents(documents)

    # Embed the chunks and index them in FAISS.
    embeddings = HuggingFaceEmbeddings(
        model_name="Snowflake/snowflake-arctic-embed-l-v2.0"
    )
    vector_db = FAISS.from_documents(chunks, embeddings)
    return vector_db


def get_local_content(vector_db, query):
    """Return the concatenated text of the top-5 chunks matching *query*.

    Args:
        vector_db: FAISS vector store to search.
        query: Natural-language query string.

    Returns:
        The page contents of the 5 most similar chunks, joined with spaces.
    """
    docs = vector_db.similarity_search(query, k=5)
    return " ".join([doc.page_content for doc in docs])


def check_local_knowledge(query, vector_db, threshold=0.7):
    """Check whether the query can be answered from local knowledge.

    Returns True if the vector database holds at least one relevant
    document for *query*, False otherwise (including on search errors,
    so callers can fall back to an external source).

    Args:
        query: Natural-language query string.
        vector_db: FAISS vector store to search.
        threshold: Accepted for backward compatibility but currently
            UNUSED — no score cutoff is applied. FAISS returns raw L2
            distances by default, so a meaningful cutoff would need
            `similarity_search_with_score` and a calibrated scale.
            TODO: either wire this in or drop it from the signature.
    """
    try:
        # Any hit at k=1 counts as "answerable locally".
        docs = vector_db.similarity_search(query, k=1)
        return bool(docs)
    except Exception as e:
        # Best-effort: a failing search is treated as "no local
        # knowledge" rather than propagating the error.
        print(f"Erro ao verificar conhecimento local: {e}")
        return False