# NOTE: "Spaces: Sleeping" is page-status residue captured with this file; it is not part of the program.
from langchain_community.document_loaders import PyPDFLoader | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain_huggingface.embeddings import HuggingFaceEmbeddings | |
from langchain_community.vectorstores import FAISS | |
def setup_vector_db(
    pdf_path,
    chunk_size=1000,
    chunk_overlap=50,
    model_name="Snowflake/snowflake-arctic-embed-l-v2.0",
) -> "FAISS":
    """Build a FAISS vector store from the contents of a PDF.

    Args:
        pdf_path: Path of the PDF handed to ``PyPDFLoader``.
        chunk_size: ``chunk_size`` given to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` given to the same splitter.
        model_name: Hugging Face model name used by ``HuggingFaceEmbeddings``.

    Returns:
        The ``FAISS`` store created from the PDF's chunks.

    Raises:
        ValueError: If loading and splitting the PDF produced no chunks.
    """
    # Load the PDF and split it into chunks.
    documents = PyPDFLoader(pdf_path).load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = text_splitter.split_documents(documents)

    # Fail early with a clear message rather than handing FAISS an empty list
    # (e.g. a PDF with no extractable text).
    if not chunks:
        raise ValueError(f"No text chunks could be extracted from {pdf_path!r}")

    # Embed the chunks and build the vector database.
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return FAISS.from_documents(chunks, embeddings)
def get_local_content(vector_db, query, k=5):
    """Return the text of the documents most similar to ``query``.

    Args:
        vector_db: Object exposing ``similarity_search(query, k=...)`` whose
            results each have a ``page_content`` attribute.
        query: Text to search for.
        k: Number of documents requested from the store (defaults to 5, the
            value that was previously hard-coded).

    Returns:
        The ``page_content`` of every returned document joined with single
        spaces; an empty string when the search returns nothing.
    """
    docs = vector_db.similarity_search(query, k=k)
    return " ".join(doc.page_content for doc in docs)
def check_local_knowledge(query, vector_db, threshold=0.7):
    """Tell whether the local vector store holds content relevant to ``query``.

    The best match is retrieved together with its relevance score and is
    considered relevant only when that score reaches ``threshold``. A plain
    nearest-neighbour lookup is not enough here: it returns a document for any
    non-empty store, however unrelated the query is.

    Args:
        query: Text of the user's question.
        vector_db: Object exposing
            ``similarity_search_with_relevance_scores(query, k=...)`` that
            returns ``(document, score)`` pairs.
        threshold: Minimum relevance score for the best match to count.
            NOTE(review): the score scale depends on the store's distance
            metric and on whether embeddings are normalized -- confirm that
            0.7 is a sensible cut-off for this index.

    Returns:
        True when the best match scores at least ``threshold``; False when
        there is no match, the score is too low, or the lookup fails.
    """
    try:
        # Fetch only the single best match along with its relevance score.
        results = vector_db.similarity_search_with_relevance_scores(query, k=1)
        if not results:
            return False  # nothing stored / nothing returned
        _, score = results[0]
        # bool() so callers get a plain bool even if the score is a numpy float.
        return bool(score >= threshold)
    except Exception as e:
        # Best-effort check: report the problem and treat it as "not known locally".
        print(f"Erro ao verificar conhecimento local: {e}")
        return False