Spaces:
Sleeping
Sleeping
File size: 1,656 Bytes
8da6fbf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
def setup_vector_db(
    pdf_path,
    *,
    chunk_size=1000,
    chunk_overlap=50,
    model_name="Snowflake/snowflake-arctic-embed-l-v2.0",
):
    """Build a FAISS vector store from the text of a PDF file.

    The PDF is loaded with ``PyPDFLoader``, split into overlapping chunks
    with ``RecursiveCharacterTextSplitter``, embedded with
    ``HuggingFaceEmbeddings`` and indexed with ``FAISS.from_documents``.

    Args:
        pdf_path: Path to the PDF file to index.
        chunk_size: Maximum chunk size handed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks handed to the
            text splitter.
        model_name: Name of the Hugging Face embedding model to use.

    Returns:
        The FAISS vector store built from the PDF chunks.

    Raises:
        ValueError: If the PDF yields no text chunks (for example a scanned
            PDF without a text layer), since an index cannot be built from
            an empty chunk list.
    """
    # Load the PDF and split it into chunks.
    documents = PyPDFLoader(pdf_path).load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = text_splitter.split_documents(documents)

    # Fail early with a clear message instead of an opaque error raised
    # from inside the index construction.
    if not chunks:
        raise ValueError(f"No text chunks could be extracted from {pdf_path!r}")

    # Build the vector database from the embedded chunks.
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return FAISS.from_documents(chunks, embeddings)
def get_local_content(vector_db, query, k=5):
    """Return the text of the documents most similar to ``query``.

    Args:
        vector_db: Vector store exposing ``similarity_search(query, k=...)``
            whose results carry a ``page_content`` attribute.
        query: Text to search for.
        k: Number of documents to request from the vector store
            (defaults to 5).

    Returns:
        The ``page_content`` of the retrieved documents joined by single
        spaces; an empty string when nothing is retrieved.
    """
    docs = vector_db.similarity_search(query, k=k)
    return " ".join(doc.page_content for doc in docs)
def check_local_knowledge(query, vector_db, threshold=0.7):
    """Check whether the query can be answered from the local knowledge base.

    The single closest document is retrieved together with its relevance
    score, and the query counts as answerable only when that score reaches
    ``threshold``. A plain nearest-neighbour search returns a document for
    any query against a non-empty index, so the score comparison is what
    makes this check meaningful.

    Args:
        query: Text of the user query.
        vector_db: Vector store exposing
            ``similarity_search_with_relevance_scores(query, k=...)`` that
            returns ``(document, score)`` pairs.
        threshold: Minimum relevance score for the closest document to be
            considered relevant (defaults to 0.7).

    Returns:
        True if the closest document scores at least ``threshold``; False
        if no document is returned, the score is too low, or the lookup
        fails.

    Note:
        NOTE(review): relevance scores depend on the store's distance
        strategy and on whether embeddings are normalized; the default
        threshold may need calibration for the embedding model in use.
    """
    try:
        # Retrieve the best match together with its relevance score.
        scored_docs = vector_db.similarity_search_with_relevance_scores(query, k=1)
        return any(score >= threshold for _doc, score in scored_docs)
    except Exception as e:
        # Best-effort check: report the failure and treat it as "not found".
        print(f"Erro ao verificar conhecimento local: {e}")
        return False
|