File size: 1,656 Bytes
8da6fbf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

def setup_vector_db(
    pdf_path,
    *,
    chunk_size=1000,
    chunk_overlap=50,
    model_name="Snowflake/snowflake-arctic-embed-l-v2.0",
):
    """Build a FAISS vector store from the contents of a PDF file.

    The PDF is loaded with ``PyPDFLoader``, split into chunks with
    ``RecursiveCharacterTextSplitter``, embedded with ``HuggingFaceEmbeddings``
    and indexed with ``FAISS.from_documents``.

    Args:
        pdf_path: Path of the PDF file handed to ``PyPDFLoader``.
        chunk_size: ``chunk_size`` forwarded to the text splitter.
        chunk_overlap: ``chunk_overlap`` forwarded to the text splitter.
        model_name: Embedding model identifier forwarded to
            ``HuggingFaceEmbeddings``.

    Returns:
        The FAISS vector store built from the embedded chunks.
    """
    # Load the PDF and split it into chunks.
    documents = PyPDFLoader(pdf_path).load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = text_splitter.split_documents(documents)

    # Embed the chunks and build the vector database.
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return FAISS.from_documents(chunks, embeddings)

def get_local_content(vector_db, query, k=5):
    """Return the text of the documents most similar to *query*.

    Args:
        vector_db: Object exposing ``similarity_search(query, k=...)`` that
            returns items carrying a ``page_content`` attribute.
        query: Text to search for.
        k: Number of documents requested from the search (defaults to 5,
            the value that used to be hard-coded).

    Returns:
        The ``page_content`` of every returned document joined with a single
        space; an empty string when the search returns nothing.
    """
    matches = vector_db.similarity_search(query, k=k)
    return " ".join(match.page_content for match in matches)

def check_local_knowledge(query, vector_db, threshold=0.7):
    """Check whether *query* can be answered from the local knowledge base.

    The single best match is fetched together with its relevance score and
    the score is compared against ``threshold``. A plain nearest-neighbour
    search returns a document for any query as long as the store is not
    empty, so its result alone cannot tell relevant from irrelevant matches.

    Args:
        query: Text to look up.
        vector_db: Object exposing
            ``similarity_search_with_relevance_scores(query, k=...)`` that
            returns ``(document, score)`` pairs.
        threshold: Minimum score the best match must reach to count as
            relevant.
            NOTE(review): LangChain documents relevance scores as lying in
            [0, 1] with higher meaning more similar; confirm this holds for
            the distance metric the store was built with.

    Returns:
        True when the best match scores at least ``threshold``. False when
        there is no match, the score is below ``threshold``, or the lookup
        raised an exception.
    """
    try:
        # Fetch the closest document along with its relevance score.
        scored_matches = vector_db.similarity_search_with_relevance_scores(
            query, k=1
        )
        return any(score >= threshold for _doc, score in scored_matches)
    except Exception as e:
        # Deliberate best effort: a failing lookup is reported and treated
        # as "no local knowledge" instead of propagating to the caller.
        print(f"Erro ao verificar conhecimento local: {e}")
        return False