# NOTE: the lines below are residue from the Hugging Face Spaces page banner
# ("Spaces: Running on Zero") captured during page extraction; kept as a
# comment so the file remains valid Python.
import os

from langchain.schema.document import Document
from langchain_chroma import Chroma
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_text_splitters import (
    MarkdownHeaderTextSplitter,
    RecursiveCharacterTextSplitter,
)
from transformers import AutoTokenizer

import spaces  # Hugging Face Spaces ZeroGPU decorators (used elsewhere in the app)

# HF access token for the gated Llama tokenizer; must be set in the Space secrets.
api_token = os.getenv("HF_TOKEN")
model_name = "meta-llama/Llama-3.1-8B-Instruct"
# Tokenizer is used for token-aware text splitting so chunk sizes match the LLM.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)

# BGE embedding model on GPU; normalized embeddings so cosine similarity
# reduces to a dot product in the vector store.
embedding_model = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-large-en-v1.5",
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": True},
    query_instruction="",
)
def create_rag_index(text_no_prefix, chunk_size=256):
    """Split raw text into token-sized chunks and build a Chroma vectorstore.

    Note: despite the original comment, no PDF loading happens here — the
    caller passes already-extracted text.

    Args:
        text_no_prefix: Raw document text (with any prompt prefix stripped).
        chunk_size: Target chunk length in tokens. Defaults to 256, matching
            the per-chunk budget assumed by ``run_naive_rag_query``.

    Returns:
        A Chroma vectorstore built over the embedded chunks.
    """
    # Token-based splitter so chunk sizes are measured in the LLM's own tokens.
    text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
        tokenizer,
        chunk_size=chunk_size,
        chunk_overlap=0,
        add_start_index=True,
        strip_whitespace=True,
        separators=["\n\n", "\n", ".", " ", ""],
    )
    # Wrap each chunk in a Document so Chroma can ingest it.
    docs = [Document(page_content=chunk) for chunk in text_splitter.split_text(text_no_prefix)]
    # In-memory index; embeddings come from the module-level BGE model.
    return Chroma.from_documents(documents=docs, embedding=embedding_model)
def run_naive_rag_query(vectorstore, query, rag_token_size, prefix, task,
                        few_shot_examples, *, chunk_size=256, verbose=True):
    """Retrieve top-k chunks for *query* and assemble the RAG prompt context.

    k is derived from the retrieval token budget: one chunk is roughly
    ``chunk_size`` tokens (see ``create_rag_index``), so
    ``k = rag_token_size // chunk_size`` fills the budget.

    Args:
        vectorstore: Chroma vectorstore produced by ``create_rag_index``.
        query: Natural-language retrieval query.
        rag_token_size: Total token budget for retrieved context.
        prefix: Text placed before the retrieved context.
        task: Task instructions appended after the context.
        few_shot_examples: Few-shot examples appended after the task.
        chunk_size: Assumed tokens per chunk (must match the index). Default 256.
        verbose: When True (default, matching the original behavior), print
            each retrieved chunk for debugging.

    Returns:
        The assembled prompt string:
        prefix + "Retrieved context: \\n" + chunks + task + few_shot_examples.
    """
    # At least one chunk even when the budget is smaller than a chunk.
    k = max(1, rag_token_size // chunk_size)
    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": k})
    retrieved_docs = retriever.invoke(query)
    if verbose:
        # Debug dump of what was retrieved.
        for doc in retrieved_docs:
            print("=================")
            print(doc.page_content)
            print("=================")
    formatted_context = "\n\n".join(doc.page_content for doc in retrieved_docs)
    rag_context = prefix + "Retrieved context: \n" + formatted_context + task + few_shot_examples
    return rag_context