from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.chains.question_answering import load_qa_chain
import gradio as gr
import torch
from huggingface_hub import login
import os

directory = 'pets'

# Authenticate with the Hugging Face Hub; Gemma is a gated model, so a valid
# token must be available in the HF_TOKEN environment variable.
HF_TOKEN = os.getenv("HF_TOKEN")
login(token=HF_TOKEN)

def load_docs(directory):
    """Load every document found in the given directory."""
    loader = DirectoryLoader(directory)
    documents = loader.load()
    return documents

def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Split documents into overlapping chunks sized for embedding."""
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    docs = text_splitter.split_documents(documents)
    return docs

documents = load_docs(directory)
docs = split_docs(documents)

# Embed the chunks with a sentence-transformer model and index them in an
# in-memory Chroma vector store.
embeddings = SentenceTransformerEmbeddings(model_name="thenlper/gte-large")
db = Chroma.from_documents(docs, embeddings)

# Load Gemma and wrap it in a transformers text-generation pipeline so
# LangChain can call it as an LLM.
model_id = "google/gemma-1.1-2b-it"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=100)
hf = HuggingFacePipeline(pipeline=pipe)

# "stuff" chain: concatenate all retrieved chunks into a single prompt.
chain = load_qa_chain(hf, chain_type="stuff", verbose=True)

def output(query, history):
    # Retrieve the chunks most similar to the query and answer from them.
    matching_docs = db.similarity_search(query)
    answer = chain.run(input_documents=matching_docs, question=query)
    # The pipeline echoes the prompt; keep only the text from "Answer" onward,
    # falling back to the full string if the marker is missing (find() would
    # otherwise return -1 and slice off everything but the last character).
    idx = answer.find("Answer")
    return answer[idx:] if idx != -1 else answer

gr.ChatInterface(output).launch()
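
# --- Setup notes (assumptions, not part of the original script) ---
# The package list below is a best-guess sketch of this script's dependencies;
# pin versions to match your environment:
#   pip install langchain langchain-community chromadb sentence-transformers \
#       transformers torch gradio huggingface_hub unstructured
# DirectoryLoader delegates file parsing to the `unstructured` package, so it
# must be installed for load_docs() to handle the files in `pets`.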
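
# The script above rebuilds the embeddings index on every run. A minimal
# sketch for persisting it between runs, assuming Chroma's standard
# persist_directory parameter (illustrative only; the "chroma_db" path is a
# made-up example):
#
#   # first run: build and persist the index
#   db = Chroma.from_documents(docs, embeddings, persist_directory="chroma_db")
#
#   # later runs: reopen the persisted index without re-embedding
#   db = Chroma(persist_directory="chroma_db", embedding_function=embeddings)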