import pickle

import faiss
import numpy as np
import streamlit as st
import torch
from transformers import AutoModel, AutoTokenizer, T5ForConditionalGeneration, T5Tokenizer

# Local artifacts (FAISS index, corpus texts, precomputed embeddings) and Hugging Face model IDs
FAISS_INDEX_PATH = "faiss_index_file.index"
TEXTS_PATH = "texts.pkl"
EMBEDDINGS_PATH = "embeddings_file.npy"
EMBEDDING_MODEL_NAME = "Ah1111/Embedding_Model"
GENERATOR_MODEL_NAME = "Ah1111/Generator_Model"


# Load generator model (T5)
@st.cache_resource
def load_llm():
    tokenizer = T5Tokenizer.from_pretrained(GENERATOR_MODEL_NAME)
    model = T5ForConditionalGeneration.from_pretrained(GENERATOR_MODEL_NAME)
    return tokenizer, model
    # Alternative generator: "google/flan-t5-base" via AutoTokenizer / AutoModelForSeq2SeqLM


# Load embedding model (custom Hugging Face model)
@st.cache_resource
def load_embedding_model():
    tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME)
    model = AutoModel.from_pretrained(EMBEDDING_MODEL_NAME)
    return tokenizer, model


# Load FAISS index, corpus texts, and precomputed embeddings
@st.cache_resource
def load_faiss():
    faiss_index = faiss.read_index(FAISS_INDEX_PATH)
    with open(TEXTS_PATH, "rb") as f:
        data = pickle.load(f)
    embeddings = np.load(EMBEDDINGS_PATH, allow_pickle=True)
    return faiss_index, data, embeddings


# Encode the query using the embedding model (mean pooling over the last hidden state)
def encode_query(query, tokenizer, model):
    inputs = tokenizer(query, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        embeddings = model(**inputs).last_hidden_state.mean(dim=1)
    return embeddings.cpu().numpy()


# Search top-k contexts
def search(query, tokenizer, model, index, data, k=5):
    query_embedding = encode_query(query, tokenizer, model).astype("float32")
    _, I = index.search(query_embedding, k)
    results = [data[i] for i in I[0] if i != -1]
    return results


# Generate response using generator model
def generate_response(context, query, tokenizer, model):
    input_text = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"
    inputs = tokenizer.encode(input_text, return_tensors="pt", truncation=True)
    outputs = model.generate(inputs, max_length=512, do_sample=True, temperature=0.7)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response


# Streamlit app
def main():
    st.set_page_config(page_title="Clinical QA with RAG", page_icon="🩺")
    st.title("🔎 Clinical QA System (RAG + FAISS + T5)")
    st.markdown(
        """
        Enter your **clinical question** below.
        The system will retrieve relevant context and generate an informed answer using a local model. 🚀
        """
    )

    # Load models and files (cached across reruns)
    embed_tokenizer, embed_model = load_embedding_model()
    gen_tokenizer, gen_model = load_llm()
    faiss_index, data, embeddings = load_faiss()

    query = st.text_input("💬 Your Question:")

    if query:
        with st.spinner("🔍 Retrieving and Generating..."):
            contexts = search(query, embed_tokenizer, embed_model, faiss_index, data)
            combined_context = " ".join(contexts)
            response = generate_response(combined_context, query, gen_tokenizer, gen_model)

        st.success("✅ Answer Ready!")
        st.subheader("📄 Response:")
        st.write(response)


if __name__ == "__main__":
    main()