bainskarman committed on
Commit
15f5963
·
verified ·
1 Parent(s): c0a164f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -34
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import streamlit as st
2
  import os
3
- from huggingface_hub import InferenceApi
4
  from PyPDF2 import PdfReader
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain_community.embeddings import HuggingFaceEmbeddings
@@ -11,10 +11,24 @@ from langdetect import detect
11
  token = os.environ.get("KEY2") # Replace "KEY2" with your secret key name
12
 
13
  # Initialize the Hugging Face Inference API
14
- def load_llm():
15
  model_name = "HuggingFaceH4/zephyr-7b-alpha" # Replace with your preferred model
16
- api = InferenceApi(repo_id=model_name, token=token)
17
- return api
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  # Extract text from PDF
20
  def extract_text_from_pdf(file):
@@ -44,7 +58,7 @@ def create_vector_store(chunks, indexing_method="multi-representation", **kwargs
44
  return vector_store
45
 
46
  # Query the PDF using the Hugging Face API
47
- def query_pdf(vector_store, query, api, query_method="multi-query", max_new_tokens=200, temperature=0.7, top_k=50):
48
  # Retrieve relevant chunks from the vector store
49
  docs = vector_store.similarity_search(query)
50
  context = " ".join([doc.page_content for doc in docs])
@@ -53,15 +67,8 @@ def query_pdf(vector_store, query, api, query_method="multi-query", max_new_toke
53
  prompt = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"
54
 
55
  # Query the Hugging Face API
56
- response = api(
57
- inputs=prompt,
58
- parameters={
59
- "max_new_tokens": max_new_tokens,
60
- "temperature": temperature,
61
- "top_k": top_k,
62
- },
63
- )
64
- return response[0]["generated_text"], docs
65
 
66
  # Detect language of the text
67
  def detect_language(text):
@@ -107,43 +114,33 @@ def main():
107
  st.session_state.vector_store = create_vector_store(st.session_state.chunks, indexing_method=indexing_method)
108
  st.success("Vector store created!")
109
 
110
- # Step 3: Load LLM (Hugging Face API)
111
- if "api" not in st.session_state:
112
- st.session_state.api = None
113
-
114
  if st.session_state.vector_store:
115
- st.subheader("LLM Parameters")
116
- temperature = st.slider("Temperature", 0.1, 1.0, 0.7, help="Controls randomness in the output.")
117
- top_k = st.slider("Top-k", 1, 100, 50, help="Limits sampling to the top-k tokens.")
118
- max_new_tokens = st.slider("Max New Tokens", 50, 500, 200, help="Maximum number of tokens to generate.")
119
- if st.button("Load LLM"):
120
- api = load_llm()
121
- st.session_state.api = api
122
- st.success("LLM loaded!")
123
-
124
- # Step 4: Query the PDF
125
- if st.session_state.api:
126
  st.subheader("Query Translation Options")
127
  query_method = st.selectbox(
128
  "Query Translation Method",
129
  ["multi-query", "rag-fusion", "decomposition", "step-back", "hyde"],
130
  help="Choose a method to improve query retrieval."
131
  )
 
 
 
 
132
  query = st.text_input("Ask a question about the PDF:")
133
  if query:
134
  answer, source_docs = query_pdf(
135
  st.session_state.vector_store,
136
  query,
137
- st.session_state.api,
138
  query_method=query_method,
139
  max_new_tokens=max_new_tokens,
140
  temperature=temperature,
141
  top_k=top_k,
142
  )
143
- st.write("**Answer:**", answer)
144
- st.write("**Source Text:**")
145
- for doc in source_docs:
146
- st.write(doc.page_content)
 
147
 
148
  if __name__ == "__main__":
149
  main()
 
1
  import streamlit as st
2
  import os
3
+ import requests
4
  from PyPDF2 import PdfReader
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain_community.embeddings import HuggingFaceEmbeddings
 
11
  token = os.environ.get("KEY2") # Replace "KEY2" with your secret key name
12
 
13
  # Initialize the Hugging Face Inference API
14
def query_huggingface_api(prompt, max_new_tokens=200, temperature=0.7, top_k=50, timeout=60):
    """Send *prompt* to the Hugging Face Inference API and return the generated text.

    Parameters
    ----------
    prompt : str
        Full prompt (context + question) to send to the model.
    max_new_tokens : int
        Maximum number of tokens the model may generate.
    temperature : float
        Sampling temperature; higher values produce more random output.
    top_k : int
        Limits sampling to the top-k most likely tokens.
    timeout : float
        Seconds to wait for the HTTP response (new optional parameter;
        defaults keep the original call signature backward compatible).

    Returns
    -------
    str | None
        The generated text on success, or None on any failure (the error
        is reported in the Streamlit UI via ``st.error`` instead of raising).
    """
    model_name = "HuggingFaceH4/zephyr-7b-alpha"  # Replace with your preferred model
    api_url = f"https://api-inference.huggingface.co/models/{model_name}"
    headers = {"Authorization": f"Bearer {token}"}
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
            "top_k": top_k,
        },
    }
    try:
        # Bug fix: the original call had no timeout, so a stalled inference
        # endpoint would block the Streamlit app indefinitely.
        response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
    except requests.exceptions.RequestException as e:
        # Network-level failures (DNS, connection reset, timeout) previously
        # crashed the app; surface them in the UI like the non-200 path does.
        st.error(f"Request failed: {e}")
        return None
    if response.status_code == 200:
        return response.json()[0]["generated_text"]
    else:
        st.error(f"Error: {response.status_code} - {response.text}")
        return None
32
 
33
  # Extract text from PDF
34
  def extract_text_from_pdf(file):
 
58
  return vector_store
59
 
60
  # Query the PDF using the Hugging Face API
61
+ def query_pdf(vector_store, query, query_method="multi-query", max_new_tokens=200, temperature=0.7, top_k=50):
62
  # Retrieve relevant chunks from the vector store
63
  docs = vector_store.similarity_search(query)
64
  context = " ".join([doc.page_content for doc in docs])
 
67
  prompt = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"
68
 
69
  # Query the Hugging Face API
70
+ answer = query_huggingface_api(prompt, max_new_tokens=max_new_tokens, temperature=temperature, top_k=top_k)
71
+ return answer, docs
 
 
 
 
 
 
 
72
 
73
  # Detect language of the text
74
  def detect_language(text):
 
114
  st.session_state.vector_store = create_vector_store(st.session_state.chunks, indexing_method=indexing_method)
115
  st.success("Vector store created!")
116
 
117
+ # Step 3: Query the PDF
 
 
 
118
  if st.session_state.vector_store:
 
 
 
 
 
 
 
 
 
 
 
119
  st.subheader("Query Translation Options")
120
  query_method = st.selectbox(
121
  "Query Translation Method",
122
  ["multi-query", "rag-fusion", "decomposition", "step-back", "hyde"],
123
  help="Choose a method to improve query retrieval."
124
  )
125
+ st.subheader("LLM Parameters")
126
+ temperature = st.slider("Temperature", 0.1, 1.0, 0.7, help="Controls randomness in the output.")
127
+ top_k = st.slider("Top-k", 1, 100, 50, help="Limits sampling to the top-k tokens.")
128
+ max_new_tokens = st.slider("Max New Tokens", 50, 500, 200, help="Maximum number of tokens to generate.")
129
  query = st.text_input("Ask a question about the PDF:")
130
  if query:
131
  answer, source_docs = query_pdf(
132
  st.session_state.vector_store,
133
  query,
 
134
  query_method=query_method,
135
  max_new_tokens=max_new_tokens,
136
  temperature=temperature,
137
  top_k=top_k,
138
  )
139
+ if answer:
140
+ st.write("**Answer:**", answer)
141
+ st.write("**Source Text:**")
142
+ for doc in source_docs:
143
+ st.write(doc.page_content)
144
 
145
  if __name__ == "__main__":
146
  main()