vivekvar committed on
Commit 58cd55d · verified · 1 Parent(s): 79d6b4a

Update app.py

Files changed (1)
  1. app.py +38 -22
app.py CHANGED
@@ -1,21 +1,29 @@
  import streamlit as st
- import os
- import base64
- from huggingface_hub import InferenceApi
- from dotenv import load_dotenv
  from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
+ from llama_index.llms.huggingface import HuggingFaceInferenceAPI
+ from dotenv import load_dotenv
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
  from llama_index.core import Settings
+ import os
+ import base64

  # Load environment variables
  load_dotenv()

- # Define the Hugging Face model API endpoint and your token
- model_name = "meta-llama/Llama-3.3-70B-Instruct"
- api_token = os.getenv("HF_TOKEN")
+ # Configure the LlamaIndex settings to use a Hugging Face LLaMA model
+ Settings.llm = HuggingFaceInferenceAPI(
+     model_name="facebook/bedrock-llama-7b",  # LLaMA 7B model used here
+     tokenizer_name="facebook/bedrock-llama-7b",  # Tokenizer for the LLaMA model
+     context_window=30000,  # Context window size (adjust if necessary)
+     api_token=os.getenv("HF_TOKEN"),  # Hugging Face API token
+     max_new_tokens=512,
+     generate_kwargs={"temperature": 0.1},  # Generation temperature
+ )

- # Initialize the HuggingFace API for inference
- inference = InferenceApi(repo_id=model_name, token=api_token)
+ # Set up the Hugging Face embedding model
+ Settings.embed_model = HuggingFaceEmbedding(
+     model_name="facebook/bedrock-llama-7b"  # Model used for embeddings
+ )

  # Define the directory for persistent storage and data
  PERSIST_DIR = "./db"
@@ -25,38 +33,48 @@ DATA_DIR = "data"
  os.makedirs(DATA_DIR, exist_ok=True)
  os.makedirs(PERSIST_DIR, exist_ok=True)

- # Function to display the PDF file in Streamlit
  def displayPDF(file):
      with open(file, "rb") as f:
          base64_pdf = base64.b64encode(f.read()).decode('utf-8')
      pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
      st.markdown(pdf_display, unsafe_allow_html=True)

- # Function to process data ingestion
  def data_ingestion():
      documents = SimpleDirectoryReader(DATA_DIR).load_data()
      storage_context = StorageContext.from_defaults()
      index = VectorStoreIndex.from_documents(documents)
      index.storage_context.persist(persist_dir=PERSIST_DIR)

- # Function to handle the query using Hugging Face's Inference API
- def generate_response(input_text):
+ def handle_query(query):
+     storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
+     index = load_index_from_storage(storage_context)
+     chat_text_qa_msgs = [
+         (
+             "user",
+             """created by vivek created for Neonflake Enterprises OPC Pvt Ltd
+             Context:
+             {context_str}
+             Question:
+             {query_str}
+             """
+         )
+     ]
+     text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
+     query_engine = index.as_query_engine(text_qa_template=text_qa_template)
+
      try:
-         response = inference(inputs=input_text)
-         return response['generated_text']  # Adjust based on actual response structure
+         answer = query_engine.query(query)
+         return answer.response or "Sorry, no answer found."
      except Exception as e:
          return f"An error occurred: {str(e)}"

  # Streamlit app initialization
  st.title("Chat with your PDF 📄")
  st.markdown("Built by [vivek](https://github.com/saravivek-cyber)")
- st.markdown("Chat here")

- # Initial message setup
  if 'messages' not in st.session_state:
      st.session_state.messages = [{'role': 'assistant', "content": 'Hello! Upload a PDF and ask me anything about its content.'}]

- # Sidebar for file upload and processing
  with st.sidebar:
      st.title("Menu:")
      uploaded_file = st.file_uploader("Upload your PDF File")
@@ -68,14 +86,12 @@ with st.sidebar:
          data_ingestion()
          st.success("Data ingestion completed.")

- # Handling user input for querying the PDF content
  user_prompt = st.chat_input("Ask me anything about the content of the PDF:")
  if user_prompt:
      st.session_state.messages.append({'role': 'user', "content": user_prompt})
-     response = generate_response(user_prompt)  # Use Hugging Face inference directly
+     response = handle_query(user_prompt)
      st.session_state.messages.append({'role': 'assistant', "content": response})

- # Displaying chat messages
  for message in st.session_state.messages:
      with st.chat_message(message['role']):
-         st.write(message['content'])
+         st.write(message['content'])
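
A quick way to sanity-check this revision before `streamlit run app.py`: the app only needs `HF_TOKEN` in the environment (read via `load_dotenv()` / `os.getenv`) and a model id that resolves on the Hugging Face Hub. The snippet below is a minimal standalone sketch, not part of app.py or this commit; the filename is illustrative, the model id is copied verbatim from the diff above, and it assumes `huggingface_hub` is installed (it was already imported by the previous revision).

```python
# sanity_check.py — illustrative helper, not part of the commit
import os

from dotenv import load_dotenv
from huggingface_hub import model_info

load_dotenv()  # reads HF_TOKEN from a .env file, same as app.py does

token = os.getenv("HF_TOKEN")
assert token, "HF_TOKEN is not set; add it to your environment or a .env file"

# model_info() raises if the repo id does not exist or the token lacks access,
# so this fails fast before Streamlit ever starts.
info = model_info("facebook/bedrock-llama-7b", token=token)
print(f"Model resolved on the Hub: {info.id}")
```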