lozanopastor committed on
Commit
6498416
·
verified ·
1 Parent(s): f21e950

Upload 5 files

Browse files
Files changed (5) hide show
  1. README.md +7 -5
  2. app.py +115 -0
  3. gitattributes +35 -0
  4. gitignore +1 -0
  5. requirements.txt +15 -0
README.md CHANGED
@@ -1,12 +1,14 @@
1
  ---
2
- title: PDFChat
3
- emoji: 💻
4
- colorFrom: pink
5
- colorTo: green
6
  sdk: streamlit
7
- sdk_version: 1.42.1
8
  app_file: app.py
9
  pinned: false
 
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Chat Pdf File Using Deepseek Llm
3
+ emoji: 🚀
4
+ colorFrom: purple
5
+ colorTo: red
6
  sdk: streamlit
7
+ sdk_version: 1.41.1
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
+ short_description: Answer questions about your PDFs using DeepSeek R1
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ import os
5
+ from langchain_community.embeddings import HuggingFaceEmbeddings # Using Hugging Face embeddings
6
+ from langchain.vectorstores import FAISS
7
+ from langchain_groq import ChatGroq
8
+ from langchain.chains.question_answering import load_qa_chain
9
+ from langchain.prompts import PromptTemplate
10
+ from dotenv import load_dotenv
11
+
12
# Load environment variables (e.g. GROQ_API_KEY) from a local .env file, if
# one exists. The key itself is read where the Groq client is constructed; the
# former bare `os.getenv("GROQ_API_KEY")` statement discarded its result and
# had no effect, so it has been removed.
load_dotenv()
15
+
16
def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page in the given PDFs.

    Args:
        pdf_docs: Iterable of PDF sources accepted by ``PdfReader`` (in this
            app, the uploaded file objects). May be empty or ``None``.

    Returns:
        One string containing the text of all pages, in upload and page
        order. Returns ``""`` when there are no documents or no page has
        extractable text.
    """
    page_texts = []
    for pdf in pdf_docs or []:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Image-only pages may yield no text; coerce a falsy result to ""
            # so concatenation can never fail on a non-string value.
            page_texts.append(page.extract_text() or "")
    # Join once instead of repeated `+=` to avoid quadratic string building.
    return "".join(page_texts)
24
+
25
def get_text_chunks(text, chunk_size=10000, chunk_overlap=1000):
    """Split extracted text into overlapping chunks for embedding.

    Args:
        text: The full text to split.
        chunk_size: Maximum size of each chunk, as measured by the splitter
            (characters by default). Defaults to the original hard-coded
            value of 10000.
        chunk_overlap: Amount of text shared between consecutive chunks.
            Defaults to the original hard-coded value of 1000.

    Returns:
        The list of chunk strings produced by
        ``RecursiveCharacterTextSplitter.split_text``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)
30
+
31
def get_vector_store(text_chunks):
    """Embed the text chunks and persist them as a local FAISS index.

    The index is written to the ``faiss_index`` directory, which is where
    ``user_input`` loads it from.

    Args:
        text_chunks: Non-empty list of strings to embed.

    Raises:
        ValueError: If ``text_chunks`` is empty. Without this check the empty
            list would be handed to ``FAISS.from_texts``, which cannot build
            an index from zero vectors.
    """
    if not text_chunks:
        raise ValueError("Cannot build a vector store from an empty list of text chunks.")

    # Must be the same embedding model that is used when the index is queried.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    vector_store.save_local("faiss_index")
36
+
37
def get_conversational_chain():
    """Build the question-answering chain backed by a Groq-hosted LLM.

    The chain uses the "stuff" strategy: all retrieved documents are placed
    into the ``{context}`` slot of a single prompt together with the user's
    ``{question}``.

    Returns:
        The chain object returned by ``load_qa_chain``; it expects the keys
        ``input_documents`` and ``question`` and produces ``output_text``.
    """
    # NOTE: the stray "?" that used to follow {context} was removed so the
    # retrieved text is passed to the model unmodified.
    prompt_template = """
    Answer the question as detailed as possible from the provided context. If the answer is not in
    the provided context, just say, "answer is not available in the context." Do not provide incorrect answers.

    Context:
    {context}

    Question:
    {question}

    Answer:
    """

    model = ChatGroq(
        temperature=0.3,
        model_name="deepseek-r1-distill-llama-70b",  # DeepSeek R1 distill served through Groq
        groq_api_key=os.getenv("GROQ_API_KEY"),
    )
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    return load_qa_chain(model, chain_type="stuff", prompt=prompt)
60
+
61
def user_input(user_question):
    """Answer a user question from the saved FAISS index and render the reply.

    Loads the index from the ``faiss_index`` directory, retrieves the chunks
    most similar to the question, runs them through the QA chain, and writes
    the answer to the Streamlit page.

    Args:
        user_question: The question text entered by the user.
    """
    # The index only exists after PDFs have been processed; without this
    # guard, asking a question first would crash inside FAISS.load_local.
    if not os.path.isdir("faiss_index"):
        st.warning("Please upload and process your PDF files before asking a question.")
        return

    # Must be the same embedding model that was used to build the index.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    # SECURITY NOTE: load_local unpickles data from disk, hence the explicit
    # allow_dangerous_deserialization opt-in. Only load an index directory
    # that this app wrote itself; never point this at untrusted files.
    new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question)

    chain = get_conversational_chain()

    # invoke() replaces the deprecated direct call `chain(...)`.
    response = chain.invoke(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )

    st.markdown(f"### Reply:\n{response['output_text']}")
76
+
77
def main():
    """Run the Streamlit app: upload and index PDFs, then answer questions."""
    st.set_page_config(page_title="Chat PDF", page_icon=":books:", layout="wide")
    st.title("Chat with PDF using DeepSeek Ai")

    st.sidebar.header("Upload & Process PDF Files")
    st.sidebar.markdown(
        "Using DeepSeek R1 model for advanced conversational capabilities.")

    with st.sidebar:
        pdf_docs = st.file_uploader(
            "Upload your PDF files:",
            accept_multiple_files=True,
            type=["pdf"]
        )
        if st.button("Submit & Process"):
            if not pdf_docs:
                # Nothing uploaded: indexing an empty corpus would fail.
                st.warning("Please upload at least one PDF file before processing.")
            else:
                with st.spinner("Processing your files..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    # Scanned/image-only PDFs can produce no text, and thus
                    # no chunks; skip indexing in that case.
                    if text_chunks:
                        get_vector_store(text_chunks)
                if text_chunks:
                    st.success("PDFs processed and indexed successfully!")
                else:
                    st.error("No extractable text was found in the uploaded PDF files.")

    st.markdown(
        "### Ask Questions from Your PDF Files :mag:\n"
        "Once you upload and process your PDFs, type your questions below."
    )

    user_question = st.text_input("Enter your question:", placeholder="What do you want to know?")

    if user_question:
        with st.spinner("Fetching your answer..."):
            user_input(user_question)

    st.sidebar.info(
        "**Note:** This app uses DeepSeek R1 model for answering questions accurately."
    )


if __name__ == "__main__":
    main()
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ google-generativeai
3
+ python-dotenv
4
+ langchain
5
+ PyPDF2
6
+ chromadb
7
+ faiss-cpu
8
+ langchain_google_genai
9
+ langchain-community
10
+ langchain-groq
11
+ langchain-openai
12
+ langchainhub
13
+ pypdf
14
+ transformers
15
+ sentence-transformers