akashshahade commited on
Commit
90c030d
·
verified ·
1 Parent(s): d61e6d0

Upload 5 files

Browse files
Files changed (5) hide show
  1. .env +1 -0
  2. .gitattributes +35 -35
  3. README.md +13 -13
  4. app.py +108 -0
  5. requirements.txt +11 -0
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ GROQ_API_KEY=<REDACTED — a live Groq API key was committed here; rotate it immediately and supply it via deployment secrets, never version control>
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,13 +1,13 @@
1
- ---
2
- title: DocuChat1
3
- emoji: 🐠
4
- colorFrom: red
5
- colorTo: blue
6
- sdk: streamlit
7
- sdk_version: 1.42.2
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Chatpdf
3
+ emoji: 🐠
4
+ colorFrom: green
5
+ colorTo: gray
6
+ sdk: streamlit
7
+ sdk_version: 1.42.2
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ from dotenv import load_dotenv
4
+ import streamlit as st
5
+ from PyPDF2 import PdfReader
6
+ from langchain.text_splitter import CharacterTextSplitter
7
+ from langchain_community.embeddings import HuggingFaceEmbeddings
8
+ from langchain.vectorstores import FAISS
9
+ from langchain.memory import ConversationBufferMemory
10
+ from langchain.chains import ConversationalRetrievalChain
11
+ from langchain_groq import ChatGroq
12
+
13
+ # Load environment variables
14
+ load_dotenv()
15
+
16
+ # Set up logging
17
+ logging.basicConfig(
18
+ level=logging.INFO,
19
+ format='%(asctime)s - %(levelname)s - %(message)s'
20
+ )
21
+
22
+ # Function to extract text from PDF files
23
+ def get_pdf_text(pdf_docs):
24
+ text = ""
25
+ for pdf in pdf_docs:
26
+ pdf_reader = PdfReader(pdf)
27
+ for page in pdf_reader.pages:
28
+ text += page.extract_text() or ""
29
+ return text
30
+
31
+ # Function to split the extracted text into chunks
32
+ def get_text_chunks(text):
33
+ text_splitter = CharacterTextSplitter(
34
+ separator="\n",
35
+ chunk_size=1000,
36
+ chunk_overlap=200,
37
+ length_function=len
38
+ )
39
+ chunks = text_splitter.split_text(text)
40
+ return chunks
41
+
42
+ # Function to create a FAISS vectorstore
43
+ def get_vectorstore(text_chunks):
44
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
45
+ vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
46
+ return vectorstore
47
+
48
+ # Function to set up the conversational retrieval chain
49
+ def get_conversation_chain(vectorstore):
50
+ try:
51
+ llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.5)
52
+ memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
53
+
54
+ conversation_chain = ConversationalRetrievalChain.from_llm(
55
+ llm=llm,
56
+ retriever=vectorstore.as_retriever(),
57
+ memory=memory
58
+ )
59
+
60
+ logging.info("Conversation chain created successfully.")
61
+ return conversation_chain
62
+ except Exception as e:
63
+ logging.error(f"Error creating conversation chain: {e}")
64
+ st.error("An error occurred while setting up the conversation chain.")
65
+
66
+ # Handle user input
67
+ def handle_userinput(user_question):
68
+ if st.session_state.conversation is not None:
69
+ response = st.session_state.conversation({'question': user_question})
70
+ st.session_state.chat_history = response['chat_history']
71
+
72
+ for i, message in enumerate(st.session_state.chat_history):
73
+ if i % 2 == 0:
74
+ st.write(f"*User:* {message.content}")
75
+ else:
76
+ st.write(f"*Bot:* {message.content}")
77
+ else:
78
+ st.warning("Please process the documents first.")
79
+
80
+ # Main function to run the Streamlit app
81
+ def main():
82
+ load_dotenv()
83
+ st.set_page_config(page_title="DocuChat - Chat with PDFs - by Akash", page_icon=":books:")
84
+
85
+ if "conversation" not in st.session_state:
86
+ st.session_state.conversation = None
87
+ if "chat_history" not in st.session_state:
88
+ st.session_state.chat_history = None
89
+
90
+ st.header("Chat with multiple PDFs :books:")
91
+ user_question = st.text_input("Ask a question about your documents:")
92
+ if user_question:
93
+ handle_userinput(user_question)
94
+
95
+ with st.sidebar:
96
+ st.subheader("Your documents")
97
+ pdf_docs = st.file_uploader(
98
+ "Upload your PDFs here and click on 'Process'", accept_multiple_files=True
99
+ )
100
+ if st.button("Process"):
101
+ with st.spinner("Processing..."):
102
+ raw_text = get_pdf_text(pdf_docs)
103
+ text_chunks = get_text_chunks(raw_text)
104
+ vectorstore = get_vectorstore(text_chunks)
105
+ st.session_state.conversation = get_conversation_chain(vectorstore)
106
+
107
+ if __name__ == '__main__':
108
+ main()
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ langchain
3
+ langchain_groq
4
+ PyPDF2
5
+ python-dotenv
6
+ faiss-cpu
7
+ altair
8
+ tiktoken
9
+ sentence-transformers
10
+ pydantic
11
+ langchain_community