akashshahade committed on
Commit
6363d82
·
verified ·
1 Parent(s): ec5a147

Upload 7 files

Browse files
Files changed (7) hide show
  1. .env +1 -0
  2. .gitattributes +2 -35
  3. app.py +32 -0
  4. groq_api.py +24 -0
  5. readme.md +17 -0
  6. requirements.txt +6 -0
  7. utils.py +26 -0
.env ADDED
@@ -0,0 +1 @@
 
 
1
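+ # Placeholder only: never commit a real key. Supply GROQ_API_KEY through the host's secret settings or a local, untracked .env file, and revoke any key that was ever pushed.
+ GROQ_API_KEY="your_groq_api_key_here"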
.gitattributes CHANGED
@@ -1,35 +1,2 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.py linguist-language=Python
2
+ *.txt linguist-language=Text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+ from utils import process_uploaded_file
5
+ from groq_api import ask_groq
6
+
7
# Streamlit entry point: upload one PDF/Excel file, preview its extracted
# text, and ask questions about it through the Groq-backed helper.
st.set_page_config(page_title="Multidoc Chat", layout="wide")

st.title("📄 Multidoc Chat - Ask Anything About Your Files")

# Streamlit re-runs this script on every interaction, so the chat history is
# kept in session state. It is initialised up front so that every later read
# of `st.session_state.history` is safe regardless of which branch runs.
if "history" not in st.session_state:
    st.session_state.history = []

uploaded_file = st.file_uploader("Upload PDF or Excel", type=["pdf", "xlsx"])

if uploaded_file:
    with st.spinner("Processing file..."):
        extracted_text = process_uploaded_file(uploaded_file)

    # Only the first 1000 characters are shown; the full text is still passed
    # to ask_groq below.
    st.text_area("Extracted Text Preview", extracted_text[:1000], height=150, disabled=True)

    user_query = st.text_input("Ask something about the document:")

    # Query the model only on an explicit button press with a non-empty question.
    if st.button("Get Answer") and user_query:
        response = ask_groq(user_query, extracted_text)
        st.session_state.history.append((user_query, response))

    if st.session_state.history:
        st.write("### Chat History:")
        for question, answer in st.session_state.history:
            st.write(f"**Q:** {question}")
            st.write(f"**A:** {answer}")
groq_api.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from dotenv import load_dotenv
4
+
5
# Read variables from a local .env file (if one exists) into the environment.
load_dotenv()

# API key taken from the environment; this is None when GROQ_API_KEY is unset.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Must be a model id that Groq actually serves; "llama3-8b-chat" is not one.
MODEL_NAME = "llama-3.1-8b-instant"  # Update model as needed
# Groq serves its chat-completions endpoint under the OpenAI-compatible
# "/openai/v1" prefix; the bare "/v1" path does not exist.
API_URL = "https://api.groq.com/openai/v1/chat/completions"
10
+
11
def ask_groq(user_query, context_text):
    """Send the user's question together with document text to the Groq API.

    Args:
        user_query: Question typed by the user.
        context_text: Text extracted from the uploaded document. Only its
            first 4000 characters are placed in the prompt.

    Returns:
        The assistant's reply. When the API key is missing, the HTTP request
        fails, the body is not JSON, or the API reports an error, a short
        human-readable message describing the problem is returned instead.
        "No response" is returned when the reply carries no message content.
    """
    if not GROQ_API_KEY:
        # Without a key the request can only be rejected; say so directly.
        return "GROQ_API_KEY is not set; cannot contact the Groq API."

    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": "You are an AI that answers questions based on the given document."},
            {"role": "user", "content": f"Document: {context_text[:4000]}\n\nQuestion: {user_query}"},
        ],
    }

    try:
        # requests has no default timeout; without one a stalled connection
        # would block the UI indefinitely.
        response = requests.post(API_URL, json=payload, headers=headers, timeout=30)
    except requests.RequestException as exc:
        return f"Request to Groq API failed: {exc}"

    try:
        data = response.json()
    except ValueError:
        # JSON decode errors are ValueError subclasses (e.g. an HTML error page).
        return f"Groq API returned a non-JSON response (HTTP {response.status_code})."

    if not isinstance(data, dict):
        return "No response"

    # OpenAI-style error payloads look like {"error": {"message": "..."}}.
    error = data.get("error")
    if error:
        detail = error.get("message") if isinstance(error, dict) else error
        return f"Groq API error: {detail}"

    # Treat a missing or empty "choices" list the same way instead of
    # indexing into an empty list.
    choices = data.get("choices") or [{}]
    return choices[0].get("message", {}).get("content", "No response")
readme.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 📄 Multidoc Chat - Hugging Face Space
2
+
3
+ Multidoc Chat allows users to upload **PDFs and Excel files** and ask questions about the content. It uses:
4
+ - **Groq API** + **Llama model** for answering queries
5
+ - **Streamlit** for a simple and interactive UI
6
+
7
+ ## 🚀 Features
8
+ - Upload **PDFs & Excel** files
9
+ - Extract text automatically
10
+ - Ask questions & get AI-powered responses
11
+ - Interactive chat history
12
+
13
+ ## 🔧 Installation
14
+ 1. **Clone this repository:**
15
+ ```sh
16
+ git clone https://huggingface.co/spaces/akashshahade/multidoc_chat
17
+ cd multidoc_chat
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ groq
3
+ pypdf
4
+ pandas
5
+ openpyxl
6
+ python-dotenv
utils.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import io
3
+ from pypdf import PdfReader
4
+
5
def process_uploaded_file(uploaded_file):
    """Extract text from an uploaded PDF or Excel (.xlsx) file.

    The file kind is decided by the upload's MIME type first. If the MIME type
    is neither of the two recognised values, the file name's extension is used
    as a fallback, since the reported MIME type is not always specific.

    Args:
        uploaded_file: Upload object exposing ``type`` (MIME type string) and,
            optionally, ``name`` (file name). It is handed unchanged to the
            matching extractor.

    Returns:
        The extracted text, or the string "Unsupported file format." when
        neither the MIME type nor the extension identifies a PDF or .xlsx.
    """
    mime_type = uploaded_file.type
    # `name` may be absent or None on file-like objects; treat that as "".
    file_name = (getattr(uploaded_file, "name", "") or "").lower()

    # Exact MIME matches keep priority so existing behaviour is unchanged.
    if mime_type == "application/pdf":
        return extract_text_from_pdf(uploaded_file)
    if mime_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
        return extract_text_from_excel(uploaded_file)

    # Fallback: trust the extension when the MIME type is unrecognised.
    if file_name.endswith(".pdf"):
        return extract_text_from_pdf(uploaded_file)
    if file_name.endswith(".xlsx"):
        return extract_text_from_excel(uploaded_file)

    return "Unsupported file format."
13
+
14
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF and join the pages with newlines.

    Args:
        pdf_file: Path or binary file-like object passed to ``PdfReader``.

    Returns:
        The page texts joined by "\n". Pages for which extraction yields no
        text (``None`` or an empty string) are skipped, so the result is ""
        when no page has extractable text.
    """
    reader = PdfReader(pdf_file)
    # Extract each page exactly once: text extraction is the costly step, and
    # the filter below reuses the result instead of extracting a second time.
    page_texts = (page.extract_text() for page in reader.pages)
    return "\n".join(text for text in page_texts if text)
19
+
20
def extract_text_from_excel(excel_file):
    """Render every sheet of an Excel workbook as one plain-text string.

    Args:
        excel_file: Path or file-like object passed to ``pd.read_excel``.

    Returns:
        For each sheet, a "Sheet: <name>" header line (preceded by a newline)
        followed by the sheet rendered with ``DataFrame.to_string`` without
        the index; the sections are concatenated in workbook order.
    """
    # sheet_name=None makes pandas return a mapping of sheet name -> DataFrame.
    workbook = pd.read_excel(excel_file, sheet_name=None)
    sections = [
        f"\nSheet: {sheet_name}\n" + frame.to_string(index=False)
        for sheet_name, frame in workbook.items()
    ]
    return "".join(sections)