Spaces:
Sleeping
Sleeping
Upload 7 files
Browse files- .env +1 -0
- .gitattributes +2 -35
- app.py +32 -0
- groq_api.py +24 -0
- readme.md +17 -0
- requirements.txt +6 -0
- utils.py +26 -0
.env
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
GROQ_API_KEY="<your-groq-api-key>"
|
.gitattributes
CHANGED
@@ -1,35 +1,2 @@
|
|
1 |
-
*.
|
2 |
-
*.
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
+
*.py linguist-language=Python
|
2 |
+
*.txt linguist-language=Text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import os
|
4 |
+
from utils import process_uploaded_file
|
5 |
+
from groq_api import ask_groq
|
6 |
+
|
7 |
+
# --- Page setup -----------------------------------------------------------
st.set_page_config(page_title="Multidoc Chat", layout="wide")
st.title("π Multidoc Chat - Ask Anything About Your Files")

# --- Document upload ------------------------------------------------------
uploaded_file = st.file_uploader("Upload PDF or Excel", type=["pdf", "xlsx"])

if uploaded_file:
    # Turn the uploaded document into plain text while showing a spinner.
    with st.spinner("Processing file..."):
        extracted_text = process_uploaded_file(uploaded_file)

    # The question/answer log lives in the session state so it is still
    # available the next time the script body runs.
    if "history" not in st.session_state:
        st.session_state.history = []

    # Read-only preview limited to the first 1000 characters of the text.
    preview = extracted_text[:1000]
    st.text_area("Extracted Text Preview", preview, height=150, disabled=True)

    user_query = st.text_input("Ask something about the document:")

    # Only call the model when the button was pressed AND a question exists.
    answer_requested = st.button("Get Answer")
    if answer_requested and user_query:
        response = ask_groq(user_query, extracted_text)
        st.session_state.history.append((user_query, response))

    # Render every recorded exchange, oldest first.
    history = st.session_state.history
    if history:
        st.write("### Chat History:")
        for question, answer in history:
            st.write(f"**Q:** {question}")
            st.write(f"**A:** {answer}")
|
groq_api.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import requests
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
|
5 |
+
# Load variables from a local .env file (if present) into the environment
# before the API key is read below.
load_dotenv()

# API key used for the Authorization header; None when the variable is unset.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# NOTE(review): confirm this ID against Groq's current list of supported
# models -- it does not look like one of Groq's published model IDs.
MODEL_NAME = "llama3-8b-chat"  # Update model as needed

# Groq serves its OpenAI-compatible chat endpoint under the /openai/v1 prefix;
# the previous URL omitted the "/openai" path segment.
API_URL = "https://api.groq.com/openai/v1/chat/completions"
|
10 |
+
|
11 |
+
def ask_groq(user_query, context_text, timeout=60):
    """Send a question plus document text to the Groq chat-completions API.

    Only the first 4000 characters of ``context_text`` are placed in the
    prompt.

    Args:
        user_query: Question to ask about the document.
        context_text: Text extracted from the uploaded document.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The model's reply as a string; ``"No response"`` when the reply holds
        no message content; or a string starting with ``"Error:"`` when the
        key is missing, the request fails, or the API reports an error.
    """
    if not GROQ_API_KEY:
        # Avoid sending a meaningless "Bearer None" header.
        return "Error: GROQ_API_KEY is not set."

    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": MODEL_NAME,
        "messages": [
            {
                "role": "system",
                "content": "You are an AI that answers questions based on the given document.",
            },
            {
                "role": "user",
                "content": f"Document: {context_text[:4000]}\n\nQuestion: {user_query}",
            },
        ],
    }

    # Network problems (DNS, connection, timeout) become a readable message
    # instead of an unhandled exception in the caller.
    try:
        response = requests.post(
            API_URL, json=payload, headers=headers, timeout=timeout
        )
    except requests.RequestException as exc:
        return f"Error: could not reach the Groq API ({exc})"

    # A non-JSON body (e.g. an HTML error page) raises a ValueError subclass.
    try:
        data = response.json()
    except ValueError:
        return (
            "Error: Groq API returned a non-JSON response "
            f"(HTTP {response.status_code})"
        )

    if not isinstance(data, dict):
        return "No response"

    # Surface an API-reported error rather than hiding it as "No response".
    error = data.get("error")
    if error:
        detail = error.get("message") if isinstance(error, dict) else error
        return f"Error: {detail}"

    # "choices" may be missing or an empty list; both fall back safely.
    choices = data.get("choices") or [{}]
    message = choices[0].get("message") or {}
    return message.get("content") or "No response"
|
readme.md
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 📄 Multidoc Chat - Hugging Face Space
|
2 |
+
|
3 |
+
Multidoc Chat allows users to upload **PDFs and Excel files** and ask questions about the content. It uses:
|
4 |
+
- **Groq API** + **Llama model** for answering queries
|
5 |
+
- **Streamlit** for a simple and interactive UI
|
6 |
+
|
7 |
+
## 🚀 Features
|
8 |
+
- Upload **PDFs & Excel** files
|
9 |
+
- Extract text automatically
|
10 |
+
- Ask questions & get AI-powered responses
|
11 |
+
- Interactive chat history
|
12 |
+
|
13 |
+
## 🔧 Installation
|
14 |
+
1. **Clone this repository:**
|
15 |
+
```sh
|
16 |
+
git clone https://huggingface.co/spaces/akashshahade/multidoc_chat
|
17 |
+
cd multidoc_chat
```
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
groq
|
3 |
+
pypdf
|
4 |
+
pandas
|
5 |
+
openpyxl
|
6 |
+
python-dotenv
|
utils.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import io
|
3 |
+
from pypdf import PdfReader
|
4 |
+
|
5 |
+
def process_uploaded_file(uploaded_file):
    """Extract text from an uploaded PDF or Excel (.xlsx) file.

    The format is chosen from the MIME type on ``uploaded_file.type``. When
    that value is empty or not one of the two recognized types, the extension
    of ``uploaded_file.name`` is used instead, so a correctly named file is
    still handled.

    Args:
        uploaded_file: File-like object exposing a ``type`` attribute (MIME
            type) and, optionally, a ``name`` attribute (file name).

    Returns:
        The extracted text, or ``"Unsupported file format."`` when the file
        is recognized as neither a PDF nor an .xlsx workbook.
    """
    pdf_mime = "application/pdf"
    xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

    mime_type = getattr(uploaded_file, "type", "") or ""
    file_name = (getattr(uploaded_file, "name", "") or "").lower()

    # A recognized MIME type always wins, exactly as before.
    if mime_type == pdf_mime:
        return extract_text_from_pdf(uploaded_file)
    if mime_type == xlsx_mime:
        return extract_text_from_excel(uploaded_file)

    # Fallback: decide by file extension when the MIME type is unhelpful.
    if file_name.endswith(".pdf"):
        return extract_text_from_pdf(uploaded_file)
    if file_name.endswith(".xlsx"):
        return extract_text_from_excel(uploaded_file)

    return "Unsupported file format."
|
13 |
+
|
14 |
+
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF, one page per line group.

    Args:
        pdf_file: The PDF source handed to ``PdfReader``.

    Returns:
        The page texts joined with newlines; pages that yield no text are
        skipped.
    """
    reader = PdfReader(pdf_file)
    # Extract each page exactly once; the previous version ran the
    # extraction twice per page (once to filter, once for the value).
    page_texts = (page.extract_text() for page in reader.pages)
    return "\n".join(text for text in page_texts if text)
|
19 |
+
|
20 |
+
def extract_text_from_excel(excel_file):
    """Render every sheet of an Excel workbook as plain text.

    Args:
        excel_file: The workbook source handed to ``pandas.read_excel``.

    Returns:
        One string containing, for each sheet, a ``Sheet: <name>`` header
        line followed by the sheet's contents formatted without the index.
    """
    # sheet_name=None makes read_excel return a mapping of sheet name -> frame.
    sheets = pd.read_excel(excel_file, sheet_name=None)
    sections = [
        f"\nSheet: {sheet_name}\n" + frame.to_string(index=False)
        for sheet_name, frame in sheets.items()
    ]
    return "".join(sections)
|