Spaces:
Running
Running
T-K-O-H
committed on
Commit
·
3076d04
1
Parent(s):
efde092
Update requirements.txt
Browse files
app.py
CHANGED
@@ -105,35 +105,39 @@ def extract_text_from_pdf(pdf_file):
|
|
105 |
# Sidebar for document upload
|
106 |
with st.sidebar:
|
107 |
st.header("Document Management")
|
108 |
-
uploaded_file = st.file_uploader("Upload a document", type=["txt", "pdf"])
|
109 |
if uploaded_file:
|
110 |
try:
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
text = extract_text_from_pdf(uploaded_file)
|
115 |
else:
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
|
|
|
|
|
|
|
|
|
|
137 |
except Exception as e:
|
138 |
logger.error(f"Error processing document: {str(e)}")
|
139 |
st.error(f"Error processing document: {str(e)}")
|
|
|
105 |
# Sidebar for document upload
#
# Accepts a .txt or .pdf upload, extracts its text, splits it into chunks via
# process_text() and adds the chunks to the vectorstore. Any failure is logged
# and shown to the user in the sidebar instead of crashing the app.
MAX_UPLOAD_BYTES = 10 * 1024 * 1024  # 10MB upload limit

with st.sidebar:
    st.header("Document Management")
    uploaded_file = st.file_uploader("Upload a document (max 10MB)", type=["txt", "pdf"])
    if uploaded_file:
        try:
            # Reject oversized uploads before doing any extraction work.
            if uploaded_file.size > MAX_UPLOAD_BYTES:
                st.error("File size exceeds 10MB limit. Please upload a smaller file.")
            else:
                logger.info(f"Processing uploaded file: {uploaded_file.name}")
                # Process the document based on file type
                if uploaded_file.type == "application/pdf":
                    text = extract_text_from_pdf(uploaded_file)
                else:
                    # For text files, detect encoding
                    raw_data = uploaded_file.getvalue()
                    detection = chardet.detect(raw_data)
                    # chardet reports encoding=None when it cannot identify the
                    # input (e.g. an empty file); bytes.decode(None) would raise
                    # TypeError, so fall back to UTF-8 in that case.
                    encoding = detection.get("encoding") or "utf-8"
                    try:
                        text = raw_data.decode(encoding)
                    except (LookupError, UnicodeDecodeError) as decode_error:
                        # LookupError: codec name unknown to this Python build.
                        # UnicodeDecodeError: the detected encoding was wrong.
                        # Decode leniently rather than rejecting the whole file.
                        logger.warning(
                            f"Could not decode as {encoding} ({decode_error}); "
                            "falling back to UTF-8 with replacement characters"
                        )
                        text = raw_data.decode("utf-8", errors="replace")

                if not text.strip():
                    raise ValueError("No text content found in the document")

                # Process text into semantic chunks
                chunks = process_text(text)

                if not chunks:
                    raise ValueError("No valid text chunks could be created from the document")

                # Add to vectorstore
                logger.info(f"Adding {len(chunks)} chunks to vectorstore")
                vectorstore.add_texts(chunks)

                st.success("Document processed and added to the knowledge base!")
                st.info(f"Processed {len(chunks)} chunks of text")
        except Exception as e:
            # UI boundary: report every failure to both the log and the user.
            logger.error(f"Error processing document: {str(e)}")
            st.error(f"Error processing document: {str(e)}")
|