Commit 3076d04 by T-K-O-H
Parent: efde092

Update requirements.txts

Files changed (1):
  1. app.py +30 -26
app.py CHANGED
@@ -105,35 +105,39 @@ def extract_text_from_pdf(pdf_file):
 # Sidebar for document upload
 with st.sidebar:
     st.header("Document Management")
-    uploaded_file = st.file_uploader("Upload a document", type=["txt", "pdf"])
+    uploaded_file = st.file_uploader("Upload a document (max 10MB)", type=["txt", "pdf"])
     if uploaded_file:
         try:
-            logger.info(f"Processing uploaded file: {uploaded_file.name}")
-            # Process the document based on file type
-            if uploaded_file.type == "application/pdf":
-                text = extract_text_from_pdf(uploaded_file)
+            # Check file size (10MB = 10 * 1024 * 1024 bytes)
+            if uploaded_file.size > 10 * 1024 * 1024:
+                st.error("File size exceeds 10MB limit. Please upload a smaller file.")
             else:
-                # For text files, detect encoding
-                raw_data = uploaded_file.getvalue()
-                result = chardet.detect(raw_data)
-                encoding = result['encoding']
-                text = raw_data.decode(encoding)
-
-            if not text.strip():
-                raise ValueError("No text content found in the document")
-
-            # Process text into semantic chunks
-            chunks = process_text(text)
-
-            if not chunks:
-                raise ValueError("No valid text chunks could be created from the document")
-
-            # Add to vectorstore
-            logger.info(f"Adding {len(chunks)} chunks to vectorstore")
-            vectorstore.add_texts(chunks)
-
-            st.success("Document processed and added to the knowledge base!")
-            st.info(f"Processed {len(chunks)} chunks of text")
+                logger.info(f"Processing uploaded file: {uploaded_file.name}")
+                # Process the document based on file type
+                if uploaded_file.type == "application/pdf":
+                    text = extract_text_from_pdf(uploaded_file)
+                else:
+                    # For text files, detect encoding
+                    raw_data = uploaded_file.getvalue()
+                    result = chardet.detect(raw_data)
+                    encoding = result['encoding']
+                    text = raw_data.decode(encoding)
+
+                if not text.strip():
+                    raise ValueError("No text content found in the document")
+
+                # Process text into semantic chunks
+                chunks = process_text(text)
+
+                if not chunks:
+                    raise ValueError("No valid text chunks could be created from the document")
+
+                # Add to vectorstore
+                logger.info(f"Adding {len(chunks)} chunks to vectorstore")
+                vectorstore.add_texts(chunks)
+
+                st.success("Document processed and added to the knowledge base!")
+                st.info(f"Processed {len(chunks)} chunks of text")
         except Exception as e:
             logger.error(f"Error processing document: {str(e)}")
            st.error(f"Error processing document: {str(e)}")