M17idd committed (verified)
Commit 2b9cbaf · Parent: b5be236

Update app.py

Files changed (1): app.py (+17 −8)
app.py CHANGED
@@ -127,26 +127,35 @@ class HuggingFaceEmbeddings(Embeddings):
 @st.cache_resource
 def get_pdf_index():
     with st.spinner('📄 در حال پردازش فایل PDF...'):
-        # Load the PDF file
+        # Load the PDF file with chunks
         loader = PyPDFLoader('test1.pdf')
         pages = loader.load()
-
-        # Extract text from each page
-        full_text = "\n".join([page.page_content for page in pages])
-
+
+        # Instead of loading all pages, process them in batches
+        batch_size = 5  # Processing 5 pages at a time
+        all_texts = []
+
+        for i in range(0, len(pages), batch_size):
+            batch = pages[i:i + batch_size]
+            batch_text = "\n".join([page.page_content for page in batch])
+            all_texts.append(batch_text)
+
+        # Combine all texts for further processing
+        full_text = "\n".join(all_texts)
+
         # Split the text into chunks
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=1024,   # Chunk size
             chunk_overlap=128  # Overlap between chunks
         )
         texts = text_splitter.split_text(full_text)
-
+
         # Create embeddings
         embeddings = HuggingFaceEmbeddings(model_name="FacebookAI/xlm-roberta-large")
-
+
         # Create FAISS vector store
         vector_store = FAISS.from_texts(texts, embeddings)
-
+
         return vector_store
 index = get_pdf_index()
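For context, the spinner message "در حال پردازش فایل PDF..." is Persian for "Processing the PDF file...". Below is a minimal, standalone sketch of the batched extraction this commit introduces, without the Streamlit wrapper. The import paths assume a recent LangChain split (langchain-community / langchain-text-splitters) and may differ on older versions; test1.pdf, the batch size of 5, and the splitter settings are taken from the diff above.

# Sketch only: reproduces the commit's batched page extraction outside Streamlit.
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

def extract_full_text(pdf_path: str, batch_size: int = 5) -> str:
    """Join page text in fixed-size batches, then merge the batches."""
    pages = PyPDFLoader(pdf_path).load()  # one Document per page
    all_texts = []
    for i in range(0, len(pages), batch_size):
        batch = pages[i:i + batch_size]
        all_texts.append("\n".join(page.page_content for page in batch))
    return "\n".join(all_texts)

full_text = extract_full_text('test1.pdf')

# Same splitter settings as the diff: 1024-character chunks with 128 overlap.
splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=128)
texts = splitter.split_text(full_text)

Note that because the batches are merged with the same "\n" separator used within each batch, the resulting full_text is identical to joining all pages at once; the batching only bounds the size of the intermediate strings. Once the vector store is built, a standard LangChain lookup such as index.similarity_search(query, k=4) returns the chunks nearest the query.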