Spaces:

M17idd
/

army

Running

App Files Files Community

M17idd committed 18 days ago

Commit

4bf7119

verified ·

1 Parent(s): 2b9cbaf

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -9

app.py CHANGED Viewed

@@ -127,33 +127,42 @@ class HuggingFaceEmbeddings(Embeddings):
 @st.cache_resource
 def get_pdf_index():
     with st.spinner('📄 در حال پردازش فایل PDF...'):
-        # Load the PDF file with chunks
         loader = PyPDFLoader('test1.pdf')
         pages = loader.load()
-        # Instead of loading all pages, process them in batches
-        batch_size = 5  # Processing 5 pages at a time
         all_texts = []
         for i in range(0, len(pages), batch_size):
             batch = pages[i:i + batch_size]
             batch_text = "\n".join([page.page_content for page in batch])
             all_texts.append(batch_text)
-        # Combine all texts for further processing
         full_text = "\n".join(all_texts)
-        # Split the text into chunks
         text_splitter = RecursiveCharacterTextSplitter(
-            chunk_size=1024,  # Chunk size
-            chunk_overlap=128  # Overlap between chunks
         )
         texts = text_splitter.split_text(full_text)
-        # Create embeddings
         embeddings = HuggingFaceEmbeddings(model_name="FacebookAI/xlm-roberta-large")
-        # Create FAISS vector store
         vector_store = FAISS.from_texts(texts, embeddings)
         return vector_store

 @st.cache_resource
 def get_pdf_index():
     with st.spinner('📄 در حال پردازش فایل PDF...'):
+        # بارگذاری فایل PDF
         loader = PyPDFLoader('test1.pdf')
         pages = loader.load()
+        # تقسیم صفحات به دسته‌های ۵ تایی
+        batch_size = 5  # پردازش ۵ صفحه در هر بار
         all_texts = []
+        progress = st.progress(0)  # نوار پیشرفت
+        total_batches = len(pages) // batch_size + (1 if len(pages) % batch_size != 0 else 0)  # تعداد دسته‌ها
+        # پردازش هر دسته
         for i in range(0, len(pages), batch_size):
             batch = pages[i:i + batch_size]
             batch_text = "\n".join([page.page_content for page in batch])
             all_texts.append(batch_text)
+            # به‌روزرسانی نوار پیشرفت
+            progress.progress((i // batch_size + 1) / total_batches)
+            time.sleep(0.5)  # شبیه‌سازی زمان پردازش
+        # ترکیب تمام متن‌ها برای پردازش بیشتر
         full_text = "\n".join(all_texts)
+        # تقسیم متن به بخش‌ها
         text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=1024,  # اندازه هر بخش
+            chunk_overlap=128  # هم‌پوشانی بین بخش‌ها
         )
         texts = text_splitter.split_text(full_text)
+        # ایجاد بردارهای تعبیه (embeddings)
         embeddings = HuggingFaceEmbeddings(model_name="FacebookAI/xlm-roberta-large")
+        # ایجاد FAISS vector store
         vector_store = FAISS.from_texts(texts, embeddings)
         return vector_store