Spaces:

M17idd
/

army

Running

App Files Community

M17idd committed 4 days ago

Commit

5606c57

verified ·

1 Parent(s): e21f87d

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -55

app.py CHANGED Viewed

@@ -17,6 +17,7 @@ import faiss
 # ----------------- تنظیمات صفحه -----------------
 st.set_page_config(page_title="چت‌ بات توانا", page_icon="🪖", layout="wide")
 st.markdown("""
     <style>
     @import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@400;700&display=swap');
@@ -84,17 +85,6 @@ st.markdown("""
     </style>
 """, unsafe_allow_html=True)
-col1, col2, col3 = st.columns([1, 0.1, 1])
-with col2:
-    st.image("army.png", width=240)
-st.markdown("""
-    <div class="header-text">
-        <h1>چت‌ بات توانا</h1>
-        <div class="subtitle">دستیار هوشمند برای تصمیم‌گیری در میدان نبرد</div>
-    </div>
-""", unsafe_allow_html=True)
 # ----------------- لود PDF و ساخت ایندکس -----------------
 @st.cache_resource
@@ -103,47 +93,23 @@ def get_pdf_index():
         loader = PyPDFLoader('test1.pdf')
         documents = loader.load()
-        splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=128)
         texts = []
         for doc in documents:
             texts.extend(splitter.split_text(doc.page_content))
-        # مدل امبدینگ
-        model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-        # تولید امبدینگ‌ها
         embeddings = model.encode(texts, convert_to_numpy=True)
-        # ساخت ایندکس Faiss
-        index = faiss.IndexFlatL2(embeddings.shape[1])
         index.add(embeddings)
         docs = [{"text": text} for text in texts]
         return docs, embeddings, index, model
-# ----------------- تعریف LLM -----------------
-llm = ChatOpenAI(
-    base_url="https://api.together.xyz/v1",
-    api_key='0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979',
-    model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
-)
-# ----------------- تعریف SimpleRetriever -----------------
-class SimpleRetriever(BaseRetriever):
-    documents: List[dict] = Field(...)
-    embeddings: np.ndarray = Field(...)
-    index: faiss.Index
-    model: SentenceTransformer
-    def _get_relevant_documents(self, query: str) -> List[Document]:
-        query_embedding = self.model.encode([query], convert_to_numpy=True)
-        _, indices = self.index.search(query_embedding, 5)
-        results = []
-        for i in indices[0]:
-            results.append(Document(page_content=self.documents[i]['text']))
-        return results
 # ----------------- بارگذاری دیتا -----------------
 documents, embeddings, index, model = get_pdf_index()
@@ -153,22 +119,14 @@ retriever = SimpleRetriever(
     index=index,
     model=model
 )
-custom_prompt = PromptTemplate(
-    input_variables=["context", "question"],
-    template="""
-شما فقط مجاز هستید از اطلاعات زیر پاسخ دهید. اگر اطلاعات لازم برای پاسخ دقیق وجود ندارد، لطفاً تلاش کنید تا نزدیک‌ترین و مفیدترین پاسخ را از اطلاعات موجود پیدا کنید. در صورتی که اطلاعات مورد نظر وجود ندارد، به‌جای دادن جواب نادرست، بگویید که اطلاعات مورد نیاز را ندارید یا نمی‌توانید پاسخ دقیقی بدهید.
-اطلاعات:
-{context}
-سوال:
-{question}
-پاسخ دقیق و کامل بده:
-"""
 )
 # ----------------- ساخت Chain -----------------
 qa_chain = RetrievalQA.from_chain_type(
     llm=llm,
@@ -204,16 +162,20 @@ if st.session_state.pending_prompt:
         thinking.markdown("🤖 در حال فکر کردن...")
         try:
             response = qa_chain.run(st.session_state.pending_prompt)
-            answer = response.strip()
         except Exception as e:
-            answer = f"خطا در پاسخ‌دهی: {str(e)}"
         thinking.empty()
         full_response = ""
         placeholder = st.empty()
-        for word in answer.split():
             full_response += word + " "
             placeholder.markdown(full_response + "▌")
             time.sleep(0.03)

 # ----------------- تنظیمات صفحه -----------------
 st.set_page_config(page_title="چت‌ بات توانا", page_icon="🪖", layout="wide")
+# تنظیمات استایل
 st.markdown("""
     <style>
     @import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@400;700&display=swap');
     </style>
 """, unsafe_allow_html=True)
 # ----------------- لود PDF و ساخت ایندکس -----------------
 @st.cache_resource
         loader = PyPDFLoader('test1.pdf')
         documents = loader.load()
+        splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=256)
         texts = []
         for doc in documents:
             texts.extend(splitter.split_text(doc.page_content))
+        model = SentenceTransformer("togethercomputer/m2-bert-80M-8k-retrieval", trust_remote_code=True)
         embeddings = model.encode(texts, convert_to_numpy=True)
+        index = faiss.IndexIVFFlat(embeddings.shape[1], 100)
+        index.train(embeddings)
         index.add(embeddings)
         docs = [{"text": text} for text in texts]
         return docs, embeddings, index, model
 # ----------------- بارگذاری دیتا -----------------
 documents, embeddings, index, model = get_pdf_index()
     index=index,
     model=model
 )
+# ----------------- تعریف LLM -----------------
+llm = ChatOpenAI(
+    base_url="https://api.together.xyz/v1",
+    api_key='0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979',
+    model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
 )
 # ----------------- ساخت Chain -----------------
 qa_chain = RetrievalQA.from_chain_type(
     llm=llm,
         thinking.markdown("🤖 در حال فکر کردن...")
         try:
+            # اگر مدل نتواند پاسخ دقیقی پیدا کند
             response = qa_chain.run(st.session_state.pending_prompt)
+            if not response.strip():  # اگر پاسخ خالی یا بی‌فایده بود
+                response = "متاسفانه اطلاعات دقیقی برای پاسخ به این سوال موجود نیست."
+            else:
+                response = response.strip()
         except Exception as e:
+            response = "متاسفانه اطلاعات لازم برای پاسخ به این سوال موجود نیست."
         thinking.empty()
         full_response = ""
         placeholder = st.empty()
+        for word in response.split():
             full_response += word + " "
             placeholder.markdown(full_response + "▌")
             time.sleep(0.03)