Spaces:

M17idd
/

army

Running

App Files Files Community

M17idd committed 9 days ago

Commit

dab4d10

verified ·

1 Parent(s): 33a09b2

Update app.py

Browse files

Files changed (1) hide show

app.py +142 -118

app.py CHANGED Viewed

@@ -181,10 +181,12 @@ st.markdown('<div class="chat-message">👋 سلام! چطور میتونم کم
-# ----------------- لود csv و ساخت ایندکس -----------------
-# --- Embedding Class ---
 class TogetherEmbeddings(Embeddings):
     def __init__(self, model_name: str, api_key: str):
         self.model_name = model_name
@@ -202,123 +204,145 @@ class TogetherEmbeddings(Embeddings):
     def embed_query(self, text: str) -> List[float]:
         return self.embed_documents([text])[0]
-# --- Load CSV and Create Index ---
 @st.cache_resource
-def get_csv_index(csv_file):
-    with st.spinner('📄 در حال پردازش فایل CSV...'):
-        df = pd.read_csv(csv_file)
-        texts = df.iloc[:, 0].astype(str).tolist()
-        texts = [text for text in texts if text.strip()]
-        text_splitter = RecursiveCharacterTextSplitter(
-            chunk_size=2048,
-            chunk_overlap=256,
-            length_function=len,
-            separators=["\n\n", "\n", " ", ""]
-        )
-        split_texts = []
-        for text in texts:
-            split_texts.extend(text_splitter.split_text(text))
-        documents = [Document(page_content=text) for text in split_texts]
-        embeddings = TogetherEmbeddings(
-            model_name="togethercomputer/m2-bert-80M-32k-retrieval",
-            api_key="0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979"
-        )
-        vectorstore = FAISS.from_documents(documents, embeddings)
-        return vectorstore, embeddings
-# --- Load CSV ---
-csv_file_path = 'output (1).csv'
-try:
-    vectorstore, embedding_model = get_csv_index(csv_file_path)
-except Exception as e:
-    st.error(f"خطا در ساخت ایندکس: {str(e)}")
-    st.stop()
-# --- Load LLM ---
-llm = ChatOpenAI(
-    base_url="https://api.together.xyz/v1",
-    api_key='0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979',
-    model="Qwen/Qwen3-235B-A22B-fp8-tput"
-)
-# --- Chat UI ---
-if 'messages' not in st.session_state:
-    st.session_state.messages = []
-if 'pending_prompt' not in st.session_state:
-    st.session_state.pending_prompt = None
-# نمایش پیام‌های قبلی
-for msg in st.session_state.messages:
-    with st.chat_message(msg['role']):
-        st.markdown(f"🗨️ {msg['content']}", unsafe_allow_html=True)
-# ورودی جدید کاربر
-prompt = st.chat_input("چطور می‌تونم کمک کنم؟")
-if prompt:
-    st.session_state.messages.append({'role': 'user', 'content': prompt})
-    st.session_state.pending_prompt = prompt
-    st.rerun()
-# پردازش سوال
-if st.session_state.pending_prompt:
-    with st.chat_message('ai'):
-        thinking = st.empty()
-        thinking.markdown("🤖 در حال فکر کردن...")
-        try:
-            # امبد کردن سوال
-            query = st.session_state.pending_prompt
-            query_embedding = embedding_model.embed_query(query)
-            # بازیابی اسناد مشابه
-            docs = vectorstore.similarity_search_by_vector(query_embedding, k=4)
-            context = "\n".join([doc.page_content for doc in docs])
-            # ساخت پرامپت نهایی برای LLM
-            final_prompt = f"""با توجه به اطلاعات زیر، بهترین جواب رو فقط از داخل این اطلاعات به زبان فارسی بده. لطفاً از خارج از این اطلاعات استفاده نکن و اگر اطلاعات کافی برای جواب دادن نبود، بهترین  پاسخ رو بده.
-🔹 اطلاعات:\n{context}\n\n❓ سؤال: {query}
 """
-            # ارسال به LLM
-            response = llm.invoke(final_prompt)
-            raw_answer = response.content.strip()
-            # فیلتر کردن خطوط غیرپاسخ (مثلاً <think> یا توضیح مدل)
-            answer_lines = raw_answer.split('\n')
-            filtered_lines = [
-                line for line in answer_lines
-                if not line.strip().startswith("<")
-                and not line.strip().lower().startswith("think")
-                and not line.strip().startswith("#")
-                and not line.strip().lower().startswith("note")
-            ]
-            clean_answer = "\n".join(filtered_lines).strip()
-            if not clean_answer:
-                clean_answer = "متأسفم، اطلاعات دقیقی در این مورد ندارم."
-            # تایپ کردن تدریجی پاسخ
-            thinking.empty()
-            full_response = ""
-            placeholder = st.empty()
-            for word in clean_answer.split():
-                full_response += word + " "
-                placeholder.markdown(full_response + "▌")
-                time.sleep(0.03)
-            placeholder.markdown(full_response)
-            st.session_state.messages.append({'role': 'ai', 'content': full_response})
-            st.session_state.pending_prompt = None
-        except Exception as e:
-            thinking.empty()
-            st.error(f"خطا در پاسخ‌دهی: {str(e)}")

+# ----------- تعریف کلاس امبدینگ با Together -----------
 class TogetherEmbeddings(Embeddings):
     def __init__(self, model_name: str, api_key: str):
         self.model_name = model_name
     def embed_query(self, text: str) -> List[float]:
         return self.embed_documents([text])[0]
+# ----------- پردازش و ایندکس کردن CSV -----------
 @st.cache_resource
+def build_vectorstore_from_csv(csv_file_path: str):
+    # Read the CSV at `csv_file_path`, split the text of its first column into
+    # chunks, embed the chunks with TogetherEmbeddings and index them in FAISS.
+    # Returns a (vectorstore, embeddings) tuple.
+    df = pd.read_csv(csv_file_path)
+    # Only the first column is used; every cell is coerced to str.
+    # NOTE(review): missing cells presumably become the string 'nan' after
+    # astype(str) and would pass the emptiness filter below - confirm.
+    texts = df.iloc[:, 0].astype(str).tolist()
+    # Keep the stripped text of every cell that is non-empty after stripping.
+    texts = [text.strip() for text in texts if text.strip()]
+    # Split the texts into chunks
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=2048,
+        chunk_overlap=256,
+        length_function=len,
+        separators=["\n\n", "\n", " ", ""]
+    )
+    split_texts = []
+    for text in texts:
+        split_texts.extend(text_splitter.split_text(text))
+    # One Document per chunk; no metadata is attached.
+    documents = [Document(page_content=text) for text in split_texts]
+    # NOTE(review): the API key is a hard-coded literal in source; it should be
+    # loaded from an environment variable or a secrets store instead.
+    embeddings = TogetherEmbeddings(
+        model_name="togethercomputer/m2-bert-80M-32k-retrieval",
+        api_key="0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979"
+    )
+    vectorstore = FAISS.from_documents(documents, embeddings)
+    return vectorstore, embeddings
+# ----------- بارگذاری مدل زبانی -----------
+def load_llm():
+    # Return a ChatOpenAI client pointed at the Together endpoint
+    # (base_url below) with the model name fixed in this function.
+    # A new client object is constructed on every call.
+    # NOTE(review): the API key is a hard-coded literal in source; it should be
+    # loaded from an environment variable or a secrets store instead.
+    return ChatOpenAI(
+        base_url="https://api.together.xyz/v1",
+        api_key='0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979',
+        model="Qwen/Qwen3-235B-A22B-fp8-tput"
+    )
+# ----------- ساخت پرامپت نهایی برای LLM -----------
+def build_prompt(context: str, user_question: str) -> str:
+    # Return the final prompt string sent to the LLM: a fixed Persian
+    # instruction, followed by `context` and then `user_question`, both
+    # interpolated verbatim (no escaping or truncation is applied).
+    # The returned string ends with a newline, because the closing triple
+    # quote sits on its own line.
+    return f"""با توجه به اطلاعات زیر، فقط بر اساس آن‌ها به سؤال پاسخ بده. اگر اطلاعات کافی نیست، بگو اطلاعات کافی ندارم.
+🔹 اطلاعات:\n{context}\n\n❓ سؤال: {user_question}
 """
+# ----------- تمیز کردن خروجی مدل از پاسخ‌های اضافی -----------
+def clean_llm_response(response_text: str) -> str:
+    # Filter the model output line by line and return the remaining lines
+    # joined with newlines and stripped. If nothing remains, return a fixed
+    # Persian fallback message instead of an empty string.
+    lines = response_text.split('\n')
+    # A line is dropped when, after stripping, it starts with "<", or when its
+    # lower-cased form starts with "think", "note" or "#".
+    # NOTE(review): this is a prefix test per line, so it also drops legitimate
+    # answer lines that begin with those prefixes (e.g. a "#" heading), and for
+    # a multi-line <think>...</think> block only the lines that themselves
+    # match a prefix are removed - the lines in between are kept.
+    filtered = [
+        line for line in lines
+        if not line.strip().startswith("<")
+        and not line.strip().lower().startswith(("think", "note", "#"))
+    ]
+    return "\n".join(filtered).strip() or "متأسفم، اطلاعات دقیقی در این مورد ندارم."
+# ----------- پردازش سوال و بازیابی پاسخ‌ها -----------
+def process_user_query(query: str, vectorstore, embedding_model, llm):
+    # Answer `query` with retrieval-augmented generation: embed the query,
+    # fetch the nearest documents from `vectorstore`, build a prompt from them,
+    # call `llm`, and return the cleaned answer text.
+    # Exceptions raised by the embedding, search or LLM calls are not caught
+    # here and propagate to the caller.
+    # 1. Build the embedding of the question
+    query_embedding = embedding_model.embed_query(query)
+    # 2. Retrieve the 3 most similar documents
+    # NOTE(review): the original comment said "cosine similarity", but the
+    # metric is not set in this function; it depends on how the FAISS store
+    # was built - confirm.
+    docs = vectorstore.similarity_search_by_vector(query_embedding, k=3)
+    context = "\n".join([doc.page_content for doc in docs])
+    # 3. Build the final prompt, using the retrieved documents as context
+    final_prompt = build_prompt(context, query)
+    # 4. Send the prompt to the LLM and receive the response
+    response = llm.invoke(final_prompt)
+    raw_answer = response.content.strip()
+    # 5. Clean the response; the caller is responsible for displaying it
+    clean_answer = clean_llm_response(raw_answer)
+    return clean_answer
+# ----------- اجرای Streamlit UI -----------
+def run_chat_ui():
+    # Render the Streamlit chat page: build the index and the LLM client,
+    # replay the stored conversation, accept a new question, and answer any
+    # pending question with a word-by-word typing effect.
+    st.title("💬 دستیار هوشمند متنی بر اساس فایل CSV")
+    # Load the index
+    csv_file_path = 'output (1).csv'
+    try:
+        vectorstore, embedding_model = build_vectorstore_from_csv(csv_file_path)
+    except Exception as e:
+        # Any failure while building the index is shown to the user and the
+        # script run is halted.
+        st.error(f"خطا در پردازش فایل: {str(e)}")
+        st.stop()
+    llm = load_llm()
+    # Initialise the session-state keys on first use.
+    if 'messages' not in st.session_state:
+        st.session_state.messages = []
+    if 'pending_prompt' not in st.session_state:
+        st.session_state.pending_prompt = None
+    # Show the previous messages
+    # NOTE(review): stored messages (including the user's own input) are
+    # rendered with unsafe_allow_html=True, so any HTML they contain is passed
+    # through to the page - confirm this is intended.
+    for msg in st.session_state.messages:
+        with st.chat_message(msg['role']):
+            st.markdown(f"{msg['content']}", unsafe_allow_html=True)
+    # Get the new question
+    user_prompt = st.chat_input("سوال خود را وارد کنید...")
+    if user_prompt:
+        # Store the question and rerun, so it is drawn by the history loop
+        # above before the answer is generated on the next run.
+        st.session_state.messages.append({'role': 'user', 'content': user_prompt})
+        st.session_state.pending_prompt = user_prompt
+        st.rerun()
+    # Process the question
+    if st.session_state.pending_prompt:
+        with st.chat_message("ai"):
+            thinking = st.empty()
+            thinking.markdown("🤖 در حال فکر کردن...")
+            try:
+                # Process the question and get the final answer
+                query = st.session_state.pending_prompt
+                clean_answer = process_user_query(query, vectorstore, embedding_model, llm)
+                thinking.empty()
+                full_response = ""
+                placeholder = st.empty()
+                # Typing effect: append one word at a time with a cursor glyph.
+                # split() with no argument splits on any whitespace, so line
+                # breaks in the answer are replaced by single spaces.
+                for word in clean_answer.split():
+                    full_response += word + " "
+                    placeholder.markdown(full_response + "▌")
+                    time.sleep(0.03)
+                placeholder.markdown(full_response)
+                st.session_state.messages.append({'role': 'ai', 'content': full_response})
+                st.session_state.pending_prompt = None
+            except Exception as e:
+                # NOTE(review): pending_prompt is only cleared on the success
+                # path, so after a failure the same question is still pending
+                # on the next script run - confirm this retry is intended.
+                thinking.empty()
+                st.error(f"خطا در پردازش مدل: {str(e)}")
+# اجرای برنامه
+# Start the chat UI only when this file is executed as the main module.
+if __name__ == "__main__":
+    run_chat_ui()