Spaces:

M17idd
/

army

Running

App Files Files Community

M17idd committed 11 days ago

Commit

2242369

verified ·

1 Parent(s): d9bc83c

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -9

app.py CHANGED Viewed

@@ -11,6 +11,7 @@ from typing import List
 from together import Together
 import pandas as pd
 import streamlit as st
 import numpy as np
@@ -191,7 +192,7 @@ st.markdown('<div class="chat-message">👋 سلام! چطور میتونم کم
 # ⚙️ مدل Embedding ساده و سریع
 @st.cache_resource
 def get_embedding_model():
-    return SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
 @st.cache_resource
 def process_csv(csv_file):
@@ -200,7 +201,7 @@ def process_csv(csv_file):
     texts = [text for text in texts if text.strip()]
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=300,
         chunk_overlap=50,
         length_function=len,
         separators=["\n\n", "\n", " ", ""]
@@ -214,9 +215,9 @@ def process_csv(csv_file):
     model = get_embedding_model()
     embeddings = model.encode(split_texts, show_progress_bar=True)
-    # ساخت ایندکس FAISS
     dim = embeddings.shape[1]
-    index = faiss.IndexFlatL2(dim)
     index.add(np.array(embeddings))
     return split_texts, embeddings, index
@@ -253,13 +254,22 @@ if st.session_state.pending_prompt:
         thinking = st.empty()
         thinking.markdown("🤖 در حال جستجو...")
-        # امبد کردن سؤال و جستجو
         model = get_embedding_model()
         query_vector = model.encode([st.session_state.pending_prompt])
-        D, I = index.search(np.array(query_vector), k=3)  # 3 نتیجه نزدیک
-        results = [texts[i] for i in I[0]]
-        response = "🧠 نزدیک‌ترین پاسخ‌ها:\n\n" + "\n\n---\n\n".join(results)
         thinking.empty()
         full_response = ""

 from together import Together
 import pandas as pd
 import streamlit as st
+from sklearn.metrics.pairwise import cosine_similarity
 import numpy as np
 # ⚙️ مدل Embedding ساده و سریع
 @st.cache_resource
 def get_embedding_model():
+    return SentenceTransformer("HooshvareLab/bert-fa-zwnj-base")
 @st.cache_resource
 def process_csv(csv_file):
     texts = [text for text in texts if text.strip()]
     text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=200,
         chunk_overlap=50,
         length_function=len,
         separators=["\n\n", "\n", " ", ""]
     model = get_embedding_model()
     embeddings = model.encode(split_texts, show_progress_bar=True)
     dim = embeddings.shape[1]
+    index = faiss.IndexHNSWFlat(dim, 32)
+    index.hnsw.efSearch = 50
     index.add(np.array(embeddings))
     return split_texts, embeddings, index
         thinking = st.empty()
         thinking.markdown("🤖 در حال جستجو...")
         model = get_embedding_model()
         query_vector = model.encode([st.session_state.pending_prompt])
+        D, I = index.search(np.array(query_vector), k=10)
+        top_indices = I[0]
+        top_texts = [texts[i] for i in top_indices]
+        top_vectors = np.array([embeddings[i] for i in top_indices])
+        similarities = cosine_similarity(query_vector, top_vectors)[0]
+        # پیدا کردن دقیق‌ترین متن
+        best_match_relative_index = np.argmax(similarities)
+        best_match_index = top_indices[best_match_relative_index]
+        best_match_text = texts[best_match_index]
+        response = "🧠 پاسخ سوال :\n\n" .join(best_match_text)
         thinking.empty()
         full_response = ""