Update app.py
Browse files
app.py
CHANGED
@@ -6,67 +6,43 @@ from langchain_together import TogetherEmbeddings
|
|
6 |
from langchain.vectorstores import FAISS
|
7 |
from langchain.chat_models import ChatOpenAI
|
8 |
from langchain.chains import RetrievalQA
|
|
|
9 |
|
10 |
-
# --- 📄 ساخت امبدینگها با batch 50 تایی
|
11 |
-
def batch_embed(texts, embeddings_model, batch_size=50):
    """Embed documents in fixed-size batches (default 50 per API call).

    Args:
        texts: sequence of document objects exposing ``page_content``.
        embeddings_model: model exposing ``embed_documents(list[str])``.
        batch_size: number of documents sent per embedding request.

    Returns:
        A flat list with one embedding per input document, in order.
    """
    collected = []
    for start in range(0, len(texts), batch_size):
        window = texts[start:start + batch_size]
        contents = [doc.page_content for doc in window]
        collected.extend(embeddings_model.embed_documents(contents))
    return collected
|
18 |
|
19 |
@st.cache_resource
|
20 |
def load_chunks_and_embeddings():
|
21 |
-
|
22 |
-
|
23 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=0)
|
24 |
-
docs = text_splitter.split_documents(pages)
|
25 |
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
vectorstore = None # هنوز نساختیم
|
31 |
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
35 |
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
else:
|
44 |
-
vectorstore.add_embeddings(embeddings_batch, batch_docs)
|
45 |
-
|
46 |
-
progress.progress(min((i+batch_size)/total, 1.0))
|
47 |
-
|
48 |
-
progress.empty()
|
49 |
-
return vectorstore
|
50 |
-
|
51 |
-
# --- 🛠️ آماده کردن دیتابیس
|
52 |
-
with st.spinner("📚 در حال بارگذاری فایل و ساخت امبدینگها... لطفا صبور باشید"):
|
53 |
-
vectorstore = load_chunks_and_embeddings()
|
54 |
-
|
55 |
-
# --- 🤖 آماده سازی مدل LLM
|
56 |
llm = ChatOpenAI(
|
57 |
base_url="https://api.together.xyz/v1",
|
58 |
api_key='0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979',
|
59 |
model="meta-llama/Llama-3-70B-Instruct-Turbo-Free"
|
60 |
)
|
61 |
|
62 |
-
|
63 |
|
64 |
chain = RetrievalQA.from_chain_type(
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
)
|
70 |
|
71 |
# --- 💬 چت بات
|
72 |
if 'messages' not in st.session_state:
|
|
|
6 |
from langchain.vectorstores import FAISS
|
7 |
from langchain.chat_models import ChatOpenAI
|
8 |
from langchain.chains import RetrievalQA
|
9 |
+
from langchain.indexes import VectorstoreIndexCreator
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
@st.cache_resource
def load_chunks_and_embeddings(pdf_path='test1.pdf'):
    """Build (once per Streamlit resource cache) a vector index over a PDF.

    Loads the PDF, splits it into 300-character chunks, embeds the chunks
    with Together.ai embeddings, and returns a LangChain vectorstore index.

    Args:
        pdf_path: path of the PDF to index. Defaults to ``'test1.pdf'`` so
            existing callers (which pass nothing) keep the old behavior.

    Returns:
        The object produced by ``VectorstoreIndexCreator.from_loaders``;
        its ``.vectorstore`` attribute can serve as a retriever.
    """
    import os  # local import: only needed for the API-key lookup below

    with st.spinner("در حال بارگذاری فایل و آمادهسازی... لطفاً صبور باشید 🙏"):
        progress_bar = st.progress(0, text="در حال بارگذاری فایل PDF...")

        pdf_loader = PyPDFLoader(pdf_path)
        # NOTE: no explicit pdf_loader.load() here — from_loaders() below
        # performs the loading itself; calling load() first read the PDF twice.
        progress_bar.progress(30, text="صفحات PDF بارگذاری شد. در حال ایجاد مدل برداری...")

        # SECURITY: an API key committed to source control is compromised and
        # should be rotated. The environment variable takes precedence; the
        # literal remains only as a legacy fallback for old deployments.
        embeddings = TogetherEmbeddings(
            api_key=os.environ.get(
                "TOGETHER_API_KEY",
                "0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979",
            )
        )
        progress_bar.progress(60, text="مدل Embedding ساخته شد. در حال ایجاد ایندکس...")

        index = VectorstoreIndexCreator(
            embedding=embeddings,
            text_splitter=RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=0)
        ).from_loaders([pdf_loader])

        progress_bar.progress(100, text="بارگذاری کامل شد! ✅")
        return index
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
llm = ChatOpenAI(
|
34 |
base_url="https://api.together.xyz/v1",
|
35 |
api_key='0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979',
|
36 |
model="meta-llama/Llama-3-70B-Instruct-Turbo-Free"
|
37 |
)
|
# Build the vector index once (served from Streamlit's resource cache on
# reruns), then wire it into a RetrievalQA chain over the chat LLM.
index = load_chunks_and_embeddings()

chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',  # 'stuff' packs all retrieved chunks into a single prompt
    # input_key='question' lets callers invoke the chain with {'question': ...}
    retriever=index.vectorstore.as_retriever(), input_key='question')
|
45 |
+
|
|
|
46 |
|
47 |
# --- 💬 چت بات
|
48 |
if 'messages' not in st.session_state:
|