Update app.py
Browse files
app.py
CHANGED
@@ -13,6 +13,8 @@ from pydantic import Field
|
|
13 |
import numpy as np
|
14 |
from sentence_transformers import SentenceTransformer
|
15 |
import faiss
|
|
|
|
|
16 |
|
17 |
# ----------------- تنظیمات صفحه -----------------
|
18 |
st.set_page_config(page_title="چت بات توانا", page_icon="🪖", layout="wide")
|
@@ -103,22 +105,19 @@ def get_pdf_index():
|
|
103 |
loader = PyPDFLoader('test1.pdf')
|
104 |
documents = loader.load()
|
105 |
|
106 |
-
splitter = RecursiveCharacterTextSplitter(chunk_size=
|
107 |
texts = []
|
108 |
for doc in documents:
|
109 |
texts.extend(splitter.split_text(doc.page_content))
|
110 |
|
111 |
-
|
|
|
|
|
|
|
|
|
|
|
112 |
|
113 |
-
|
114 |
-
|
115 |
-
index = faiss.IndexIVFFlat(embeddings.shape[1], 100)
|
116 |
-
index.train(embeddings)
|
117 |
-
index.add(embeddings)
|
118 |
-
|
119 |
-
docs = [{"text": text} for text in texts]
|
120 |
-
|
121 |
-
return docs, embeddings, index, model
|
122 |
|
123 |
# ----------------- بارگذاری دیتا -----------------
|
124 |
documents, embeddings, index, model = get_pdf_index()
|
|
|
13 |
import numpy as np
|
14 |
from sentence_transformers import SentenceTransformer
|
15 |
import faiss
|
16 |
+
from langchain.indexes import VectorstoreIndexCreator
|
17 |
+
from langchain.vectorstores import FAISS
|
18 |
|
19 |
# ----------------- تنظیمات صفحه -----------------
|
20 |
st.set_page_config(page_title="چت بات توانا", page_icon="🪖", layout="wide")
|
|
|
105 |
loader = PyPDFLoader('test1.pdf')
|
106 |
documents = loader.load()
|
107 |
|
108 |
+
splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=128)
|
109 |
texts = []
|
110 |
for doc in documents:
|
111 |
texts.extend(splitter.split_text(doc.page_content))
|
112 |
|
113 |
+
vectorstore_index_creator = VectorstoreIndexCreator(
|
114 |
+
vectorstore_cls=FAISS,
|
115 |
+
embedding_function=SentenceTransformer("togethercomputer/m2-bert-80M-8k-retrieval", trust_remote_code=True)
|
116 |
+
)
|
117 |
+
|
118 |
+
index = vectorstore_index_creator.from_documents([Document(page_content=text) for text in texts])
|
119 |
|
120 |
+
return index
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
|
122 |
# ----------------- بارگذاری دیتا -----------------
|
123 |
# NOTE(review): the new-side body shown for get_pdf_index() in this diff ends with
# `return index` (a single value), while this call still unpacks four names.
# Presumably this fails at startup when the unpack runs — TODO confirm, then update
# this call site and any later users of `documents`, `embeddings` and `model`.
documents, embeddings, index, model = get_pdf_index()
|