M17idd committed on
Commit
a0c39f3
·
verified ·
1 Parent(s): b84e65e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -11
app.py CHANGED
@@ -13,6 +13,8 @@ from pydantic import Field
13
  import numpy as np
14
  from sentence_transformers import SentenceTransformer
15
  import faiss
 
 
16
 
17
  # ----------------- تنظیمات صفحه -----------------
18
  st.set_page_config(page_title="چت‌ بات توانا", page_icon="🪖", layout="wide")
@@ -103,22 +105,19 @@ def get_pdf_index():
103
  loader = PyPDFLoader('test1.pdf')
104
  documents = loader.load()
105
 
106
- splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=256)
107
  texts = []
108
  for doc in documents:
109
  texts.extend(splitter.split_text(doc.page_content))
110
 
111
- model = SentenceTransformer("togethercomputer/m2-bert-80M-8k-retrieval", trust_remote_code=True)
 
 
 
 
 
112
 
113
- embeddings = model.encode(texts, convert_to_numpy=True)
114
-
115
- index = faiss.IndexIVFFlat(embeddings.shape[1], 100)
116
- index.train(embeddings)
117
- index.add(embeddings)
118
-
119
- docs = [{"text": text} for text in texts]
120
-
121
- return docs, embeddings, index, model
122
 
123
  # ----------------- بارگذاری دیتا -----------------
124
  documents, embeddings, index, model = get_pdf_index()
 
13
  import numpy as np
14
  from sentence_transformers import SentenceTransformer
15
  import faiss
16
+ from langchain.indexes import VectorstoreIndexCreator
17
+ from langchain.vectorstores import FAISS
18
 
19
  # ----------------- تنظیمات صفحه -----------------
20
  st.set_page_config(page_title="چت‌ بات توانا", page_icon="🪖", layout="wide")
 
105
  loader = PyPDFLoader('test1.pdf')
106
  documents = loader.load()
107
 
108
+ splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=128)
109
  texts = []
110
  for doc in documents:
111
  texts.extend(splitter.split_text(doc.page_content))
112
 
113
+ vectorstore_index_creator = VectorstoreIndexCreator(
114
+ vectorstore_cls=FAISS,
115
+ embedding_function=SentenceTransformer("togethercomputer/m2-bert-80M-8k-retrieval", trust_remote_code=True)
116
+ )
117
+
118
+ index = vectorstore_index_creator.from_documents([Document(page_content=text) for text in texts])
119
 
120
+ return index
 
 
 
 
 
 
 
 
121
 
122
  # ----------------- بارگذاری دیتا -----------------
123
  documents, embeddings, index, model = get_pdf_index()