Update app.py
Browse files
app.py
CHANGED
@@ -108,16 +108,15 @@ def get_pdf_index():
|
|
108 |
# بارگذاری PDF
|
109 |
loader = PyPDFLoader('test1.pdf')
|
110 |
documents = loader.load_and_split() # extract the text of the PDF here
|
|
|
|
|
|
|
111 |
|
112 |
-
|
113 |
-
model_name = "togethercomputer/m2-bert-80M-8k-retrieval" # نام مدل
|
114 |
-
model = SentenceTransformer(model_name, trust_remote_code=True)
|
115 |
-
|
116 |
splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=0)
|
117 |
texts = []
|
118 |
for doc in documents:
|
119 |
-
texts.extend(splitter.split_text(doc.page_content))
|
120 |
-
|
121 |
progress_bar = st.progress(0)
|
122 |
total_docs = len(texts)
|
123 |
|
@@ -133,13 +132,10 @@ def get_pdf_index():
|
|
133 |
time.sleep(1)
|
134 |
progress_bar.empty()
|
135 |
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
# استفاده از VectorstoreIndexCreator
|
141 |
-
return documents, embeddings, index # return the documents, embeddings, and index
|
142 |
-
|
143 |
|
144 |
# ----------------- تعریف LLM از Groq -----------------
|
145 |
llm = ChatOpenAI(
|
|
|
108 |
# بارگذاری PDF
|
109 |
loader = PyPDFLoader('test1.pdf')
|
110 |
documents = loader.load_and_split() # extract the text of the PDF here
|
111 |
+
model = TogetherEmbeddings(
|
112 |
+
model_name="togethercomputer/m2-bert-80M-8k-retrieval",
|
113 |
+
api_key="0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979"
|
114 |
|
115 |
+
)
|
|
|
|
|
|
|
116 |
splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=0)
|
117 |
texts = []
|
118 |
for doc in documents:
|
119 |
+
texts.extend(splitter.split_text(doc.page_content))
|
|
|
120 |
progress_bar = st.progress(0)
|
121 |
total_docs = len(texts)
|
122 |
|
|
|
132 |
time.sleep(1)
|
133 |
progress_bar.empty()
|
134 |
|
135 |
+
return VectorstoreIndexCreator(
|
136 |
+
embedding=embeddings,
|
137 |
+
text_splitter=RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=0)
|
138 |
+
).from_loaders(loader)
|
|
|
|
|
|
|
139 |
|
140 |
# ----------------- تعریف LLM از Groq -----------------
|
141 |
llm = ChatOpenAI(
|