M17idd committed on
Commit
06aed9c
·
verified ·
1 Parent(s): b457318

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -5
app.py CHANGED
@@ -118,6 +118,13 @@ class TogetherEmbeddings(Embeddings):
118
  def get_pdf_index():
119
  with st.spinner('📄 در حال پردازش فایل PDF...'):
120
  loader = [PyPDFLoader('test1.pdf')]
 
 
 
 
 
 
 
121
  embeddings = TogetherEmbeddings(
122
  model_name="togethercomputer/m2-bert-80M-8k-retrieval",
123
  api_key="0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979"
@@ -126,10 +133,10 @@ def get_pdf_index():
126
 
127
 
128
  )
129
- return VectorstoreIndexCreator(
130
- embedding=embeddings,
131
- text_splitter=RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=0)
132
- ).from_loaders(loader)
133
 
134
  index = get_pdf_index()
135
 
@@ -168,7 +175,7 @@ if st.session_state.pending_prompt:
168
  thinking = st.empty()
169
  thinking.markdown("🤖 در حال فکر کردن...")
170
 
171
- response = chain.run(f'question:پاسخ را فقط به زبان فارسی جواب بده {st.session_state.pending_prompt}')
172
  answer = response.split("Helpful Answer:")[-1].strip()
173
  if not answer:
174
  answer = "متأسفم، اطلاعات دقیقی در این مورد ندارم."
 
118
  def get_pdf_index():
119
  with st.spinner('📄 در حال پردازش فایل PDF...'):
120
  loader = [PyPDFLoader('test1.pdf')]
121
+ pages = loader.load()
122
+ full_text = "\n".join([page.page_content for page in pages])
123
+ text_splitter = RecursiveCharacterTextSplitter(
124
+ chunk_size=2048,
125
+ chunk_overlap=256
126
+ )
127
+ texts = text_splitter.split_text(full_text)
128
  embeddings = TogetherEmbeddings(
129
  model_name="togethercomputer/m2-bert-80M-8k-retrieval",
130
  api_key="0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979"
 
133
 
134
 
135
  )
136
+
137
+ vectorstore = FAISS.from_texts(texts, embeddings)
138
+
139
+ return vectorstore
140
 
141
  index = get_pdf_index()
142
 
 
175
  thinking = st.empty()
176
  thinking.markdown("🤖 در حال فکر کردن...")
177
 
178
+ response = chain.run(f'لطفاً فقط به زبان فارسی پاسخ بده: {st.session_state.pending_prompt}')
179
  answer = response.split("Helpful Answer:")[-1].strip()
180
  if not answer:
181
  answer = "متأسفم، اطلاعات دقیقی در این مورد ندارم."