M17idd committed on
Commit 7a0f03d · verified · 1 Parent(s): f3e4793

Update app.py

Files changed (1):
1. app.py +31 -102
app.py CHANGED
@@ -1,24 +1,12 @@
 import time
-import tiktoken
 import streamlit as st
-from langchain.document_loaders import PyPDFLoader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.embeddings.base import Embeddings
-from langchain.vectorstores import FAISS
-from langchain.indexes import VectorstoreIndexCreator
-from langchain.chains import RetrievalQA
-from langchain.chat_models import ChatOpenAI
-from typing import List
-from together import Together
-
-# from langchain.embeddings import TogetherEmbeddings
-from langchain.schema import Document as LangchainDocument
-
-
-st.set_page_config(page_title="چت‌ بات ارتش", page_icon="🪖", layout="wide")
-
 st.markdown("""
 <style>
 @import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@400;700&display=swap');
@@ -86,108 +74,38 @@ st.markdown("""
 </style>
 """, unsafe_allow_html=True)

 col1, col2, col3 = st.columns([1, 1, 1])
 with col2:
     st.image("army.png", width=240)

 st.markdown("""
 <div class="header-text">
-    <h1>چت‌ بات ارتش</h1>
-    <div class="subtitle">دستیار هوشمند برای تصمیم‌گیری در میدان نبرد</div>
 </div>
 """, unsafe_allow_html=True)

-class TogetherEmbeddings(Embeddings):
-    def __init__(self, model_name: str, api_key: str):
-        self.model_name = model_name
-        self.client = Together(api_key=api_key)
-
-    def embed_documents(self, texts: List[str]) -> List[List[float]]:
-        response = self.client.embeddings.create(model=self.model_name, input=texts)
-        return [item.embedding for item in response.data]
-
-    def embed_query(self, text: str) -> List[float]:
-        return self.embed_documents([text])[0]
-
-
-def count_tokens(text, model_name="gpt-3.5-turbo"):
-    enc = tiktoken.encoding_for_model(model_name)
-    return len(enc.encode(text))
-
-@st.cache_resource
-def get_pdf_index():
-    with st.spinner('📄 در حال پردازش فایل PDF...'):
-        loader = [PyPDFLoader('test1.pdf')]
-        pages = []
-        for l in loader:
-            pages.extend(l.load())
-
-        splitter_initial = RecursiveCharacterTextSplitter(
-            chunk_size=124,
-            chunk_overlap=25
-        )
-
-        small_chunks = []
-        for page in pages:
-            text = page.page_content
-            if len(text) > 124:
-                small_chunks.extend(splitter_initial.split_text(text))
-            else:
-                small_chunks.append(text)
-
-        final_chunks = []
-        max_tokens = 128
-
-        for chunk in small_chunks:
-            token_count = count_tokens(chunk, model_name="gpt-3.5-turbo")
-            if token_count > max_tokens:
-                splitter_token_safe = RecursiveCharacterTextSplitter(
-                    chunk_size=128,
-                    chunk_overlap=64
-                )
-                smaller_chunks = splitter_token_safe.split_text(chunk)
-                final_chunks.extend(smaller_chunks)
-            else:
-                final_chunks.append(chunk)
-
-        documents = [LangchainDocument(page_content=text) for text in final_chunks]
-
-        embeddings = TogetherEmbeddings(
-            model_name="togethercomputer/m2-bert-80M-32k-retrieval",
-            api_key="0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979"
-        )
-
-        # Build the vectorstore directly with FAISS here
-        vectordb = FAISS.from_documents(documents, embedding=embeddings)
-
-        return vectordb
-
-index = get_pdf_index()
-
-llm = ChatOpenAI(
-    base_url="https://api.together.xyz/v1",
-    api_key='0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979',
-    model="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
-)
-
-chain = RetrievalQA.from_chain_type(
-    llm=llm,
-    chain_type='stuff',
-    retriever=index.vectorstore.as_retriever(),
-    input_key='question'
-)
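
Worth noting about the removed pipeline: get_pdf_index() returns the FAISS store itself, yet the chain wiring reads index.vectorstore, an attribute the FAISS object does not have (it belongs to LangChain's VectorstoreIndexWrapper), so this setup would likely have raised an AttributeError at startup. A corrected hookup for the old pipeline would have been a one-line change:

chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=index.as_retriever(),  # FAISS exposes as_retriever() directly
    input_key='question'
)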

 if 'messages' not in st.session_state:
     st.session_state.messages = []

 if 'pending_prompt' not in st.session_state:
     st.session_state.pending_prompt = None

 for msg in st.session_state.messages:
     with st.chat_message(msg['role']):
         st.markdown(f"🗨️ {msg['content']}", unsafe_allow_html=True)

 prompt = st.chat_input("چطور می‌تونم کمک کنم؟")

 if prompt:
@@ -195,17 +113,28 @@ if prompt:
     st.session_state.pending_prompt = prompt
     st.rerun()

 if st.session_state.pending_prompt:
     with st.chat_message('ai'):
         thinking = st.empty()
         thinking.markdown("🤖 در حال فکر کردن...")

-        response = chain.run(f'question:پاسخ را فقط به زبان فارسی جواب بده {st.session_state.pending_prompt}')
-        answer = response.split("Helpful Answer:")[-1].strip()
-        if not answer:
-            answer = "متأسفم، اطلاعات دقیقی در این مورد ندارم."

         thinking.empty()

         full_response = ""
         placeholder = st.empty()
         for word in answer.split():
 
+import os
 import time
 import streamlit as st
+from groq import Groq

+# ----------------- Page setup -----------------
+st.set_page_config(page_title="چت‌بات ارتش - Powered by Groq", page_icon="🪖", layout="wide")

+# Persian font styling and background
 st.markdown("""
 <style>
 @import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@400;700&display=swap');

 </style>
 """, unsafe_allow_html=True)

+# ----------------- Logo and title -----------------
 col1, col2, col3 = st.columns([1, 1, 1])
 with col2:
     st.image("army.png", width=240)

 st.markdown("""
 <div class="header-text">
+    <h1>چت‌بات ارتش</h1>
+    <div class="subtitle">دستیار هوشمند میدان نبرد - Powered by Groq</div>
 </div>
 """, unsafe_allow_html=True)

+# ----------------- Connect to Groq -----------------
+api_key = "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"

+client = Groq(api_key=api_key)
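
Both the old and new versions hardcode a live API credential in the source, which leaks it through the repository history. The new code imports os but never uses it, which suggests the key was meant to come from the environment instead. A minimal sketch, assuming a hypothetical GROQ_API_KEY variable:

# GROQ_API_KEY is an assumed variable name, not defined in this commit.
api_key = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=api_key)

The Groq client also reads GROQ_API_KEY from the environment by default, so Groq() with no argument behaves the same way.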

+selected_model = "llama3-70b-8192"  # best Groq model
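
Groq rotates its hosted model list over time, so a pinned ID like llama3-70b-8192 can disappear and turn every request into an error. A small guard, assuming the SDK's models.list() endpoint mirrors its OpenAI-style counterpart:

# Hypothetical availability check; falls back to an arbitrary served model.
available = {m.id for m in client.models.list().data}
if selected_model not in available:
    selected_model = sorted(available)[0]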
+
+# ----------------- Message storage state -----------------
 if 'messages' not in st.session_state:
     st.session_state.messages = []

 if 'pending_prompt' not in st.session_state:
     st.session_state.pending_prompt = None

+# ----------------- Display previous messages -----------------
 for msg in st.session_state.messages:
     with st.chat_message(msg['role']):
         st.markdown(f"🗨️ {msg['content']}", unsafe_allow_html=True)

+# ----------------- Chat input -----------------
 prompt = st.chat_input("چطور می‌تونم کمک کنم؟")

 if prompt:
 
     st.session_state.pending_prompt = prompt
     st.rerun()

+# ----------------- Model response -----------------
 if st.session_state.pending_prompt:
     with st.chat_message('ai'):
         thinking = st.empty()
         thinking.markdown("🤖 در حال فکر کردن...")

+        try:
+            chat_completion = client.chat.completions.create(
+                messages=[
+                    {"role": "system", "content": "پاسخ را همیشه رسمی و فارسی بده."},
+                    {"role": "user", "content": st.session_state.pending_prompt}
+                ],
+                model=selected_model,
+            )
+            answer = chat_completion.choices[0].message.content.strip()
+
+        except Exception as e:
+            answer = f"خطا در پاسخ‌دهی: {str(e)}"

         thinking.empty()
+
+        # Typing animation for the answer
         full_response = ""
         placeholder = st.empty()
         for word in answer.split():
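
The diff ends inside the typing loop. A typical Streamlit word-by-word animation, which would also explain the time import, looks like the following sketch (a hypothetical completion, not part of this commit):

for word in answer.split():
    full_response += word + " "
    placeholder.markdown(full_response)
    time.sleep(0.05)  # short delay creates the typing effect

st.session_state.messages.append({'role': 'ai', 'content': full_response})
st.session_state.pending_prompt = None

Alternatively, passing stream=True to client.chat.completions.create would let the app render real tokens as they arrive instead of simulating typing.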