Update app.py
app.py CHANGED
@@ -9,18 +9,10 @@ from langchain.chains import RetrievalQA
 from langchain.chat_models import ChatOpenAI
 from typing import List
 from together import Together
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from transformers import AutoTokenizer, AutoModel
-import torch
-from langchain_community.embeddings import HuggingFaceInstructEmbeddings


-
-from PIL import Image
-
-st.set_page_config(page_title="چت بات توانا", page_icon="🪖", layout="wide")
+st.set_page_config(page_title="چت بات ارتش", page_icon="🪖", layout="wide")

-# Style
 st.markdown("""
 <style>
 @import url('https://fonts.googleapis.com/css2?family=Vazirmatn:wght@400;700&display=swap');
@@ -30,7 +22,7 @@ st.markdown("""
     text-align: right;
 }
 .stApp {
-    background: url("military_bg.jpeg") no-repeat center center fixed;
+    background: url("./military_bg.jpeg") no-repeat center center fixed;
     background-size: cover;
     backdrop-filter: blur(2px);
 }
@@ -88,49 +80,47 @@ st.markdown("""
 </style>
 """, unsafe_allow_html=True)

-# Logo centered using columns
 col1, col2, col3 = st.columns([1, 1, 1])
 with col2:
-    try:
-        image = Image.open("army.png")
-        st.image(image, width=240)
-    except FileNotFoundError:
-        st.error("📁 فایل 'army.png' پیدا نشد. مطمئن شو کنار فایل اصلی Streamlit هست.")
+    st.image("army.png", width=240)

-# Title
 st.markdown("""
 <div class="header-text">
-    <h1>چت بات توانا</h1>
+    <h1>چت بات ارتش</h1>
     <div class="subtitle">دستیار هوشمند برای تصمیمگیری در میدان نبرد</div>
 </div>
 """, unsafe_allow_html=True)


-
-
-
-
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        self.model = AutoModel.from_pretrained(model_name)
+class TogetherEmbeddings(Embeddings):
+    def __init__(self, model_name: str, api_key: str):
+        self.model_name = model_name
+        self.client = Together(api_key=api_key)

     def embed_documents(self, texts: List[str]) -> List[List[float]]:
-        embeddings = []
-        for text in texts:
-            inputs = self.tokenizer(text, return_tensors="pt", truncation=True, padding=True)
-            with torch.no_grad():
-                outputs = self.model(**inputs)
-            embeddings.append(outputs.last_hidden_state.mean(dim=1).squeeze().tolist())
-        return embeddings
+        response = self.client.embeddings.create(model=self.model_name, input=texts)
+        return [item.embedding for item in response.data]

     def embed_query(self, text: str) -> List[float]:
         return self.embed_documents([text])[0]

 @st.cache_resource
-def get_pdf_index(
-
-
-
-
+def get_pdf_index():
+    with st.spinner('📄 در حال پردازش فایل PDF...'):
+        loader = [PyPDFLoader('test1.pdf')]
+        embeddings = TogetherEmbeddings(
+            model_name="togethercomputer/m2-bert-80M-8k-retrieval",
+            api_key="0291f33aee03412a47fa5d8e562e515182dcc5d9aac5a7fb5eefdd1759005979"
+
+
+
+
+        )
+        return VectorstoreIndexCreator(
+            embedding=embeddings,
+            text_splitter=RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=0)
+        ).from_loaders(loader)
+
 index = get_pdf_index()

 llm = ChatOpenAI(
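A quick, hedged sketch of what the hunk above amounts to: the locally loaded transformers model with mean-pooled hidden states is replaced by Together's hosted embeddings endpoint, wrapped in a small LangChain `Embeddings` adapter and handed to `VectorstoreIndexCreator`. The `Embeddings` import path, the environment-variable API key, and the sample query below are assumptions for illustration; the commit itself hard-codes the key and builds the index inside the cached `get_pdf_index()`.

```python
# Hedged sketch, not the committed app.py: exercises the new TogetherEmbeddings
# adapter outside Streamlit. The Embeddings import path is assumed (the real
# import sits above the diffed hunks), and the key is read from an environment
# variable here instead of the hard-coded string in the commit.
import os
from typing import List

from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.base import Embeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.text_splitter import RecursiveCharacterTextSplitter
from together import Together


class TogetherEmbeddings(Embeddings):
    """Adapter that lets LangChain call Together's embeddings endpoint."""

    def __init__(self, model_name: str, api_key: str):
        self.model_name = model_name
        self.client = Together(api_key=api_key)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        # One batched request; response items come back in input order.
        response = self.client.embeddings.create(model=self.model_name, input=texts)
        return [item.embedding for item in response.data]

    def embed_query(self, text: str) -> List[float]:
        return self.embed_documents([text])[0]


if __name__ == "__main__":
    embeddings = TogetherEmbeddings(
        model_name="togethercomputer/m2-bert-80M-8k-retrieval",
        api_key=os.environ["TOGETHER_API_KEY"],  # illustrative; the commit hard-codes the key
    )
    # Same index construction as get_pdf_index(), minus the Streamlit spinner/cache.
    index = VectorstoreIndexCreator(
        embedding=embeddings,
        text_splitter=RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=0),
    ).from_loaders([PyPDFLoader("test1.pdf")])
    # Quick sanity check: retrieve the chunks closest to a query.
    for doc in index.vectorstore.similarity_search("sample query", k=3):
        print(doc.page_content[:80])
```

Batching all texts into a single `embeddings.create` call is what removes the per-document tokenizer/torch loop of the old version.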
@@ -168,7 +158,7 @@ if st.session_state.pending_prompt:
     thinking = st.empty()
     thinking.markdown("🤖 در حال فکر کردن...")

-    response = chain.run(f'
+    response = chain.run(f'question:پاسخ را فقط به زبان فارسی جواب بده {st.session_state.pending_prompt}')
     answer = response.split("Helpful Answer:")[-1].strip()
     if not answer:
         answer = "متأسفم، اطلاعات دقیقی در این مورد ندارم."