import os

import docx
import streamlit as st
from hazm import Normalizer, SentenceTokenizer
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage

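# Chat client for Together AI's OpenAI-compatible endpoint. The key is read
# from the environment (TOGETHER_API_KEY is an assumed variable name) rather
# than hardcoded, so no secret ships with the source.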
llm = ChatOpenAI(
    base_url="https://api.together.xyz/v1",
    api_key=os.environ["TOGETHER_API_KEY"],
    model="deepseek-ai/DeepSeek-R1",
)

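# Load every .docx file in the input folder. Note that Streamlit reruns the
# whole script on each interaction, so the corpus is re-read on every query;
# wrapping this step in st.cache_data would avoid the repeated disk reads.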
folder_path = "46"
texts = []

for filename in os.listdir(folder_path):
    if filename.endswith(".docx"):
        full_path = os.path.join(folder_path, filename)
        doc = docx.Document(full_path)
        file_text = "\n".join([para.text for para in doc.paragraphs])
        if file_text.strip():
            texts.append(file_text)

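# Normalize the Persian text with Hazm and split the whole corpus into
# sentences, so the search below can work at sentence granularity.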
normalizer = Normalizer()
sentence_tokenizer = SentenceTokenizer()

all_sentences = []
for text in texts:
    normalized = normalizer.normalize(text)
    sentences = sentence_tokenizer.tokenize(normalized)
    all_sentences.extend(sentences)

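# Search box. The Persian label reads: "Enter the word or phrase you are
# looking for:"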
query = st.text_input("🔎 کلمه یا عبارت موردنظر خود را وارد کنید:")

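# On submit: find the first sentence containing the query, take up to five
# following sentences as context, and ask the LLM to rewrite that passage in
# the tone of the question.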
if query:
    found = False
    for idx, sentence in enumerate(all_sentences):
        if query in sentence:
            # Matching sentence plus up to five sentences of following context.
            next_sentences = all_sentences[idx + 1 : idx + 6]
            total_text = sentence + " " + " ".join(next_sentences)

            # Persian prompt: "Answer to be rewritten: {total_text}. Please
            # rewrite this answer taking the content and tone of the question
            # below into account. Question: {query}"
            prompt = f"پاسخی که باید بازنویسی شود:\n{total_text}\n\nلطفاً این پاسخ را با در نظر گرفتن محتوای سوال زیر و لحن آن بازنویسی کن:\n\nسوال: {query}"

            try:
                # ChatOpenAI expects message objects, not raw role dicts, and
                # returns an AIMessage whose text lives in .content.
                response = llm([
                    SystemMessage(content="You are a helpful assistant."),
                    HumanMessage(content=prompt),
                ])
                rewritten = response.content.strip()
                st.markdown("🎨 **بازنویسی شده با LLM:**")  # "Rewritten with the LLM:"
                st.write(rewritten)
            except Exception:
                # Shown only when the call fails: "The received response is not valid."
                st.warning("پاسخ دریافتی معتبر نیست.")

            found = True
            break

    if not found:
        st.warning("عبارت موردنظر در متن یافت نشد.")  # "The phrase was not found in the text."
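# To try this locally (assuming this file is saved as app.py and the .docx
# corpus sits in ./46): export TOGETHER_API_KEY=... && streamlit run app.py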