Langchained_PGPS_RAG

Sleeping

App Files Files Community

SergeyO7 committed on Mar 26

Commit

b381fdf

verified ·

1 Parent(s): a8d757c

Create app.py

Browse files

Files changed (1) hide show

app.py +125 -0

app.py ADDED Viewed

	@@ -0,0 +1,125 @@

+import gradio as gr
+from langchain_community.document_loaders import UnstructuredMarkdownLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_core.documents import Document
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain_community.llms import HuggingFaceHub
+from langchain.prompts import ChatPromptTemplate
+from dotenv import load_dotenv
+import os
# Load environment variables (python-dotenv picks them up from a .env file when present).
load_dotenv()

# Directory containing the source document; the empty string makes
# os.path.join() yield a bare file name, i.e. a path relative to the CWD.
DATA_PATH = ""

# Prompt skeleton; {context} and {question} are filled in per query.
PROMPT_TEMPLATE = (
    "\n"
    "Ответь на вопрос, используя только следующий контекст:\n"
    "{context}\n"
    "---\n"
    "Ответь на вопрос на основе приведенного контекста: {question}\n"
)

# Global status text, rewritten by the pipeline functions and echoed to the user.
status_message = "Инициализация..."
# Vector store produced by the first successful initialize_vectorstore() call.
_vectorstore_cache = None


def initialize_vectorstore():
    """Return the vector store, building it only on the first successful call.

    Building means loading the document, splitting it into chunks and
    embedding them, which is expensive. The result is therefore kept in a
    module-level cache and reused by later calls instead of being rebuilt
    for every query.

    The global ``status_message`` is updated as the build progresses and is
    always left at the "ready" text on success (including cache hits), so
    callers that append per-query notes to it start from a clean state.

    Returns:
        The store returned by ``save_to_faiss``.

    Raises:
        Exception: whatever the load/split/index steps raise. The error is
            recorded in ``status_message`` and re-raised; nothing is cached,
            so the next call retries the build.
    """
    global status_message, _vectorstore_cache

    if _vectorstore_cache is not None:
        # Reset the status so notes appended during earlier queries do not
        # accumulate across requests.
        status_message = "База данных готова к использованию."
        return _vectorstore_cache

    try:
        status_message = "Загрузка и обработка документов..."
        documents = load_documents()
        chunks = split_text(documents)
        status_message = "Создание векторной базы..."
        vectorstore = save_to_faiss(chunks)
    except Exception as e:
        status_message = f"Ошибка инициализации: {str(e)}"
        raise

    _vectorstore_cache = vectorstore
    status_message = "База данных готова к использованию."
    return vectorstore
def generate_data_store():
    """Load the source document and index it without touching the status text.

    Returns:
        The store produced by ``save_to_faiss`` for the document's chunks, or
        ``None`` when ``load_documents`` yields nothing.
    """
    loaded = load_documents()
    if not loaded:
        return None
    return save_to_faiss(split_text(loaded))
def load_documents():
    """Load the Markdown source file ``pl250320252.md`` from ``DATA_PATH``.

    Returns:
        Whatever ``UnstructuredMarkdownLoader.load()`` returns for the file
        (the other functions here treat it as a list of ``Document``).

    Raises:
        FileNotFoundError: if the expected path does not exist.
    """
    file_path = os.path.join(DATA_PATH, "pl250320252.md")
    if os.path.exists(file_path):
        return UnstructuredMarkdownLoader(file_path).load()
    raise FileNotFoundError(f"Файл {file_path} не найден")
def split_text(documents: list[Document]):
    """Split documents into overlapping chunks for embedding.

    Chunk length is measured with ``len`` (i.e. in characters): at most 900
    per chunk with a 300-character overlap between neighbours. The splitter
    is also asked to record each chunk's start index (``add_start_index``).

    Args:
        documents: documents to split.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter.split_documents``.
    """
    splitter_options = {
        "chunk_size": 900,
        "chunk_overlap": 300,
        "length_function": len,
        "add_start_index": True,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return splitter.split_documents(documents)
def save_to_faiss(chunks: list[Document]):
    """Embed the chunks and build a FAISS index from them.

    Despite the name, nothing is written to disk here: the index returned by
    ``FAISS.from_documents`` is simply handed back to the caller.

    Args:
        chunks: document chunks to embed.

    Returns:
        The FAISS vector store built from ``chunks``.
    """
    model_id = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    embedder = HuggingFaceEmbeddings(
        model_name=model_id,
        model_kwargs={"device": "cpu"},  # run the embedding model on CPU
        encode_kwargs={"normalize_embeddings": True},
    )
    index = FAISS.from_documents(chunks, embedder)
    return index
def process_query(query_text: str, vectorstore):
    """Answer a question using the three most relevant chunks as context.

    The chunks (each prefixed with its relevance score) are inserted into
    ``PROMPT_TEMPLATE`` together with the question, and the resulting prompt
    is sent to the ``google/flan-t5-small`` model on the Hugging Face Hub.
    A "found N results" note is appended to the global ``status_message``.

    Args:
        query_text: the user's question.
        vectorstore: store exposing ``similarity_search_with_relevance_scores``;
            ``None`` means the store has not been initialised.

    Returns:
        A ``(answer, sources)`` tuple. ``sources`` lists the distinct,
        non-empty ``source`` metadata values of the retrieved chunks in
        retrieval order. When the store is missing, nothing is found, or any
        step fails, ``answer`` holds an explanatory message and ``sources``
        is empty; this function never raises for those cases.
    """
    global status_message

    if vectorstore is None:
        return "База данных не инициализирована", []

    try:
        results = vectorstore.similarity_search_with_relevance_scores(query_text, k=3)
        status_message += f"\nНайдено {len(results)} результатов"
        if not results:
            return "Не найдено результатов.", []

        context_text = "\n\n---\n\n".join(
            f"Релевантность: {score:.2f}\n{doc.page_content}"
            for doc, score in results
        )
        prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
        prompt = prompt_template.format(context=context_text, question=query_text)

        model = HuggingFaceHub(
            repo_id="google/flan-t5-small",
            model_kwargs={"temperature": 0.5, "max_length": 512}
        )
        # `invoke` is the supported entry point; `predict` is deprecated.
        response_text = model.invoke(prompt)

        # dict.fromkeys de-duplicates while keeping retrieval order (a set
        # would shuffle it); blank sources are dropped so they do not show up
        # as empty entries in the rendered list.
        candidate_sources = (doc.metadata.get("source", "") for doc, _ in results)
        sources = list(dict.fromkeys(src for src in candidate_sources if src))
        return response_text, sources
    except Exception as e:
        # Deliberate boundary: surface any failure to the UI as text.
        return f"Ошибка обработки запроса: {str(e)}", []
def chat_interface(query_text):
    """Gradio callback: answer one question and format the reply as text.

    Obtains the vector store via ``initialize_vectorstore()``, runs the query
    through ``process_query`` and returns a single string containing the
    current status text, the answer and the comma-separated sources (or a
    "no sources" placeholder).

    Args:
        query_text: the question typed by the user.

    Returns:
        The formatted reply, or a "critical error" message if any step raises.
    """
    try:
        store = initialize_vectorstore()
        answer, sources = process_query(query_text, store)
        sources_line = ", ".join(sources) if sources else "Нет источников"
        return f"{status_message}\n\nОтвет: {answer}\n\nИсточники: {sources_line}"
    except Exception as e:
        return f"Критическая ошибка: {str(e)}"
# Gradio interface: one two-line text box in, plain text out.
_question_box = gr.Textbox(lines=2, placeholder="Введите ваш вопрос здесь...")

interface = gr.Interface(
    fn=chat_interface,
    inputs=_question_box,
    outputs="text",
    title="Чат с документами",
    description="Задайте вопрос, и я отвечу на основе загруженных документов.",
)

# Start the web UI only when executed as a script.
if __name__ == "__main__":
    interface.launch()