Spaces:

stiv14
/

pdf-multilanguage-qa-role

Running

App Files Files Community

stivenDR14 commited on Feb 28

Commit

67f0d18

0 Parent(s):

Initial commit

Browse files

Files changed (6) hide show

.github/workflows/manual.yml +20 -0
.gitignore +14 -0
README.md +141 -0
app.py +246 -0
pdf_processor.py +353 -0
utils.py +190 -0

.github/workflows/manual.yml ADDED Viewed

	@@ -0,0 +1,20 @@

+name: Sync to Hugging Face hub
+on:
+  push:
+    branches: [main]
+  # to run this workflow manually from the Actions tab
+  workflow_dispatch:
+jobs:
+  sync-to-hub:
+    runs-on: ubuntu-latest
+    steps:
+      # Check out with the complete history (fetch-depth: 0) and the LFS objects.
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+          lfs: true
+      - name: Push to hub
+        env:
+          # Token taken from the repository secret of the same name.
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        # NOTE(review): HF_USERNAME looks like the placeholder from the docs template - confirm
+        # whether it should be the real account name.
+        run: git push https://HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/stiv14/pdf-multilanguage-qa-role main

.gitignore ADDED Viewed

	@@ -0,0 +1,14 @@

+#ignore env variables
+.env
+#ignore pycache folder and all files in it
+__pycache__
+#ignore chroma_db folder
+chroma_db
+#ignore ollama folder
+ollama
+#ignore llama_index folder
+llama_index

README.md ADDED Viewed

	@@ -0,0 +1,141 @@

+# 🤖 PDF AI Assistant
+A multilingual PDF processing application that leverages various AI models to analyze, summarize, and interact with PDF documents. Built with Python, Gradio, and LangChain.
+## 🌟 Features
+- **Multiple AI Models Support**:
+  - OpenAI GPT-4o-mini
+  - IBM Granite 3.1
+  - Mistral Small 24B
+  - SmolLM2 1.7B
+  - Local Ollama models
+- **Multilingual Interface**:
+  - English
+  - Español
+  - Deutsch
+  - Français
+  - Português
+- **Core Functionalities**:
+  - 📝 Text extraction from PDFs
+  - 💬 Interactive Q&A with document content
+  - 📋 Document summarization
+  - 👨‍💼 Customizable specialist advisor
+  - 🔄 Dynamic chunk size and overlap settings
+## 🛠️ Installation
+1. Clone the repository:
+```bash
+git clone <repository-url>
+cd pdf-ai-assistant
+```
+2. Install required dependencies:
+```bash
+pip install -r requirements.txt
+```
+3. Set up environment variables:
+```bash
+# Create .env file
+touch .env
+# Add your API keys (if using)
+WATSONX_APIKEY=your_watsonx_api_key
+WATSONX_PROJECT_ID=your_watsonx_project_id
+```
+## 📦 Dependencies
+- gradio
+- langchain
+- chromadb
+- PyPDF2
+- ollama (for local models)
+- python-dotenv
+- requests
+- ibm-watsonx-ai
+## 🚀 Usage
+1. Start the application:
+```bash
+python app.py
+```
+2. Open your web browser and navigate to the provided URL (usually http://localhost:7860)
+3. Select your preferred:
+   - Language
+   - AI Model
+   - Model Type (Local/API)
+4. Upload a PDF file and process it
+5. Use any of the three main features:
+   - Ask questions about the document
+   - Generate a comprehensive summary
+   - Get specialized analysis using the custom advisor
+## 💡 Features in Detail
+### Q&A System
+- Interactive chat interface
+- Context-aware responses
+- Source page references
+### Summarization
+- Chunk-based processing
+- Configurable chunk sizes
+- Comprehensive document overview
+### Specialist Advisor
+- Customizable expert roles
+- Detailed analysis based on expertise
+- Structured insights and recommendations
+## 🔧 Configuration
+The application supports various AI models:
+- Local models via Ollama
+- API-based models (OpenAI, IBM WatsonX)
+- Hugging Face models
+For Ollama local models, ensure:
+```bash
+ollama pull granite3.1-dense
+ollama pull granite-embedding:278m
+```
+## 🌐 Language Support
+The interface and AI responses are available in:
+- English
+- Spanish
+- German
+- French
+- Portuguese
+## 📝 License
+[MIT License]
+## 🤝 Contributing
+Contributions, issues, and feature requests are welcome!

app.py ADDED Viewed

	@@ -0,0 +1,246 @@

+import gradio as gr
+from pdf_processor import PDFProcessor
+from utils import AI_MODELS, TRANSLATIONS
+class PDFProcessorUI:
+    def __init__(self):
+        self.processor = PDFProcessor()
+        self.current_language = "English"
+        self.current_ai_model = "Huggingface / IBM granite granite 3.1 8b Instruct"
+        self.current_type_model = "Api Key"
+    def change_language(self, language):
+        self.current_language = language
+        self.processor.set_language(language)
+        # Retornamos todos los textos que necesitan ser actualizados
+        return [
+            TRANSLATIONS[language]["title"],
+            gr.update(label=TRANSLATIONS[language]["upload_pdf"]),
+            gr.update(label=TRANSLATIONS[language]["chunk_size"]),
+            gr.update(label=TRANSLATIONS[language]["chunk_overlap"]),
+            gr.update(value=TRANSLATIONS[language]["process_btn"]),
+            gr.update(label=TRANSLATIONS[language]["processing_status"]),
+            gr.update(label=TRANSLATIONS[language]["qa_tab"]),
+            gr.update(label=TRANSLATIONS[language]["summary_tab"]),
+            gr.update(label=TRANSLATIONS[language]["specialist_tab"]),
+            gr.update(label=TRANSLATIONS[language]["mini_summary_title"]),
+            gr.update(label=TRANSLATIONS[language]["mini_analysis_title"]),
+            gr.update(placeholder=TRANSLATIONS[language]["chat_placeholder"]),
+            TRANSLATIONS[language]["chat_title"],
+            gr.update(value=TRANSLATIONS[language]["chat_btn"]),
+            gr.update(value=TRANSLATIONS[language]["generate_summary"]),
+            gr.update(label=TRANSLATIONS[language]["summary_label"]),
+            gr.update(label=TRANSLATIONS[language]["ai_model"]),
+            TRANSLATIONS[language]["specialist_title"],
+            gr.update(label=TRANSLATIONS[language]["specialist_label"]),
+            gr.update(label=TRANSLATIONS[language]["specialist_output"]),
+            gr.update(value=TRANSLATIONS[language]["specialist_btn"])
+        ]
+    def change_ai_model(self, ai_model):
+        self.current_ai_model = ai_model
+        if ai_model == "IBM Granite3.1 dense / Ollama local":
+            return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False, maximum=2048), gr.update(visible=False, maximum=200)
+        elif ai_model == "Open AI / GPT-4o-mini":
+            return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False, maximum=2048), gr.update(visible=False, maximum=200)
+        else:
+            return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False, maximum=500), gr.update(visible=False, maximum=100)
+    def change_type_model(self, type_model):
+        self.current_type_model = type_model
+        if type_model == "Api Key":
+            if self.current_ai_model == "IBM Granite3.1 dense / Ollama local":
+                return gr.update(visible=False), gr.update(visible=False)
+            else:
+                return gr.update(visible=True), gr.update(visible=False)
+        else:
+            return gr.update(visible=False), gr.update(visible=False)
+    def process_pdf(self, pdf_file, chunk_size, chunk_overlap, ai_model, type_model, api_key, project_id_watsonx):
+        return self.processor.process_pdf(pdf_file, chunk_size, chunk_overlap, ai_model, type_model, api_key, project_id_watsonx)
+    def qa_interface(self, message, history, ai_model, type_model, api_key, project_id_watsonx):
+        return self.processor.get_qa_response(message, history, ai_model, type_model, api_key, project_id_watsonx)
+    def summarize_interface(self, ai_model, type_model, api_key, project_id_watsonx):
+        return self.processor.get_summary(ai_model, type_model, api_key, project_id_watsonx)
+    def specialist_opinion(self, ai_model, type_model, api_key, project_id_watsonx, specialist_prompt):
+        return self.processor.get_specialist_opinion(ai_model, type_model, api_key, project_id_watsonx, specialist_prompt)
+    def upload_file(files):
+        file_paths = [file.name for file in files]
+        return file_paths[0]
+    def create_ui(self):
+        """Build the Gradio Blocks layout, wire the event handlers and return it.
+
+        Every visible text comes from ``TRANSLATIONS`` for the current language.
+        The method only constructs the interface; launching is left to the caller.
+
+        Returns:
+            The ``gr.Blocks`` object holding the whole interface.
+        """
+        with gr.Blocks() as demo:
+            title = gr.Markdown(TRANSLATIONS[self.current_language]["title"])
+            # Top row: interface language and AI model selectors.
+            with gr.Row():
+                language_dropdown = gr.Dropdown(
+                    choices=list(TRANSLATIONS.keys()),
+                    value=self.current_language,
+                    label="Language/Idioma/Sprache/Langue/Língua",
+                    key="language_dropdown"
+                )
+                ai_model_dropdown = gr.Dropdown(
+                    choices=list(AI_MODELS.keys()),
+                    value=self.current_ai_model,
+                    label= TRANSLATIONS[self.current_language]["ai_model"],
+                    key="ai_model_dropdown"
+                )
+            # Upload area plus the model-type / credential inputs, which start hidden.
+            with gr.Row():
+                with gr.Column():
+                    with gr.Row():
+                        pdf_file = gr.File(
+                            label=TRANSLATIONS[self.current_language]["upload_pdf"],
+                            file_types=[".pdf"]
+                        )
+                        with gr.Column():
+                            type_model=gr.Radio(choices=["Local", "Api Key"], label=TRANSLATIONS[self.current_language]["model_type"], visible=False, value="Api Key")
+                            api_key_input = gr.Textbox(label="Api Key", placeholder=TRANSLATIONS[self.current_language]["api_key_placeholder"], visible=False)
+                            project_id_watsonx = gr.Textbox(label="Project ID", placeholder=TRANSLATIONS[self.current_language]["project_id_placeholder"], visible=False)
+                    # Both chunk sliders are created hidden (visible=False).
+                    chunk_size = gr.Slider(
+                        value=250,
+                        label=TRANSLATIONS[self.current_language]["chunk_size"],
+                        minimum=100,
+                        maximum=500,
+                        step=10,
+                        visible=False
+                    )
+                    chunk_overlap = gr.Slider(
+                        value=25,
+                        label=TRANSLATIONS[self.current_language]["chunk_overlap"],
+                        minimum=10,
+                        maximum=100,
+                        step=5,
+                        visible=False
+                    )
+                    process_btn = gr.Button(
+                        TRANSLATIONS[self.current_language]["process_btn"]
+                    )
+                    process_output = gr.Textbox(
+                        label=TRANSLATIONS[self.current_language]["processing_status"]
+                    )
+            # The three tabs are created first and filled in afterwards.
+            with gr.Tabs() as tabs:
+                qa_tab = gr.Tab(TRANSLATIONS[self.current_language]["qa_tab"])
+                summary_tab = gr.Tab(TRANSLATIONS[self.current_language]["summary_tab"])
+                specialist_tab = gr.Tab(TRANSLATIONS[self.current_language]["specialist_tab"])
+            # Q&A tab: question box, ask button and a Markdown area for the answer.
+            with qa_tab:
+                chat_title = gr.Markdown(TRANSLATIONS[self.current_language]["chat_title"])
+                chat_placeholder = gr.Textbox(
+                    placeholder=TRANSLATIONS[self.current_language]["chat_placeholder"],
+                    container=False,
+                    show_label=False
+                )
+                chat_btn = gr.Button(TRANSLATIONS[self.current_language]["chat_btn"])
+                chatbot = gr.Markdown(height=400)
+            # Summary tab: a hidden accordion, the summary box and its button.
+            with summary_tab:
+                with gr.Accordion(TRANSLATIONS[self.current_language]["mini_analysis_title"], open=False, visible=False):
+                    minisummaries_output = gr.Textbox(
+                        label=TRANSLATIONS[self.current_language]["mini_analysis_title"],
+                        lines=10
+                    )
+                summary_output = gr.Textbox(
+                    label=TRANSLATIONS[self.current_language]["summary_label"],
+                    lines=10
+                )
+                summarize_btn = gr.Button(
+                    TRANSLATIONS[self.current_language]["generate_summary"]
+                )
+            # Specialist tab: role description input, a hidden accordion, output box and button.
+            with specialist_tab:
+                specialist_title = gr.Markdown(TRANSLATIONS[self.current_language]["specialist_title"])
+                specialist_placeholder = gr.Textbox(
+                    label=TRANSLATIONS[self.current_language]["specialist_label"],
+                    lines=10
+                )
+                with gr.Accordion(TRANSLATIONS[self.current_language]["mini_analysis_title"], open=False, visible=False):
+                    minianalysis_output = gr.Textbox(
+                        label=TRANSLATIONS[self.current_language]["mini_analysis_title"],
+                        lines=10
+                    )
+                specialist_output = gr.Textbox(label=TRANSLATIONS[self.current_language]["specialist_output"], lines=20)
+                specialist_btn = gr.Button(TRANSLATIONS[self.current_language]["specialist_btn"])
+            # The order of these outputs must match the list returned by change_language.
+            language_dropdown.change(
+                fn=self.change_language,
+                inputs=[language_dropdown],
+                outputs=[
+                    title,
+                    pdf_file,
+                    chunk_size,
+                    chunk_overlap,
+                    process_btn,
+                    process_output,
+                    qa_tab,
+                    summary_tab,
+                    specialist_tab,
+                    minisummaries_output,
+                    minianalysis_output,
+                    chat_placeholder,
+                    chat_title,
+                    chat_btn,
+                    summarize_btn,
+                    summary_output,
+                    ai_model_dropdown,
+                    specialist_title,
+                    specialist_placeholder,
+                    specialist_output,
+                    specialist_btn
+                ]
+            )
+            # The order of these outputs must match the tuple returned by change_ai_model.
+            ai_model_dropdown.change(
+                fn=self.change_ai_model,
+                inputs=[ai_model_dropdown],
+                outputs=[type_model, api_key_input, project_id_watsonx, chunk_size, chunk_overlap]
+            )
+            type_model.change(
+                fn=self.change_type_model,
+                inputs=[type_model],
+                outputs=[api_key_input,project_id_watsonx]
+            )
+            # Pressing Enter in the question box and clicking the ask button run the same handler.
+            chat_placeholder.submit(
+                fn=self.qa_interface,
+                inputs=[chat_placeholder, chatbot, ai_model_dropdown, type_model, api_key_input, project_id_watsonx],
+                outputs=[chatbot]
+            )
+            process_btn.click(
+                fn=self.process_pdf,
+                inputs=[pdf_file, chunk_size, chunk_overlap, ai_model_dropdown, type_model, api_key_input, project_id_watsonx],
+                outputs=[process_output]
+            )
+            summarize_btn.click(
+                fn=self.summarize_interface,
+                inputs=[ai_model_dropdown, type_model, api_key_input, project_id_watsonx],
+                outputs=[summary_output]
+            )
+            specialist_btn.click(
+                fn=self.specialist_opinion,
+                inputs=[ai_model_dropdown, type_model, api_key_input, project_id_watsonx, specialist_placeholder],
+                outputs=[specialist_output]
+            )
+            chat_btn.click(
+                fn=self.qa_interface,
+                inputs=[chat_placeholder, chatbot, ai_model_dropdown, type_model, api_key_input, project_id_watsonx],
+                outputs=[chatbot]
+            )
+        return demo
+# Script entry point: build the interface and start the Gradio server.
+if __name__ == "__main__":
+    ui = PDFProcessorUI()
+    demo = ui.create_ui()
+    demo.launch()

pdf_processor.py ADDED Viewed

	@@ -0,0 +1,353 @@

+import json
+import tempfile
+from langchain_community.document_loaders import PyPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_ollama import OllamaEmbeddings
+from langchain_community.vectorstores import Chroma
+from langchain_ollama import OllamaLLM
+from langchain.chains import RetrievalQA
+from langchain.prompts import PromptTemplate
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+from ibm_watsonx_ai.metanames import EmbedTextParamsMetaNames
+from langchain_ibm import WatsonxLLM, WatsonxEmbeddings
+from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
+from ibm_watsonx_ai import APIClient, Credentials
+from utils import AI_MODELS, TRANSLATIONS
+import chromadb
+import requests
+import os
+from dotenv import load_dotenv
+# Ollama model tags used for the local LLM and the local embedding model.
+OLLAMA_LLM = "granite3.1-dense"
+OLLAMA_EMBEDDINGS = "granite-embedding:278m"
+# Load variables from a .env file into the process environment before reading them.
+load_dotenv()
+# watsonx credentials read from the environment; each is None when the variable is unset.
+api_key_watsonx = os.getenv('WATSONX_APIKEY')
+projectid_watsonx = os.getenv('WATSONX_PROJECT_ID')
+# Base URL used for the watsonx.ai client and embeddings.
+endpoint_watsonx = "https://us-south.ml.cloud.ibm.com"
+def set_up_watsonx():
+    token_watsonx = authenticate_watsonx(api_key_watsonx)
+    if token_watsonx == None:
+        return None
+    parameters = {
+        "max_new_tokens": 1500,
+        "min_new_tokens": 1,
+        "temperature": 0.7,
+        "top_k": 50,
+        "top_p": 1,
+    }
+    embed_params = {
+        EmbedTextParamsMetaNames.TRUNCATE_INPUT_TOKENS: 1,
+        EmbedTextParamsMetaNames.RETURN_OPTIONS: {"input_text": True},
+    }
+    credentials = Credentials(
+        url = endpoint_watsonx,
+        api_key = api_key_watsonx,
+    )
+    client = APIClient(credentials, project_id=projectid_watsonx)
+    client.set_token(token_watsonx)
+    watsonx_llm = WatsonxLLM(
+        model_id="ibm/granite-3-2-8b-instruct",
+        watsonx_client=client,
+        params = parameters
+    )
+    watsonx_embedding = WatsonxEmbeddings(
+        model_id="ibm/granite-embedding-278m-multilingual",
+        url=endpoint_watsonx,
+        project_id=projectid_watsonx,
+        params=embed_params,
+    )
+    return watsonx_llm, watsonx_embedding
+def authenticate_watsonx(api_key):
+    url = "https://iam.cloud.ibm.com/identity/token"
+    headers = {
+        "Content-Type": "application/x-www-form-urlencoded"
+    }
+    data = {
+        "grant_type": "urn:ibm:params:oauth:grant-type:apikey",
+        "apikey": api_key
+    }
+    response = requests.post(url, headers=headers, data=data)
+    if response.status_code == 200:
+        token = response.json().get('access_token')
+        os.environ["WATSONX_TOKEN"] = token
+        return token
+    else:
+        print("Authentication failed. Status code:", response.status_code)
+        print("Response:", response.text)
+        return None
+class PDFProcessor:
+    def __init__(self):
+        """Start with no indexed document and English as the answer language."""
+        # Vector store of the processed PDF; None until process_pdf succeeds.
+        self.vectorstore = None
+        # Key into TRANSLATIONS; also inserted into the prompts sent to the model.
+        self.language = "English"
+    def set_language(self, language):
+        """Store the language used for status messages and model answers."""
+        self.language = language
+    def set_llm(self, ai_model, type_model, api_key, project_id_watsonx):
+        if ai_model == "Open AI / GPT-4o-mini":
+            current_llm = ChatOpenAI(
+                    model="gpt-4o",
+                    temperature=0.5,
+                    max_tokens=None,
+                    timeout=None,
+                    max_retries=2,
+                    api_key=api_key,
+            )
+            embeding_model = OpenAIEmbeddings(
+                model="text-embedding-3-small",
+                api_key=api_key,
+            )
+        elif ai_model == "IBM Granite3.1 dense / Ollama local":
+            if type_model == "Local":
+                try:
+                    # Verificar que Ollama está funcionando y el modelo está disponible
+                    current_llm = OllamaLLM(model=OLLAMA_LLM)
+                    # Intenta hacer un embedding de prueba
+                    test_embedding = OllamaEmbeddings(model=OLLAMA_EMBEDDINGS)
+                    test_embedding.embed_query("test")
+                    embeding_model = test_embedding
+                except Exception as e:
+                    print(f"Error with Ollama: {e}")
+                    # Fallback a otro modelo o manejo de error
+                    raise Exception("Please ensure Ollama is running and the models are pulled: \n" +
+                                  f"ollama pull {OLLAMA_LLM}\n" +
+                                  f"ollama pull {OLLAMA_EMBEDDINGS}")
+            else:
+                current_llm, embeding_model = set_up_watsonx()
+        else:
+            current_llm = HuggingFaceEndpoint(
+                repo_id= AI_MODELS[ai_model],
+                temperature=0.5,
+            )
+            embeding_model = HuggingFaceEmbeddings(
+                model_name="ibm-granite/granite-embedding-278m-multilingual",
+            )
+        return current_llm, embeding_model
+    def process_pdf(self, pdf_file, chunk_size, chunk_overlap, ai_model, type_model, api_key, project_id_watsonx):
+        defined_chunk_size = 1000
+        defined_chunk_overlap = 100
+        if (ai_model == "Open AI / GPT-4o-mini" and (api_key == "")) : #or (ai_model == "IBM Granite3.1 dense / Ollama local" and type_model == "Api Key" and (api_key == "" or project_id_watsonx == "")
+            return TRANSLATIONS[self.language]["api_key_required"]
+        if pdf_file is not None:
+                loader = PyPDFLoader(file_path=pdf_file.name)
+                documents = loader.load()
+                #delete empty page_content documents from documents
+                documents = [doc for doc in documents if doc.page_content]
+                if(ai_model == "Open AI / GPT-4o-mini" or ai_model == "IBM Granite3.1 dense / Ollama local"):
+                    if type_model == "Api Key":
+                        text_splitter = RecursiveCharacterTextSplitter(
+                            chunk_size=defined_chunk_size,
+                            chunk_overlap=defined_chunk_overlap,
+                            separators=["\n\n", "\n"]
+                        )
+                    else:
+                        text_splitter = RecursiveCharacterTextSplitter(
+                            chunk_size=defined_chunk_size,
+                            chunk_overlap=defined_chunk_overlap,
+                        )
+                else:
+                    text_splitter = RecursiveCharacterTextSplitter(
+                        chunk_size=defined_chunk_size,
+                        chunk_overlap=defined_chunk_overlap
+                    )
+                #print(text_splitter)
+                texts = text_splitter.split_documents(documents)
+                _, embeddings = self.set_llm(ai_model, type_model, api_key, project_id_watsonx)
+                #delete all documents from the vectorstore
+                if self.vectorstore:
+                    self.vectorstore.delete_collection()
+                new_client = chromadb.EphemeralClient()
+                self.vectorstore = Chroma.from_documents(
+                    documents=texts,
+                    embedding=embeddings,
+                    client=new_client,
+                    collection_name="pdf_collection"
+                    #persist_directory="./chroma_db"
+                )
+                return TRANSLATIONS[self.language]["pdf_processed"] + f" ---- Chunks: {len(self.vectorstore.get()["documents"])}"
+        else:
+            return TRANSLATIONS[self.language]["load_pdf_first"]
+    def get_qa_response(self, message, history, ai_model, type_model, api_key, project_id_watsonx, k=4):
+        current_llm, _ = self.set_llm(ai_model, type_model, api_key, project_id_watsonx)
+        if not self.vectorstore:
+            return TRANSLATIONS[self.language]["load_pdf_first"]
+        retriever = self.vectorstore.as_retriever(search_kwargs={"k": k})
+        qa_chain = RetrievalQA.from_chain_type(
+            llm=current_llm,
+            chain_type="stuff",
+            retriever=retriever,
+            return_source_documents=True,
+        )
+        result = qa_chain.invoke({"query": f"{message}.\n You must answer it in {self.language}. Remember not to mention anything that is not in the text. Do not extend information that is not provided in the text. "})
+        unique_page_labels = {doc.metadata['page_label'] for doc in result["source_documents"]}
+        page_labels_text = " & ".join([f"Page: {page}" for page in sorted(unique_page_labels)])
+        return result["result"] + "\n\nSources: " + page_labels_text
+    def summarizer_by_k_top_n(self, ai_model, type_model, api_key, project_id_watsonx, k, summary_prompt, just_get_documments=False):
+        if not self.vectorstore:
+            return TRANSLATIONS[self.language]["load_pdf_first"]
+        current_llm, _ = self.set_llm(ai_model, type_model, api_key, project_id_watsonx)
+        # Get all documents from the vectorstore
+        retriever = self.vectorstore.as_retriever(search_kwargs={"k": k})
+        documents = retriever.invoke('Summary of the document and key points')
+        if just_get_documments:
+            return  "\n".join([doc.page_content for doc in documents])
+        summary_chain = summary_prompt | current_llm
+        final_summary = summary_chain.invoke({"texts": "\n".join([doc.page_content for doc in documents]), "language": self.language})
+        return final_summary
+        # Get the top k documents by score
+    def get_summary(self, ai_model, type_model, api_key, project_id_watsonx, just_get_documments=False, k=10):
+        final_summary_prompt = PromptTemplate(
+            input_variables=["texts", "language"],
+            template="""
+            Combine the following texts into a cohesive and structured final summary:
+            ------------
+            {texts}
+            ------------
+            The final summary should be between 2 and 4 paragraphs.
+            Preserve the original meaning without adding external information or interpretations.
+            Ensure clarity, logical flow, and coherence between the combined points.
+            The summary must be in {language}.
+            """
+        )
+        return self.summarizer_by_k_top_n(ai_model, type_model, api_key, project_id_watsonx, k, final_summary_prompt, just_get_documments)
+    def get_specialist_opinion(self, ai_model, type_model, api_key, project_id_watsonx, specialist_prompt):
+        questions_prompt = PromptTemplate(
+            input_variables=["text", "specialist_prompt", "language"],
+            template="""
+            * Act as a specialist based on the following instructions and behaviour that you will follow:
+            ------------
+            {specialist_prompt}
+            ------------
+            * Based on your role as specialist, create some different sintetized and concise aspects to ask to the knowledge base of the document about the following text:
+            ------------
+            {text}
+            ------------
+            * The key aspects and questions must be provided in JSON format with the following structure:
+            {{
+                "aspects": [
+                    "Aspect 1",
+                    "Aspect 2",
+                    "Aspect 3",
+                    "Aspect 4",
+                    "Aspect 5",
+                    "Aspect 6",
+                    "Aspect 7",
+                    "Aspect 8",
+                    "Aspect 9",
+                    "Aspect 10",
+                ]
+            }}
+            ------------
+            *Example of valid output:
+            {{
+                "aspects": [
+                    "Finished date of the project",
+                    "Payment of the project",
+                    "Project extension"
+                    ]
+            }}
+            ------------
+            * The aspects must be redacted in the language of {language}.
+            * The given structure must be followed strictly in front of the keys, just use the list of aspects, do not add any other key.
+            * Generate until 10 different aspects.
+            ------------
+            Answer:
+            """
+        )
+        if not self.vectorstore:
+            return TRANSLATIONS[self.language]["load_pdf_first"]
+        current_llm, _ = self.set_llm(ai_model, type_model, api_key, project_id_watsonx)
+        summary_text = self.get_summary(ai_model, type_model, api_key, project_id_watsonx, True, 10)
+        questions_chain = questions_prompt | current_llm
+        questions = questions_chain.invoke({"text": summary_text, "specialist_prompt": specialist_prompt, "language": self.language})
+        print(questions)
+        #clean the questions variable, delete all the text before the json and after the json
+        questions = questions.split("{")[1]
+        questions = questions.split("}")[0]
+        questions = questions.strip()
+        print(questions)
+        questions = json.loads(questions)
+        print(questions)
+        if len(questions["aspects"]) > 15:
+            questions["aspects"] = questions["aspects"][:15]
+        else:
+            questions["aspects"] = questions["aspects"]
+        aspects_text = "\n".join([f"* {aspect}: {self.get_qa_response(aspect, [], ai_model, type_model, api_key, project_id_watsonx, 2)}" for aspect in questions["aspects"]])
+        return aspects_text
+    """ Actúa como un abogado altamente experimentado en derecho civil y contractual.
+    Examina si existen cláusulas abusivas, desproporcionadas o contrarias a la normativa vigente, y explícalas con claridad.
+    Basa tu análisis en principios relevantes del derecho civil y contractual.
+    Ofrece un argumento estructurado y recomendaciones prácticas.
+    Si hay múltiples interpretaciones posibles, preséntalas de manera objetiva.
+    Mantén un tono profesional, preciso y fundamentado.
+    Basado en lo que analices, proporciona una evaluación legal detallada """
+    """ Actúa como un asesor e ingeniero financiero experto en lectura de reportes y análisis de datos.
+    Basado en los datos y conclusiones del reporte, proporciona una evaluación financiera detallada y posibles escenarios tanto negativos como positivos que se puedan presentar.
+    Establece el riesgo que se corre en cada escenario, la probabilidad de ocurrencia de cada uno y la magnitud del impacto en el recurso.
+    Si hay múltiples interpretaciones posibles, preséntalas de manera objetiva.
+    Realiza una hipótesis que pronostique el futuro de la situación o recurso analizado, teniendo en cuenta los datos y conclusiones del reporte.
+    Presenta tus hipotesis en 3 aspectos, corto, mediano y largo plazo.
+    Mantén un tono profesional, preciso y fundamentado.
+    Basado en lo que analices, proporciona una evaluación en detalle sobre los activos, reportes y/o recursos que se analizaron"""

utils.py ADDED Viewed

	@@ -0,0 +1,190 @@

+# Menu label -> model identifier. The keys are the choices of the model dropdown.
+# The Hugging Face values are repository ids passed to the inference endpoint.
+# "ollama" and "openai" are only markers: those two entries are dispatched by
+# their label.
+AI_MODELS = {
+    "Huggingface / IBM granite granite 3.1 8b Instruct": "ibm-granite/granite-3.1-8b-instruct",
+    "Huggingface / Mistral Small 24B Instruct": "mistralai/Mistral-Small-24B-Instruct-2501",
+    "Huggingface / SmolLM2 1.7B Instruct": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
+    "IBM Granite3.1 dense / Ollama local": "ollama",
+    "Open AI / GPT-4o-mini": "openai",
+}
+TRANSLATIONS = {
+    "Español": {
+        "title": "# 📚 Procesador de PDF con QA y Resumen",
+        "api_key_required": "Para usar este modelo, necesitas una clave de API.",
+        "model_type": "Tipo de modelo",
+        "api_key_placeholder": "Ingresa tu clave de API",
+        "project_id_placeholder": "Ingresa tu ID de proyecto",
+        "ai_model": "Modelo AI",
+        "upload_pdf": "Cargar PDF",
+        "upload_images": "Cargar imágenes",
+        "chunk_size": "Tamaño de chunk",
+        "chunk_overlap": "Superposición de chunk",
+        "process_btn": "Procesar",
+        "processing_status": "Estado del procesamiento",
+        "qa_tab": "Preguntas y Respuestas",
+        "summary_tab": "Resumen",
+        "chat_placeholder": "Haz una pregunta sobre el documento...",
+        "chat_title": "Pregunta al documento",
+        "chat_btn": "Preguntar",
+        "generate_summary": "Generar Resumen",
+        "summary_label": "Resumen del documento",
+        "pdf_processed": "PDF procesado y almacenado correctamente",
+        "load_pdf_first": "Por favor, carga un PDF primero.",
+        "map_prompt": """Escribe un resumen conciso del siguiente texto:
+        "{text}"
+        RESUMEN CONCISO:""",
+        "combine_prompt": """Escribe un resumen detallado basado en los siguientes resúmenes de diferentes secciones del texto:
+        "{text}"
+        RESUMEN DETALLADO:""",
+        "mini_summary_title": "Resúmenes de cada fragmento",
+        "mini_analysis_title": "Análisis de cada fragmento",
+        "specialist_tab": "Asesor a tu medida",
+        "specialist_title": "Asesor a tu medida",
+        "specialist_label": "Establece el comportamiento y rol de tu asesor. Ej: Eres un especialista de finanzas que ayuda a interpretar los datos de un reporte financiero. A partir del documento y tu vasta experiencia, cuéntame qué oportunidades y riesgos ves al invertir en lo que te proponen.",
+        "specialist_output": "Respuesta de tu asesor",
+        "specialist_btn": "Generar Respuesta"
+    },
+    "English": {
+        "title": "# 📚 PDF Processor with QA and Summary",
+        "api_key_required": "To use this model, you need an API key.",
+        "model_type": "Model type",
+        "api_key_placeholder": "Enter your API key",
+        "project_id_placeholder": "Enter your project ID",
+        "ai_model": "AI Model",
+        "upload_pdf": "Upload PDF",
+        "upload_images": "Upload Images",
+        "chunk_size": "Chunk size",
+        "chunk_overlap": "Chunk overlap",
+        "process_btn": "Process",
+        "processing_status": "Processing status",
+        "qa_tab": "Questions and Answers",
+        "summary_tab": "Summary",
+        "chat_placeholder": "Ask a question about the document...",
+        "chat_title": "Question to document",
+        "chat_btn": "Ask",
+        "generate_summary": "Generate Summary",
+        "summary_label": "Document summary",
+        "pdf_processed": "PDF processed and stored successfully",
+        "load_pdf_first": "Please load a PDF first.",
+        "map_prompt": """Write a concise summary of the following text:
+        "{text}"
+        CONCISE SUMMARY:""",
+        "combine_prompt": """Write a detailed summary based on the following summaries from different sections of the text:
+        "{text}"
+        DETAILED SUMMARY:""",
+        "mini_summary_title": "Summaries of each fragment",
+        "mini_analysis_title": "Analysis of each fragment",
+        "specialist_tab": "Customized Advisor",
+        "specialist_title": "Customized Advisor",
+        "specialist_label": "Set the behavior and role of your advisor. Example: You are a financial expert who helps interpret the data of a financial report. Based on the document and your extensive experience, tell me what opportunities and risks you see in what they propose.",
+        "specialist_output": "Answer of your advisor",
+        "specialist_btn": "Generate Answer"
+    },
+    "Deutsch": {
+        "title": "# 📚 PDF-Prozessor mit Q&A und Zusammenfassung",
+        "model_type": "Modelltyp",
+        "api_key_required": "Um dieses Modell zu verwenden, benötigen Sie einen API-Schlüssel.",
+        "api_key_placeholder": "API-Schlüssel eingeben",
+        "project_id_placeholder": "Projekt-ID eingeben",
+        "ai_model": "AI-Modell",
+        "upload_pdf": "PDF hochladen",
+        "upload_images": "Bilder hochladen",
+        "chunk_size": "Chunk-Größe",
+        "chunk_overlap": "Chunk-Überlappung",
+        "process_btn": "PDF verarbeiten",
+        "processing_status": "Verarbeitungsstatus",
+        "qa_tab": "Fragen und Antworten",
+        "summary_tab": "Zusammenfassung",
+        "chat_placeholder": "Stellen Sie eine Frage zum Dokument...",
+        "chat_title": "Frage zum Dokument",
+        "chat_btn": "Fragen",
+        "generate_summary": "Zusammenfassung generieren",
+        "summary_label": "Dokumentzusammenfassung",
+        "pdf_processed": "PDF erfolgreich verarbeitet und gespeichert",
+        "load_pdf_first": "Bitte laden Sie zuerst ein PDF hoch.",
+        "map_prompt": """Schreiben Sie eine kurze Zusammenfassung des folgenden Textes:
+        "{text}"
+        KURZE ZUSAMMENFASSUNG:""",
+        "combine_prompt": """Schreiben Sie eine detaillierte Zusammenfassung basierend auf den folgenden Zusammenfassungen verschiedener Textabschnitte:
+        "{text}"
+        DETAILLIERTE ZUSAMMENFASSUNG:""",
+        "mini_summary_title": "Zusammenfassungen von jedem Fragment",
+        "mini_analysis_title": "Analyse von jedem Fragment",
+        "specialist_tab": "Anpassbarer Berater",
+        "specialist_title": "Anpassbarer Berater",
+        "specialist_label": "Setzen Sie das Verhalten und die Rolle Ihres Beraters fest. Beispiel: Sie sind ein Finanzexperte, der bei der Interpretation von Finanzdaten aus einem Bericht hilft. Basierend auf dem Dokument und Ihrer umfassenden Erfahrung, erzählen Sie mir, was Sie in dem sehen, was sie Ihnen vorschlagen.",
+        "specialist_output": "Antwort Ihres Beraters",
+        "specialist_btn": "Antwort generieren"
+    },
+    "Français": {
+        "title": "# 📚 Processeur PDF avec QR et Résumé",
+        "model_type": "Type de modèle",
+        "api_key_required": "Pour utiliser ce modèle, vous avez besoin d'une clé API.",
+        "api_key_placeholder": "Entrez votre clé API",
+        "project_id_placeholder": "Entrez votre ID de projet",
+        "ai_model": "Modèle AI",
+        "upload_pdf": "Charger PDF",
+        "upload_images": "Charger images",
+        "chunk_size": "Taille du chunk",
+        "chunk_overlap": "Chevauchement du chunk",
+        "process_btn": "Traiter le PDF",
+        "processing_status": "État du traitement",
+        "qa_tab": "Questions et Réponses",
+        "summary_tab": "Résumé",
+        "chat_placeholder": "Posez une question sur le document...",
+        "chat_title": "Question au document",
+        "chat_btn": "Poser une question",
+        "generate_summary": "Générer le résumé",
+        "summary_label": "Résumé du document",
+        "pdf_processed": "PDF traité et enregistré avec succès",
+        "load_pdf_first": "Veuillez d'abord charger un PDF.",
+        "map_prompt": """Écrivez un résumé concis du texte suivant :
+        "{text}"
+        RÉSUMÉ CONCIS :""",
+        "combine_prompt": """Écrivez un résumé détaillé basé sur les résumés suivants de différentes sections du texte :
+        "{text}"
+        RÉSUMÉ DÉTAILLÉ :""",
+        "mini_summary_title": "Résumés de chaque fragment",
+        "mini_analysis_title": "Analyse de chaque fragment",
+        "specialist_tab": "Conseiller personnalisé",
+        "specialist_title": "Conseiller personnalisé",
+        "specialist_label": "Définissez le comportement et le rôle de votre conseiller. Exemple : Vous êtes un expert financier qui aide à interpréter les données d'un rapport financier. Sur la base du document et de votre vaste expérience, dites-moi ce que vous voyez dans ce qu'ils vous proposent.",
+        "specialist_output": "Réponse de votre conseiller",
+        "specialist_btn": "Générer la réponse"
+    },
+    "Português": {
+        "title": "# 📚 Processador de PDF com P&R e Resumo",
+        "model_type": "Tipo de modelo",
+        "api_key_required": "Para usar este modelo, você precisa de uma chave de API.",
+        "api_key_placeholder": "Digite sua chave API",
+        "project_id_placeholder": "Digite seu ID de projeto",
+        "ai_model": "Modelo AI",
+        "upload_pdf": "Carregar PDF",
+        "upload_images": "Carregar imagens",
+        "chunk_size": "Tamanho do chunk",
+        "chunk_overlap": "Sobreposição do chunk",
+        "process_btn": "Processar",
+        "processing_status": "Status do processamento",
+        "qa_tab": "Perguntas e Respostas",
+        "summary_tab": "Resumo",
+        "chat_placeholder": "Faça uma pergunta sobre o documento...",
+        "chat_title": "Pergunta ao documento",
+        "chat_btn": "Perguntar",
+        "generate_summary": "Gerar Resumo",
+        "summary_label": "Resumo do documento",
+        "pdf_processed": "PDF processado e armazenado com sucesso",
+        "load_pdf_first": "Por favor, carregue um PDF primeiro.",
+        "map_prompt": """Escreva um resumo conciso do seguinte texto:
+        "{text}"
+        RESUMO CONCISO:""",
+        "combine_prompt": """Escreva um resumo detalhado baseado nos seguintes resumos de diferentes seções do texto:
+        "{text}"
+        RESUMO DETALHADO:""",
+        "mini_summary_title": "Resumos de cada fragmento",
+        "mini_analysis_title": "Análise de cada fragmento",
+        "specialist_tab": "Assistente Personalizado",
+        "specialist_title": "Assistente Personalizado",
+        "specialist_label": "Defina o comportamento e o papel do seu assistente. Exemplo: Você é um especialista em finanças que ajuda a interpretar os dados de um relatório financeiro. Com base no documento e em sua ampla experiência, compartilhe comigo o que você vê naquilo que eles lhe propõem.",
+        "specialist_output": "Resposta do seu assistente",
+        "specialist_btn": "Gerar Resposta"
+    }
+}