File size: 10,801 Bytes
2a7178c
 
 
 
 
 
 
 
 
 
 
 
 
 
547460d
2a7178c
 
d94788c
 
 
 
 
 
 
2a7178c
 
 
 
 
 
61d1ad7
5ccbefb
d94788c
ed9e96d
2a7178c
 
d94788c
2a7178c
 
 
 
 
 
 
 
74631d0
2a7178c
 
 
 
74631d0
2a7178c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a964a99
2a7178c
 
 
 
1fd02ed
a7da315
9d8afbe
aa6ca59
9d8afbe
a7da315
9d8afbe
 
 
 
a7da315
1fd02ed
 
 
a7da315
96b05a8
1fd02ed
2a7178c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79ef33a
2a7178c
 
 
 
 
 
 
 
 
 
 
 
 
 
2c8b688
2a7178c
2c8b688
 
 
 
 
 
 
5ccbefb
2a7178c
6faecbe
 
 
 
 
 
 
d94788c
2a7178c
d94788c
2a7178c
f8af325
 
ba82520
f8af325
2a7178c
1009eb6
 
2a7178c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79832e2
2a7178c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d94788c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
import os
import time
from operator import itemgetter
from collections import Counter
from langchain.schema.runnable import Runnable, RunnablePassthrough, RunnableLambda
from langchain.schema.runnable.config import RunnableConfig
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema import StrOutputParser
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.vectorstores import Pinecone
import pinecone
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
import pandas as pd
import numpy as np
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain_openai import ChatOpenAI
import chainlit as cl
from chainlit.input_widget import TextInput
from chainlit import user_session
from offres_emploi import Api
from offres_emploi.utils import dt_to_str_iso
import datetime

# Both statements re-assign a variable to its own current value, so they do not
# change the environment; their only observable effect is a KeyError at import
# time when TOKENIZERS_PARALLELISM or OPENAI_API_KEY is not defined.
# NOTE(review): presumably meant as a fail-fast check for required settings — confirm.
os.environ["TOKENIZERS_PARALLELISM"] = os.environ["TOKENIZERS_PARALLELISM"]
os.environ['OPENAI_API_KEY'] = os.environ['OPENAI_API_KEY']

@cl.author_rename
def rename(orig_author: str):
    """Translate an internal author name into the label displayed in the chat.

    Args:
        orig_author: Author name as produced by the chain / LLM components.

    Returns:
        The French display label when one is defined, otherwise ``orig_author``
        unchanged.
    """
    display_labels = {
        "ConversationalRetrievalChain": "💬 Assistant conversationnel",
        "Retriever": "Agent conversationnel",
        "StuffDocumentsChain": "Chaîne de documents",
        "LLMChain": "Agent",
        "Chat OpenAI": "🤖 IA",
    }
    if orig_author in display_labels:
        return display_labels[orig_author]
    return orig_author

@cl.action_callback("download")
async def on_action(action):
    """Export the action's value to a text file and send it as an attachment.

    The payload (``action.value``) is written as a single CSV cell into
    ``./<action.description>.txt``; the file is then attached inline to a new
    message and the triggering action is removed.
    """
    # One-cell frame: the whole payload is serialised as a single CSV field.
    frame = pd.DataFrame(np.array([action.value]))

    file_name = action.description + ".txt"
    file_path = "./" + file_name
    with open(file_path, 'wb') as out_file:
        frame.to_csv(path_or_buf=out_file, index=False, header=False, encoding='utf-8')

    attachment = cl.File(
        name=file_name,
        path=file_path,
        display="inline",
    )
    download_message = cl.Message(
        author="🌐🌐🌐", content="[Lien] 🔗", elements=[attachment]
    )
    await download_message.send()
    await action.remove()
    
@cl.action_callback("close_button")
async def on_action(action):
    """Remove the information task list stored in the user session.

    The task list is looked up under the ``"tracker"`` session key. When no
    task list has been stored, the callback does nothing.
    """
    # Awaitable pause: a plain time.sleep() inside a coroutine would block the
    # event loop for the whole delay instead of yielding to other handlers.
    await cl.sleep(0.5)
    track = user_session.get("tracker")
    if track is not None:
        await track.remove()
        
@cl.action_callback("action_button")
async def on_action(action):
    task_list = cl.TaskList()
    # Create the TaskList
    # Create a task and put it in the running state
    task1 = cl.Task(title="Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data \n\n Processing data", status=cl.TaskStatus.READY)
    await task_list.add_task(task1)
    task2 = cl.Task(title=action.value, status=cl.TaskStatus.READY)
    await task_list.add_task(task2)
    # Perform some action on your end
    await task_list.send()
    tracking = user_session.set("tracker", task_list)
    
    others = [
        cl.Action(name="close_button", value="closed", label="Fermer", description="Fermer le volet d'information!")
    ]
    await cl.Message(author="🌐🌐🌐",content="Fermer le panneau d'information", actions=others).send()
    
@cl.cache
def to_cache(file):
    """Return the URL of the CSV document named ``file`` under the fixed docs root.

    Args:
        file: Document name without the ``.csv`` extension.
    """
    docs_root = "https://cipen.univ-gustave-eiffel.fr/fileadmin/CIPEN/datas/assets/docs/"
    csv_name = file + ".csv"
    return docs_root + csv_name

@cl.cache
def retriever_to_cache():
    """Build the retriever over the existing Pinecone index.

    Reads ``PINECONE_INDEX_NAME`` and ``PINECONE_API_KEY`` from the environment
    (a missing variable raises ``KeyError``), creates HuggingFace embeddings,
    initialises the Pinecone client and returns a similarity-score-threshold
    retriever restricted to documents whose ``categorie`` equals ``'OF'``.

    NOTE(review): the function pauses between every step (25 s in total); the
    reason is not visible here — confirm whether the pauses are still needed.
    """
    pinecone_index = os.environ['PINECONE_INDEX_NAME']
    time.sleep(5)

    embedder = HuggingFaceEmbeddings()
    time.sleep(5)

    pinecone.init(api_key=os.environ['PINECONE_API_KEY'], environment="us-west4-gcp-free")
    time.sleep(5)

    store = Pinecone.from_existing_index(index_name=pinecone_index, embedding=embedder)
    time.sleep(10)

    # Up to 30 hits, each scoring at least 0.7, limited to the 'OF' category.
    search_options = {
        "score_threshold": .7,
        "k": 30,
        "filter": {'categorie': {'$eq': 'OF'}},
    }
    return store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs=search_options,
    )

@cl.set_chat_profiles
async def chat_profile():
    """Return the list of chat profiles, all sharing the same icon.

    The text before the first ``' - '`` in each profile name is the prefix the
    chat handlers compare against (e.g. ``"OF"``).
    """
    icon_path = "./public/favicon.png"
    # (profile name, markdown description) pairs, in display order.
    profile_specs = (
        ("OF - Offre de formation", "Requêter sur l'offre de formation - OF"),
        ("Emplois - En direct de Pole Emploi", "Emplois - En direct de Pole Emploi"),
        ("K1902 - LP MDAI", "K1902 - LP MDAI : requête sur les offres d'emploi"),
        ("M1802-I1401-M1810-M1801-M1805 - Licence Maths-Info", "M1802-I1401-M1810-M1801-M1805 - Licence Maths-Info : requête sur les offres d'emploi"),
        ("K1207-G1202-G1204 - Licence STAPS", "K1207-G1202-G1204 - Licence STAPS : requête sur les offres d'emploi"),
    )
    return [
        cl.ChatProfile(name=label, markdown_description=description, icon=icon_path)
        for label, description in profile_specs
    ]
@cl.on_chat_start
async def start():
    """Initialise a chat session for the "OF" profile.

    For any other profile (or when no profile is stored in the session) the
    handler returns without doing anything. For "OF" it:

    1. shows a temporary "loading" task list,
    2. sends the logo and the introductory message with example questions,
    3. sends the chat settings panel (a single ``AgentName`` text input) and
       echoes its value when it is not empty,
    4. builds a streaming ``ConversationalRetrievalChain`` with buffer memory
       and stores it in the user session under ``"conversation_chain"``.
    """
    chat_profile = cl.user_session.get("chat_profile")
    # Guard clause: also covers a missing profile, which would otherwise crash on .split().
    if not chat_profile or chat_profile.split(' - ')[0] != 'OF':
        return

    connexion = cl.TaskList()
    connexion.status = "Running..."

    # Create a task and put it in the running state
    task1 = cl.Task(title="Chargement des données, en attente...", status=cl.TaskStatus.RUNNING)
    await connexion.add_task(task1)
    await connexion.send()

    logo = [
        cl.Image(name="Logo", size="small", display="inline", path="./public/logo_light.png")
    ]

    await cl.Message(author="🌐🌐🌐",content="", elements=logo).send()
    await cl.Message(
        author="🌐🌐🌐",content=f"Commencez à poser vos questions sur les données \"{chat_profile}\"\n\n💡Voici des exemples de question \n\t1️⃣ Basée sur les formations : Quelles sont toutes les formations licences générales?\n\t2️⃣ Basée sur les compétences : Quelles sont les compétences de la licence Economie et gestion?\n\t3️⃣ Basée sur les métiers : Quels sont les métiers possibles de la licence Economie et gestion?\n\t4️⃣ Basée sur un souhait : Quelles formations si je veux travailler dans la vente?\n\t5️⃣ Basée sur un savoir-être : Quelles formations si j'aime travailler en équipe?\n\t6️⃣ Basée sur un état : Quelles formations si je suis créatif?\n\t7️⃣ Question multi-critère : Quelles sont les activités, les compétences et les métiers possibles de la licence Economie et gestion?"
    ).send()
    settings = await cl.ChatSettings(
        [
            TextInput(id="AgentName", label="Renseigner votre code ROME", initial=""),
        ]
    ).send()
    value = settings["AgentName"]
    task1.status = cl.TaskStatus.DONE
    await cl.sleep(0.5)
    await connexion.remove()

    if value:
        await cl.Message(author="🌐🌐🌐",content=value).send()

    ########## Chain with streaming ##########
    # No condense_question_prompt is passed, so from_llm() keeps its built-in
    # default for rewriting follow-up questions into standalone ones.
    message_history = ChatMessageHistory()
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )
    streaming_llm = ChatOpenAI(
        streaming=True,
        temperature=1
    )
    # NOTE(review): retriever_to_cache() is a synchronous call made from this
    # coroutine; it blocks the event loop while it runs.
    qa = ConversationalRetrievalChain.from_llm(
        streaming_llm,
        memory=memory,
        chain_type="stuff",
        return_source_documents=True,
        verbose=False,
        retriever=retriever_to_cache()
    )
    cl.user_session.set("conversation_chain", qa)
    
def _source_text(metadata):
    """Render one source document's metadata as the text of its side element."""

    def field(key):
        # A missing metadata key must not abort a reply whose answer has
        # already been generated; fall back to an empty string.
        return str(metadata.get(key, ""))

    # NOTE(review): the final replace turns '…' into 'oe'; this looks like a
    # workaround for a mis-decoded 'œ' — confirm against the indexed data.
    return (
        "Formations : " + field('ABREGE_LIBELLES') + " " + field('INTITULE')
        + "\n\nROME : " + field('CODES_ROME')
        + "\nLibellés ROME : " + field('LIBELLES_ROME')
        + "\n\nActivités : " + field('ACTIVITES_VISEES').replace('œ','oe')
        + "\n\nEmplois accessibles : " + field('TYPE_EMPLOI_ACCESSIBLES')
        + "\n\nCompétences : " + field('CAPACITES_ATTESTEES').replace('œ','oe').replace('…','oe')
    )


@cl.on_message
async def main(message: cl.Message):
    """Answer a user message with the session's retrieval chain ("OF" profile only).

    Sends, in order: the chain's answer, a message carrying a ``download``
    action whose value is the question/answer pair, and a "Sources" message.
    The sources message attaches one text element per retrieved document and
    names at most the first ten of them; when nothing was retrieved it says
    so explicitly.

    Args:
        message: Incoming chat message; ``message.content`` is the question.
    """
    chat_profile = cl.user_session.get("chat_profile")
    if not chat_profile or chat_profile.split(' - ')[0] != "OF":
        return

    chain = cl.user_session.get("conversation_chain")
    if chain is None:
        # The chain is created by the chat-start handler; without it there is
        # nothing to query, so tell the user instead of raising AttributeError.
        await cl.Message(
            author="🌐🌐🌐",
            content="La session n'est pas encore prête. Merci de patienter quelques instants puis de réessayer.",
        ).send()
        return

    cb = cl.AsyncLangchainCallbackHandler()
    res = await chain.acall("Contexte : Réponds à la question suivante de la manière la plus pertinente, la plus exhaustive et la plus détaillée possible, avec au minimum 3000 tokens jusqu'à 4000 tokens, seulement et strictement dans le contexte et les informations fournies. Question : " + message.content, callbacks=[cb])
    answer = res["answer"]
    source_documents = res["source_documents"]

    text_elements = [
        cl.Text(content=_source_text(source_doc.metadata), name=f"Source n°{source_num}")
        for source_num, source_doc in enumerate(source_documents or [], start=1)
    ]

    if text_elements:
        # Only the first ten sources are named in the message body.
        metadatas = ', '.join(text_el.name for text_el in text_elements[:10])
    else:
        # This fallback used to sit inside the "sources found" branch, where
        # it could never run.
        metadatas = "\n\nPas de source trouvée!"

    actions = [
        cl.Action(name="download", value="Question : " + message.content + "\n\nRéponse : " + answer, description="download_offre_formation")
    ]

    await cl.Message(author="🌐🌐🌐",content=answer).send()
    await cl.Message(author="🌐🌐🌐",content="Download", actions=actions).send()
    await cl.Message(author="🌐🌐🌐",content="Sources : " + metadatas, elements=text_elements).send()