import os
import json
import bcrypt
import numpy as np
import pandas as pd
import chainlit as cl
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from langchain_core.prompts.prompt import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_pinecone import PineconeVectorStore
@cl.password_auth_callback
def auth_callback(username: str, password: str):
    # Credentials are stored as a JSON list of {"ident", "pwd", "role"}
    # objects in the CHAINLIT_AUTH_LOGIN environment variable.
    auth = json.loads(os.environ['CHAINLIT_AUTH_LOGIN'])
    user = next((d for d in auth if d['ident'] == username), None)
    if user is None:
        return None
    # bcrypt.checkpw() compares the submitted values against freshly salted
    # hashes of the stored values (which are kept in plaintext here).
    resultLogAdmin = bcrypt.checkpw(username.encode('utf-8'), bcrypt.hashpw(user['ident'].encode('utf-8'), bcrypt.gensalt()))
    resultPwdAdmin = bcrypt.checkpw(password.encode('utf-8'), bcrypt.hashpw(user['pwd'].encode('utf-8'), bcrypt.gensalt()))
    if resultLogAdmin and resultPwdAdmin and user['role'] == "admindatapcc":
        return cl.User(
            identifier=user['ident'] + " : 🧑‍💼 Admin Datapcc", metadata={"role": "admin", "provider": "credentials"}
        )
    elif resultLogAdmin and resultPwdAdmin and user['role'] == "userdatapcc":
        return cl.User(
            identifier=user['ident'] + " : 🧑‍🎓 User Datapcc", metadata={"role": "user", "provider": "credentials"}
        )
    return None
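# Expected CHAINLIT_AUTH_LOGIN shape, as assumed by auth_callback above
# (illustrative values, not real credentials):
# CHAINLIT_AUTH_LOGIN='[{"ident": "admin@example.org", "pwd": "secret", "role": "admindatapcc"}]'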
os.environ["TOKENIZERS_PARALLELISM"] = os.environ["TOKENIZERS_PARALLELISM"] | |
os.environ['OPENAI_API_KEY'] = os.environ['OPENAI_API_KEY'] | |
@cl.author_rename
def rename(orig_author: str):
    # Map internal LangChain component names to display names in the UI.
    rename_dict = {
        "DatapccSkillStream": "Datapcc",
        "ConversationalRetrievalChain": "Assistant conversationnel 💬",
        "Retriever": "Agent conversationnel",
        "StuffDocumentsChain": "Chaîne de documents",
        "LLMChain": "Agent",
        "ChatOpenAI": "IA🤖",
    }
    return rename_dict.get(orig_author, orig_author)
@cl.action_callback("download")
async def on_action(action):
    # Persist the action payload to a text file, then send it back to the
    # user as a downloadable element.
    df = pd.DataFrame(np.array([action.value]))
    df.to_csv('./' + action.description + '.txt', index=False, header=False, encoding='utf-8')
    elements = [
        cl.File(
            name=action.description + ".txt",
            path="./" + action.description + ".txt",
            display="inline",
        ),
    ]
    await cl.Message(
        author="Datapcc 🌐🌐🌐", content="[Lien] 🔗", elements=elements
    ).send()
    await action.remove()
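# The "download" name in the decorator above must match the
# cl.Action(name="download", ...) created in main(); clicking that button
# routes here and returns the question/answer pair as a .txt file.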
def retriever_to_cache():
    # PINECONE_API_KEY is read implicitly by langchain_pinecone; the
    # self-assignment fails fast if it is missing from the environment.
    os.environ['PINECONE_API_KEY'] = os.environ['PINECONE_API_KEY']
    os.environ['PINECONE_ENVIRONMENT'] = "us-west4-gcp-free"
    index_name = os.environ['PINECONE_INDEX_NAME']
    embeddings = HuggingFaceEmbeddings()
    vectorstore = PineconeVectorStore(
        index_name=index_name, embedding=embeddings
    )
    # Return at most 30 documents scoring above 0.7, restricted to the
    # "OF" (offre de formation) category.
    retriever = vectorstore.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.7, "k": 30, "filter": {'categorie': {'$eq': 'OF'}}},
    )
    return retriever
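# Quick standalone sanity check of the retriever (hypothetical query string;
# requires the Pinecone index and the embedding model to be reachable):
#
#     docs = retriever_to_cache().get_relevant_documents("licence informatique")
#     print(len(docs))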
@cl.set_chat_profiles
async def chat_profile():
    return [
        cl.ChatProfile(
            name="OF - Offre de formation",
            markdown_description="Requêter sur l'offre de formation - OF",
            icon="./public/favicon.png",
        ),
    ]
@cl.on_chat_start
async def start():
    chat_profile = cl.user_session.get("chat_profile")
    chatProfile = chat_profile.split(' - ')
    if chatProfile[0] == 'OF':
        app_user = cl.user_session.get("user")
        # Turn an identifier like "prenom.nom@domaine" into "prenom nom".
        welcomeUser = app_user.identifier
        welcomeUserArray = welcomeUser.split('@')
        welcomeUserStr = welcomeUserArray[0].replace('.', ' ')
        await cl.Message(f"> Bonjour {welcomeUserStr}").send()
        await cl.Message(
            author="Datapcc 🌐🌐🌐",
            content=f"✨ Commencez à poser vos questions sur les données \"{chat_profile}\"\n- Création de BCC à partir d'une liste de savoirs ou d'objectifs pédagogiques\n- Création du tableau de la version n°1 de la maquette de formation"
        ).send()
    # Prompt used to condense the running conversation plus the new question
    # into a single standalone question for retrieval.
    _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
    CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
    ########## Chain with streaming ##########
    message_history = ChatMessageHistory()
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )
    streaming_llm = ChatOpenAI(
        model_name="gpt-4-1106-preview",
        streaming=True,
        temperature=1,
    )
    qa = ConversationalRetrievalChain.from_llm(
        streaming_llm,
        memory=memory,
        chain_type="stuff",
        return_source_documents=True,
        verbose=False,
        retriever=retriever_to_cache(),
        condense_question_prompt=CONDENSE_QUESTION_PROMPT,
    )
    cl.user_session.set("conversation_chain", qa)
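# Note: with return_source_documents=True, a call to the chain returns a dict
# carrying both the generated "answer" and the retrieved "source_documents",
# which main() below unpacks.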
@cl.on_message
async def main(message: cl.Message):
    chat_profile = cl.user_session.get("chat_profile")
    chatProfile = chat_profile.split(' - ')
    if chatProfile[0] == "OF":
        chain = cl.user_session.get("conversation_chain")
        cb = cl.AsyncLangchainCallbackHandler()
        res = await chain.acall(
            "Contexte : Réponds à la question suivante de la manière la plus pertinente, la plus exhaustive et la plus détaillée possible, dans le contexte et les informations fournies. Question : " + message.content,
            callbacks=[cb],
        )
        answer = res["answer"]
        source_documents = res["source_documents"]
        text_elements = []
        metadatas = ''
        if source_documents:
            for source_idx, source_doc in enumerate(source_documents):
                numSource = source_idx + 1
                source_name = f"Source n°{numSource}"
                # Normalize the "œ" ligature before display. Assumed targets:
                # U+0153 and its mis-decoded Windows-1252 stray byte U+009C
                # (the original replacement characters were unreadable).
                text_elements.append(
                    cl.Text(
                        content="Formations : " + source_doc.metadata['ABREGE_LIBELLES'] + " " + source_doc.metadata['INTITULE']
                        + "\n\nROME : " + source_doc.metadata['CODES_ROME']
                        + "\nLibellés ROME : " + source_doc.metadata['LIBELLES_ROME']
                        + "\n\nActivités : " + source_doc.metadata['ACTIVITES_VISEES'].replace('\u009c', 'oe')
                        + "\n\nEmplois accessibles : " + source_doc.metadata['TYPE_EMPLOI_ACCESSIBLES']
                        + "\n\nCompétences : " + source_doc.metadata['CAPACITES_ATTESTEES'].replace('\u0153', 'oe').replace('\u009c', 'oe'),
                        name=source_name,
                    )
                )
            # Cite at most the first 10 sources.
            source_names = [text_el.name for countMetadata, text_el in enumerate(text_elements) if countMetadata < 10]
            if source_names:
                metadatas += ', '.join(source_names)
            else:
                metadatas += "\n\nPas de source trouvée!"
        actions = [
            cl.Action(name="download", value="Question : " + message.content + "\n\nRéponse : " + answer, description="download_offre_formation")
        ]
        await cl.Message(author="Datapcc 🌐🌐🌐", content=answer).send()
        await cl.Message(author="Datapcc 🌐🌐🌐", content="Download", actions=actions).send()
        if metadatas:
            await cl.Message(author="Datapcc 🌐🌐🌐", content="Sources : " + metadatas, elements=text_elements).send()