Spaces:
Sleeping
Sleeping
import os | |
import time | |
from operator import itemgetter | |
from collections import Counter | |
from langchain.schema.runnable import Runnable, RunnablePassthrough, RunnableLambda | |
from langchain.schema.runnable.config import RunnableConfig | |
from langchain.embeddings import HuggingFaceEmbeddings | |
from langchain.chains import ConversationalRetrievalChain | |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler | |
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder | |
from langchain.schema import StrOutputParser | |
from langchain.chains.qa_with_sources import load_qa_with_sources_chain | |
from langchain_pinecone import PineconeVectorStore | |
from pinecone import Pinecone | |
from langchain.memory import ChatMessageHistory, ConversationBufferMemory | |
import pandas as pd | |
import numpy as np | |
from langchain_core.messages import HumanMessage, SystemMessage | |
from langchain_core.prompts.chat import ( | |
ChatPromptTemplate, | |
HumanMessagePromptTemplate, | |
SystemMessagePromptTemplate, | |
) | |
from langchain_openai import ChatOpenAI | |
import chainlit as cl | |
from chainlit.input_widget import TextInput | |
from chainlit import user_session | |
from offres_emploi import Api | |
from offres_emploi.utils import dt_to_str_iso | |
import datetime | |
import bcrypt | |
import json | |
def auth_callback(username: str, password: str):
    """Authenticate a login against the accounts listed in ``CHAINLIT_AUTH_LOGIN``.

    The environment variable holds a JSON array of objects with the keys
    ``ident``, ``pwd`` and ``role``; ``pwd`` is compared as plain text.

    Args:
        username: Login submitted by the user.
        password: Password submitted by the user.

    Returns:
        A ``cl.User`` when the credentials match an account whose role is
        ``admindatapcc`` or ``userdatapcc``; ``None`` for an unknown login, a
        wrong password or any other role.

    Raises:
        KeyError: If ``CHAINLIT_AUTH_LOGIN`` is not set, or a matching entry
            lacks one of the expected keys.
    """
    # NOTE(review): presumably registered as Chainlit's password-auth hook via
    # a decorator that is not visible here - confirm.
    import hmac

    accounts = json.loads(os.environ['CHAINLIT_AUTH_LOGIN'])
    # Look the account up once; an unknown login is a failed authentication,
    # not an error.
    account = next((entry for entry in accounts if entry['ident'] == username), None)
    if account is None:
        return None

    # Constant-time comparison of the submitted and configured passwords.
    if not hmac.compare_digest(password.encode('utf-8'), account['pwd'].encode('utf-8')):
        return None

    # Configured role -> (identifier suffix, role stored in the user metadata).
    profiles = {
        "admindatapcc": (" : 🧑💼 Admin Datapcc", "admin"),
        "userdatapcc": (" : 🧑🎓 User Datapcc", "user"),
    }
    profile = profiles.get(account['role'])
    if profile is None:
        return None
    suffix, role = profile
    return cl.User(
        identifier=account['ident'] + suffix,
        metadata={"role": role, "provider": "credentials"},
    )
# Fail fast at import time: reading each variable raises KeyError when it is
# missing; writing the same value back leaves the environment unchanged.
for _required_env in ("TOKENIZERS_PARALLELISM", "OPENAI_API_KEY"):
    os.environ[_required_env] = os.environ[_required_env]
del _required_env
def rename(orig_author: str):
    """Return the replacement label for a known author name.

    Args:
        orig_author: Author name to translate.

    Returns:
        The mapped label, or ``orig_author`` unchanged when it has no mapping.
    """
    labels = {
        "Datapccskillstream": "Datapcc",
        "ConversationalRetrievalChain": "Assistant conversationnel 💬",
        "Retriever": "Agent conversationnel",
        "StuffDocumentsChain": "Chaîne de documents",
        "LLMChain": "Agent",
        "ChatOpenAI": "IA🤖",
    }
    if orig_author in labels:
        return labels[orig_author]
    return orig_author
async def on_action(action):
    """Offer the text carried by ``action`` as a downloadable ``.txt`` file.

    ``action.value`` is written verbatim (UTF-8) to
    ``./<action.description>.txt``, the file is sent as an inline element of a
    new message, and the action is then removed.

    Args:
        action: The triggered action; its ``value`` is the text to export and
            its ``description`` is used as the file's base name.
    """
    # NOTE(review): three handlers in this file share the name ``on_action``;
    # unless each is registered through a decorator not visible here, later
    # definitions shadow this one - confirm.
    # NOTE(review): the path depends only on ``action.description``, so two
    # sessions exporting at the same time write the same file.
    file_name = action.description + ".txt"
    file_path = "./" + file_name

    # Write the raw text: routing it through a CSV writer would wrap it in
    # quotes and double every embedded quote character.
    with open(file_path, 'wb') as txt_file:
        txt_file.write(action.value.encode('utf-8'))

    elements = [
        cl.File(name=file_name, path=file_path, display="inline"),
    ]
    await cl.Message(
        author="Datapcc 🌐🌐🌐", content="[Lien] 🔗", elements=elements
    ).send()
    await action.remove()
async def on_action(action):
    """Remove the task list stored in the session under the key ``"tracker"``.

    Args:
        action: The triggered action (not read by this handler).
    """
    # NOTE(review): three handlers in this file share the name ``on_action``;
    # unless each is registered through a decorator not visible here, later
    # definitions shadow earlier ones - confirm.
    import asyncio

    # Pause without blocking the event loop, so other sessions keep running.
    await asyncio.sleep(0.5)
    tracker = user_session.get("tracker")
    # Nothing to remove when no task list was stored for this session.
    if tracker is not None:
        await tracker.remove()
async def on_action(action): | |
task_list = cl.TaskList() | |
# Create the TaskList | |
# Create a task and put it in the running state | |
task1 = cl.Task(title="Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data Processing data \n\n Processing data", status=cl.TaskStatus.READY) | |
await task_list.add_task(task1) | |
task2 = cl.Task(title=action.value, status=cl.TaskStatus.READY) | |
await task_list.add_task(task2) | |
# Perform some action on your end | |
await task_list.send() | |
tracking = user_session.set("tracker", task_list) | |
others = [ | |
cl.Action(name="close_button", value="closed", label="Fermer", description="Fermer le volet d'information!") | |
] | |
await cl.Message(author="Datapcc 🌐🌐🌐",content="Fermer le panneau d'information", actions=others).send() | |
def retriever_to_cache():
    """Build a retriever over the Pinecone index named by ``PINECONE_INDEX_NAME``.

    Documents are embedded with ``HuggingFaceEmbeddings`` and searched with a
    similarity score threshold of 0.7, at most 30 results, restricted to
    entries whose ``categorie`` metadata equals ``'OF'``.

    Returns:
        The retriever produced by ``PineconeVectorStore.as_retriever``.

    Raises:
        KeyError: If ``PINECONE_API_KEY`` or ``PINECONE_INDEX_NAME`` is unset.
    """
    # Reading the key raises KeyError when it is missing; writing it back is a
    # no-op otherwise.
    os.environ['PINECONE_API_KEY'] = os.environ['PINECONE_API_KEY']
    os.environ['PINECONE_ENVIRONMENT'] = "us-west4-gcp-free"
    pinecone_index = os.environ['PINECONE_INDEX_NAME']

    # NOTE(review): the three pauses below total 20 s and block the calling
    # thread; nothing visible in this function depends on them - confirm
    # whether they are still needed.
    time.sleep(5)
    embedding_model = HuggingFaceEmbeddings()
    time.sleep(5)
    store = PineconeVectorStore(index_name=pinecone_index, embedding=embedding_model)
    time.sleep(10)

    search_options = {
        "score_threshold": .7,
        "k": 30,
        "filter": {'categorie': {'$eq': 'OF'}},
    }
    return store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs=search_options,
    )
async def chat_profile():
    """Return the list of chat profiles: a single "OF - Offre de formation" entry."""
    training_offer = cl.ChatProfile(
        name="OF - Offre de formation",
        markdown_description="Requêter sur l'offre de formation - OF",
        icon="./public/favicon.png",
    )
    return [training_offer]
async def start():
    """Greet the user and prepare the conversation chain for the "OF" profile.

    When the session's chat profile starts with ``"OF"``, this sends a welcome
    message, builds a streaming ``ConversationalRetrievalChain`` with buffer
    memory and the retriever from ``retriever_to_cache()``, and stores it in
    the session under ``"conversation_chain"``. Any other profile, or no
    profile at all, leaves the session untouched.
    """
    chat_profile = cl.user_session.get("chat_profile")
    # The profile may be absent (None); only the "OF" profile is handled here.
    if not chat_profile or chat_profile.split(' - ')[0] != 'OF':
        return

    # The greeting uses the part of the identifier before '@', dots as spaces.
    app_user = cl.user_session.get("user")
    welcome_name = app_user.identifier.split('@')[0].replace('.', ' ')
    await cl.Message(f"> Bonjour {welcome_name}").send()
    await cl.Message(
        author="Datapcc 🌐🌐🌐",content=f"✨ Commencez à poser vos questions sur les données \"{chat_profile}\"\n- Création de BCC à partir d'une liste de savoirs ou d'objectifs pédagogiques\n- Création du tableau de la version n°1 de la maquette de formation"
    ).send()

    from langchain_core.prompts.prompt import PromptTemplate

    # Prompt used to turn a follow-up into a standalone question while keeping
    # the user's language.
    condense_template = (
        "Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.\n"
        "Chat History:\n"
        "{chat_history}\n"
        "Follow Up Input: {question}\n"
        "Standalone question:"
    )
    condense_question_prompt = PromptTemplate.from_template(condense_template)

    ########## Chain with streaming ##########
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=ChatMessageHistory(),
        return_messages=True,
    )
    streaming_llm = ChatOpenAI(
        model_name="gpt-4-1106-preview",
        streaming=True,
        temperature=1,
    )
    # NOTE(review): retriever_to_cache() is a synchronous call made from this
    # coroutine; the event loop waits for as long as it takes.
    qa = ConversationalRetrievalChain.from_llm(
        streaming_llm,
        memory=memory,
        chain_type="stuff",
        return_source_documents=True,
        verbose=False,
        # Pass the custom prompt; otherwise the chain falls back to its default.
        condense_question_prompt=condense_question_prompt,
        retriever=retriever_to_cache(),
    )
    cl.user_session.set("conversation_chain", qa)
def _metadata_text(metadata, key):
    """Return ``metadata[key]`` as text, or ``""`` when the key is absent or None."""
    value = metadata.get(key)
    return "" if value is None else str(value)


def _restore_oe(text):
    """Replace a mis-decoded "œ" ligature in ``text`` with the letters "oe"."""
    # NOTE(review): the search character is presumably U+009C (the
    # Windows-1252 byte for "œ" read as Latin-1) - confirm against the data.
    # An empty search string must not be used here: it would insert "oe"
    # between every character. A single space must not be used either, as it
    # would turn every space into "oe".
    return text.replace('\x9c', 'oe')


def _format_source(metadata):
    """Render one retrieved document's metadata as the text shown for a source."""
    def field(key):
        return _metadata_text(metadata, key)

    return (
        "Formations : " + field('ABREGE_LIBELLES') + " " + field('INTITULE')
        + "\n\nROME : " + field('CODES_ROME')
        + "\nLibellés ROME : " + field('LIBELLES_ROME')
        + "\n\nActivités : " + _restore_oe(field('ACTIVITES_VISEES'))
        + "\n\nEmplois accessibles : " + field('TYPE_EMPLOI_ACCESSIBLES')
        + "\n\nCompétences : " + _restore_oe(field('CAPACITES_ATTESTEES'))
    )


async def main(message: cl.Message):
    """Answer a user message with the session's conversation chain.

    For the "OF" chat profile, the message is sent to the chain stored under
    ``"conversation_chain"``. Three messages follow: the answer, a message with
    a "download" action carrying the question and answer, and a message that
    lists the sources (or states that none were found).

    Args:
        message: Incoming user message; ``message.content`` is the question.
    """
    chat_profile = cl.user_session.get("chat_profile")
    # The profile may be absent (None); only the "OF" profile is handled here.
    if not chat_profile or chat_profile.split(' - ')[0] != "OF":
        return

    chain = cl.user_session.get("conversation_chain")
    cb = cl.AsyncLangchainCallbackHandler()
    res = await chain.acall("Contexte : Réponds à la question suivante de la manière la plus pertinente, la plus exhaustive et la plus détaillée possible, avec au minimum 3000 tokens jusqu'à 4000 tokens, seulement et strictement dans le contexte et les informations fournies. Question : " + message.content, callbacks=[cb])
    answer = res["answer"]
    source_documents = res["source_documents"]

    # One text element per retrieved document, numbered from 1.
    text_elements = [
        cl.Text(content=_format_source(doc.metadata), name=f"Source n°{position}")
        for position, doc in enumerate(source_documents or [], start=1)
    ]

    # Only the first ten names are listed; every element stays attached.
    source_names = [element.name for element in text_elements[:10]]
    if source_names:
        metadatas = ', '.join(source_names)
    else:
        metadatas = "\n\nPas de source trouvée!"

    actions = [
        cl.Action(name="download", value="Question : " + message.content + "\n\nRéponse : " + answer, description="download_offre_formation")
    ]
    await cl.Message(author="Datapcc 🌐🌐🌐",content=answer).send()
    await cl.Message(author="Datapcc 🌐🌐🌐",content="Download", actions=actions).send()
    await cl.Message(author="Datapcc 🌐🌐🌐",content="Sources : " + metadatas, elements=text_elements).send()