import os
import json
import bcrypt
import numpy as np
import pandas as pd
import chainlit as cl
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from langchain_core.prompts.prompt import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_pinecone import PineconeVectorStore
@cl.password_auth_callback
def auth_callback(username: str, password: str):
    # Credentials are stored as a JSON list of {"ident", "pwd", "role"}
    # objects in the CHAINLIT_AUTH_LOGIN environment variable.
    auth = json.loads(os.environ['CHAINLIT_AUTH_LOGIN'])
    user = next((d for d in auth if d['ident'] == username), None)
    if user is None:
        return None
    # bcrypt.checkpw() compares the submitted values against freshly salted
    # hashes of the stored values (which are kept in plaintext here).
    resultLogAdmin = bcrypt.checkpw(username.encode('utf-8'), bcrypt.hashpw(user['ident'].encode('utf-8'), bcrypt.gensalt()))
    resultPwdAdmin = bcrypt.checkpw(password.encode('utf-8'), bcrypt.hashpw(user['pwd'].encode('utf-8'), bcrypt.gensalt()))
    if resultLogAdmin and resultPwdAdmin and user['role'] == "admindatapcc":
        return cl.User(
            identifier=user['ident'] + " : 🧑‍💼 Admin Datapcc", metadata={"role": "admin", "provider": "credentials"}
        )
    elif resultLogAdmin and resultPwdAdmin and user['role'] == "userdatapcc":
        return cl.User(
            identifier=user['ident'] + " : 🧑‍🎓 User Datapcc", metadata={"role": "user", "provider": "credentials"}
        )
    return None
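# Expected CHAINLIT_AUTH_LOGIN shape, as assumed by auth_callback above
# (illustrative values, not real credentials):
# CHAINLIT_AUTH_LOGIN='[{"ident": "admin@example.org", "pwd": "secret", "role": "admindatapcc"}]'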
os.environ["TOKENIZERS_PARALLELISM"] = os.environ["TOKENIZERS_PARALLELISM"] | |
os.environ['OPENAI_API_KEY'] = os.environ['OPENAI_API_KEY'] | |
@cl.author_rename
def rename(orig_author: str):
    # Map internal LangChain component names to display names in the UI.
    rename_dict = {
        "DatapccSkillStream": "Datapcc",
        "ConversationalRetrievalChain": "Assistant conversationnel 💬",
        "Retriever": "Agent conversationnel",
        "StuffDocumentsChain": "Chaîne de documents",
        "LLMChain": "Agent",
        "ChatOpenAI": "IA🤖",
    }
    return rename_dict.get(orig_author, orig_author)
@cl.action_callback("download")
async def on_action(action):
    # Persist the action payload to a text file, then send it back to the
    # user as a downloadable element.
    df = pd.DataFrame(np.array([action.value]))
    df.to_csv('./' + action.description + '.txt', index=False, header=False, encoding='utf-8')
    elements = [
        cl.File(
            name=action.description + ".txt",
            path="./" + action.description + ".txt",
            display="inline",
        ),
    ]
    await cl.Message(
        author="Datapcc 🌐🌐🌐", content="[Lien] 🔗", elements=elements
    ).send()
    await action.remove()
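# The "download" name in the decorator above must match the
# cl.Action(name="download", ...) created in main(); clicking that button
# routes here and returns the question/answer pair as a .txt file.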
def retriever_to_cache():
    # PINECONE_API_KEY is read implicitly by langchain_pinecone; the
    # self-assignment fails fast if it is missing from the environment.
    os.environ['PINECONE_API_KEY'] = os.environ['PINECONE_API_KEY']
    os.environ['PINECONE_ENVIRONMENT'] = "us-west4-gcp-free"
    index_name = os.environ['PINECONE_INDEX_NAME']
    embeddings = HuggingFaceEmbeddings()
    vectorstore = PineconeVectorStore(
        index_name=index_name, embedding=embeddings
    )
    # Return at most 30 documents scoring above 0.7, restricted to the
    # "OF" (offre de formation) category.
    retriever = vectorstore.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.7, "k": 30, "filter": {'categorie': {'$eq': 'OF'}}},
    )
    return retriever
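# Quick standalone sanity check of the retriever (hypothetical query string;
# requires the Pinecone index and the embedding model to be reachable):
#
#     docs = retriever_to_cache().get_relevant_documents("licence informatique")
#     print(len(docs))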
@cl.set_chat_profiles
async def chat_profile():
    return [
        cl.ChatProfile(
            name="OF - Offre de formation",
            markdown_description="Requêter sur l'offre de formation - OF",
            icon="./public/favicon.png",
        ),
    ]
@cl.on_chat_start
async def start():
    chat_profile = cl.user_session.get("chat_profile")
    chatProfile = chat_profile.split(' - ')
    if chatProfile[0] == 'OF':
        app_user = cl.user_session.get("user")
        # Turn an identifier like "prenom.nom@domaine" into "prenom nom".
        welcomeUser = app_user.identifier
        welcomeUserArray = welcomeUser.split('@')
        welcomeUserStr = welcomeUserArray[0].replace('.', ' ')
        await cl.Message(f"> Bonjour {welcomeUserStr}").send()
        await cl.Message(
            author="Datapcc 🌐🌐🌐",
            content=f"✨ Commencez à poser vos questions sur les données \"{chat_profile}\"\n- Création de BCC à partir d'une liste de savoirs ou d'objectifs pédagogiques\n- Création du tableau de la version n°1 de la maquette de formation"
        ).send()
    # Prompt used to condense the running conversation plus the new question
    # into a single standalone question for retrieval.
    _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
    CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
    ########## Chain with streaming ##########
    message_history = ChatMessageHistory()
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )
    streaming_llm = ChatOpenAI(
        model_name="gpt-4-1106-preview",
        streaming=True,
        temperature=1,
    )
    qa = ConversationalRetrievalChain.from_llm(
        streaming_llm,
        memory=memory,
        chain_type="stuff",
        return_source_documents=True,
        verbose=False,
        retriever=retriever_to_cache(),
        condense_question_prompt=CONDENSE_QUESTION_PROMPT,
    )
    cl.user_session.set("conversation_chain", qa)
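# Note: with return_source_documents=True, a call to the chain returns a dict
# carrying both the generated "answer" and the retrieved "source_documents",
# which main() below unpacks.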
@cl.on_message
async def main(message: cl.Message):
    chat_profile = cl.user_session.get("chat_profile")
    chatProfile = chat_profile.split(' - ')
    if chatProfile[0] == "OF":
        chain = cl.user_session.get("conversation_chain")
        cb = cl.AsyncLangchainCallbackHandler()
        res = await chain.acall(
            "Contexte : Réponds à la question suivante de la manière la plus pertinente, la plus exhaustive et la plus détaillée possible, dans le contexte et les informations fournies. Question : " + message.content,
            callbacks=[cb],
        )
        answer = res["answer"]
        source_documents = res["source_documents"]
        text_elements = []
        metadatas = ''
        if source_documents:
            for source_idx, source_doc in enumerate(source_documents):
                numSource = source_idx + 1
                source_name = f"Source n°{numSource}"
                # Normalize the "œ" ligature before display. Assumed targets:
                # U+0153 and its mis-decoded Windows-1252 stray byte U+009C
                # (the original replacement characters were unreadable).
                text_elements.append(
                    cl.Text(
                        content="Formations : " + source_doc.metadata['ABREGE_LIBELLES'] + " " + source_doc.metadata['INTITULE']
                        + "\n\nROME : " + source_doc.metadata['CODES_ROME']
                        + "\nLibellés ROME : " + source_doc.metadata['LIBELLES_ROME']
                        + "\n\nActivités : " + source_doc.metadata['ACTIVITES_VISEES'].replace('\u009c', 'oe')
                        + "\n\nEmplois accessibles : " + source_doc.metadata['TYPE_EMPLOI_ACCESSIBLES']
                        + "\n\nCompétences : " + source_doc.metadata['CAPACITES_ATTESTEES'].replace('\u0153', 'oe').replace('\u009c', 'oe'),
                        name=source_name,
                    )
                )
            # Cite at most the first 10 sources.
            source_names = [text_el.name for countMetadata, text_el in enumerate(text_elements) if countMetadata < 10]
            if source_names:
                metadatas += ', '.join(source_names)
            else:
                metadatas += "\n\nPas de source trouvée!"
        actions = [
            cl.Action(name="download", value="Question : " + message.content + "\n\nRéponse : " + answer, description="download_offre_formation")
        ]
        await cl.Message(author="Datapcc 🌐🌐🌐", content=answer).send()
        await cl.Message(author="Datapcc 🌐🌐🌐", content="Download", actions=actions).send()
        if metadatas:
            await cl.Message(author="Datapcc 🌐🌐🌐", content="Sources : " + metadatas, elements=text_elements).send()