Spaces:

LISA-Kadi
/

LISA-demo

Running

App Files Files Community

Kadi-IAM committed on Nov 14, 2024

Commit

f8bb00c

verified ·

1 Parent(s): 71632fa

Upload evaluation_example.ipynb

Browse files

Files changed (1) hide show

evaluation_example.ipynb +316 -0

evaluation_example.ipynb ADDED Viewed

	@@ -0,0 +1,316 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from tqdm.auto import tqdm\n",
+    "import pandas as pd\n",
+    "import time\n",
+    "\n",
+    "from langchain.document_loaders import PyMuPDFLoader\n",
+    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+    "\n",
+    "pd.set_option(\"display.max_colwidth\", None)\n",
+    "\n",
+    "# Set ChatMistralAI API KEY\n",
+    "# e.g., export MISTRAL_API_KEY=your_api_key_here\n",
+    "# or save the API key in a .env file (loaded by load_dotenv() below)\n",
+    "from dotenv import load_dotenv\n",
+    "load_dotenv()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read the source PDF; `document` is iterated page-wise by the splitting cells below\n",
+    "loader_module = PyMuPDFLoader\n",
+    "filepath = \"data/documents/Brandt et al_2024_Kadi_info_page.pdf\"\n",
+    "\n",
+    "loader = loader_module(filepath)\n",
+    "document = loader.load()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Split docs into chunks (these chunks are the contexts used for QA generation)\n",
+    "text_splitter = RecursiveCharacterTextSplitter(\n",
+    "    separators=[\"\\n\\n\", \"\\n\", \".\", \" \", \"\"],\n",
+    "    chunk_size=2000,\n",
+    "    chunk_overlap=200,\n",
+    "    add_start_index=True,\n",
+    ")\n",
+    "\n",
+    "# Split one loaded document at a time and collect all chunks in a single flat list\n",
+    "docs_processed = []\n",
+    "for doc in document:\n",
+    "    docs_processed.extend(text_splitter.split_documents([doc]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Chat model used for QA generation, the RAG chain and the Ragas evaluation (MistralAI here)\n",
+    "from langchain_mistralai.chat_models import ChatMistralAI\n",
+    "\n",
+    "llm = ChatMistralAI(model=\"mistral-large-latest\")\n",
+    "\n",
+    "# Smoke test: a single short call to check that the model can be reached\n",
+    "llm.invoke(\"hello\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "QA_generation_prompt = \"\"\"\n",
+    "Your task is to write a factoid question and an answer given a context.\n",
+    "Your factoid question should be answerable with a specific, concise piece of factual information from the context.\n",
+    "Your factoid question should be formulated in the same style as questions users could ask in a search engine. Users are usually scientific researchers in the field of materials science.\n",
+    "This means that your factoid question MUST NOT mention something like \"according to the passage\" or \"context\".\n",
+    "Please ask the specific question instead of the general question, like 'What is the key information in the given paragraph?'.\n",
+    "\n",
+    "Provide your answer as follows:\n",
+    "\n",
+    "Output:::\n",
+    "Factoid question: (your factoid question)\n",
+    "Answer: (your answer to the factoid question)\n",
+    "\n",
+    "Now here is the context.\n",
+    "\n",
+    "Context: {context}\\n\n",
+    "Output:::\"\"\"\n",
+    "\n",
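+    "# Alternative prompt (commented out): to try it, uncomment the lines below so they replace the prompt above.\n",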
+    "# Ref: https://mlflow.org/docs/latest/llms/rag/notebooks/question-generation-retrieval-evaluation.html\n",
+    "# QA_generation_prompt = \"\"\"\n",
+    "# Please generate a question asking for the key information in the given paragraph.\n",
+    "# Also answer the questions using the information in the given paragraph.\n",
+    "# Please ask the specific question instead of the general question, like\n",
+    "# 'What is the key information in the given paragraph?'.\n",
+    "# Please generate the answer using as much information as possible.\n",
+    "# If you are unable to answer it, please generate the answer as 'I don't know.'\n",
+    "\n",
+    "# Provide your answer as follows:\n",
+    "\n",
+    "# Output:::\n",
+    "# Factoid question: (your factoid question)\n",
+    "# Answer: (your answer to the factoid question)\n",
+    "\n",
+    "# Now here is the context.\n",
+    "\n",
+    "# Context: {context}\\n\n",
+    "# Output:::\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Generate QA pairs\n",
+    "\n",
+    "import random\n",
+    "\n",
+    "N_GENERATIONS = 5  # generate only 5 QA couples here for cost and time considerations\n",
+    "MAX_ANSWER_LENGTH = 500  # answers with this many characters or more are discarded\n",
+    "QUESTION_MARKER = \"Factoid question: \"  # output markers requested in QA_generation_prompt\n",
+    "ANSWER_MARKER = \"Answer: \"\n",
+    "\n",
+    "print(f\"Generating {N_GENERATIONS} QA couples...\")\n",
+    "\n",
+    "outputs = []\n",
+    "# random.choices samples with replacement, so the same chunk may be drawn more than once\n",
+    "for sampled_context in tqdm(random.choices(docs_processed, k=N_GENERATIONS)):\n",
+    "    # Errors raised by the LLM call are not caught on purpose: a failing API call should stop the cell\n",
+    "    output_QA_couple = llm.invoke(QA_generation_prompt.format(context=sampled_context.page_content)).content\n",
+    "    time.sleep(3)  # sleep for llm rate limitation\n",
+    "\n",
+    "    # Skip replies that do not follow the requested output format instead of storing garbage\n",
+    "    if QUESTION_MARKER not in output_QA_couple or ANSWER_MARKER not in output_QA_couple:\n",
+    "        continue\n",
+    "    # Skip chunks without a source entry (the original code dropped these via the KeyError)\n",
+    "    if \"source\" not in sampled_context.metadata:\n",
+    "        continue\n",
+    "\n",
+    "    # Question: text between the last question marker and the following answer marker\n",
+    "    question = output_QA_couple.split(QUESTION_MARKER)[-1].split(ANSWER_MARKER)[0].strip()\n",
+    "    # Answer: everything after the last answer marker\n",
+    "    answer = output_QA_couple.split(ANSWER_MARKER)[-1].strip()\n",
+    "    if not question or not answer or len(answer) >= MAX_ANSWER_LENGTH:\n",
+    "        continue\n",
+    "\n",
+    "    outputs.append(\n",
+    "        {\n",
+    "            \"context\": sampled_context.page_content,\n",
+    "            \"question\": question,\n",
+    "            \"answer\": answer,\n",
+    "            \"source_doc\": sampled_context.metadata[\"source\"],\n",
+    "        }\n",
+    "    )\n",
+    "\n",
+    "print(f\"Kept {len(outputs)} of {N_GENERATIONS} generated QA couples.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "reference_df = pd.DataFrame(outputs)\n",
+    "# Fail early with a clear message: later cells read reference_df[\"question\"] and reference_df[\"answer\"]\n",
+    "assert not reference_df.empty, \"No QA couples were generated; re-run the QA generation cell.\"\n",
+    "display(reference_df.head(1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# build a simple rag chain\n",
+    "from langchain.chains import RetrievalQA\n",
+    "from langchain.vectorstores import FAISS\n",
+    "from langchain_huggingface import HuggingFaceEmbeddings\n",
+    "\n",
+    "# Chunking used for the retrieval index (set independently of the QA-generation splitter)\n",
+    "chunk_size = 1024\n",
+    "chunk_overlap = 256\n",
+    "splitter = RecursiveCharacterTextSplitter(\n",
+    "    chunk_size=chunk_size,\n",
+    "    chunk_overlap=chunk_overlap,\n",
+    "    separators=[\"\\n\\n\", \"\\n\"],\n",
+    ")\n",
+    "doc_chunks = splitter.split_documents(document)\n",
+    "\n",
+    "# Embed the chunks and index them in a FAISS vector store\n",
+    "embeddings = HuggingFaceEmbeddings(model_name=\"all-mpnet-base-v2\")\n",
+    "vectorstore = FAISS.from_documents(doc_chunks, embedding=embeddings)\n",
+    "retriever = vectorstore.as_retriever()\n",
+    "\n",
+    "# return_source_documents=True: the evaluation cell reads the retrieved chunks from the chain output\n",
+    "rag_chain = RetrievalQA.from_llm(\n",
+    "    llm=llm,\n",
+    "    retriever=retriever,\n",
+    "    return_source_documents=True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Prepare evaluation data set\n",
+    "def prepare_eval_dataset(reference_df, rag_chain):\n",
+    "    \"\"\"Answer every reference question with the RAG chain and collect the evaluation dataset.\n",
+    "\n",
+    "    Args:\n",
+    "        reference_df: DataFrame with a \"question\" and an \"answer\" column\n",
+    "            (the reference QA couples).\n",
+    "        rag_chain: Chain (or plain callable) that takes {\"query\": ...} and returns a\n",
+    "            mapping with \"result\" (the answer) and \"source_documents\" (retrieved documents).\n",
+    "\n",
+    "    Returns:\n",
+    "        datasets.Dataset with the columns \"question\", \"answer\", \"contexts\" and \"ground_truth\".\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: If reference_df lacks the \"question\" or \"answer\" column.\n",
+    "    \"\"\"\n",
+    "    print(\"now loading evaluation dataset...\")\n",
+    "    from datasets import Dataset\n",
+    "\n",
+    "    missing = {\"question\", \"answer\"} - set(reference_df.columns)\n",
+    "    if missing:\n",
+    "        raise ValueError(f\"reference_df is missing required column(s): {sorted(missing)}\")\n",
+    "\n",
+    "    # Plain Python lists rather than numpy arrays, so Dataset.from_dict receives simple columns\n",
+    "    questions = reference_df[\"question\"].tolist()\n",
+    "    ground_truth = reference_df[\"answer\"].tolist()  # wrap each item as [a] for older version of ragas\n",
+    "    answers = []\n",
+    "    contexts = []\n",
+    "\n",
+    "    # Prefer .invoke() over the deprecated direct call; fall back to calling plain callables\n",
+    "    ask = getattr(rag_chain, \"invoke\", rag_chain)\n",
+    "\n",
+    "    # Get answers from rag_chain\n",
+    "    print(\"now getting anwsers from QA llm...\")\n",
+    "    for query in questions:\n",
+    "        results = ask({\"query\": query})\n",
+    "        answers.append(results[\"result\"])\n",
+    "        contexts.append([doc.page_content for doc in results[\"source_documents\"]])\n",
+    "        time.sleep(3)  # sleep for llm rate limitation\n",
+    "\n",
+    "    # Convert dict to dataset\n",
+    "    return Dataset.from_dict(\n",
+    "        {\n",
+    "            \"question\": questions,\n",
+    "            \"answer\": answers,\n",
+    "            \"contexts\": contexts,\n",
+    "            \"ground_truth\": ground_truth,\n",
+    "        }\n",
+    "    )\n",
+    "\n",
+    "eval_dataset = prepare_eval_dataset(reference_df, rag_chain)\n",
+    "eval_dataset\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Ragas evaluation (this cell can take a while)\n",
+    "from ragas import evaluate\n",
+    "from ragas.llms import LangchainLLMWrapper\n",
+    "from ragas.metrics import (\n",
+    "    answer_correctness,\n",
+    "    answer_relevancy,\n",
+    "    context_precision,\n",
+    "    context_recall,\n",
+    "    faithfulness,\n",
+    ")\n",
+    "\n",
+    "# Wrap the LangChain chat model so it can be passed to Ragas as the evaluator LLM\n",
+    "eval_llm = LangchainLLMWrapper(llm)\n",
+    "\n",
+    "eval_metrics = [\n",
+    "    context_precision,\n",
+    "    context_recall,\n",
+    "    faithfulness,\n",
+    "    answer_relevancy,\n",
+    "    answer_correctness,\n",
+    "]\n",
+    "\n",
+    "eval_result = evaluate(\n",
+    "    dataset=eval_dataset,\n",
+    "    metrics=eval_metrics,\n",
+    "    llm=eval_llm,\n",
+    "    embeddings=embeddings,\n",
+    "    raise_exceptions=False,\n",
+    ")\n",
+    "\n",
+    "# Tabular view of the evaluation result, shown in the next cell\n",
+    "result_eval_df = eval_result.to_pandas()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Check results\n",
+    "result_eval_df\n",
+    "# if you get NaN in results, check \"Frequently Asked Questions\" in Ragas for help"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}