import json
import os
import sys
import pandas as pd
from timeit import default_timer as timer
import nltk
sys.path.insert(0, os.getcwd())
chatting = len(sys.argv) > 1 and sys.argv[1] == "chat"
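# Chat mode handles a single question at a time, so force the batch size to 1.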
if chatting:
os.environ["BATCH_SIZE"] = "1"
from app_modules.init import app_init
from app_modules.llm_qa_chain import QAChain
from app_modules.utils import print_llm_response, calc_metrics, detect_repetition_scores
llm_loader, qa_chain = app_init()
if chatting:
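    # Interactive mode: read questions from stdin, run each through the QA chain,
    # and print the response until the user types "exit".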
print("Starting chat mode")
while True:
question = input("Please enter your question: ")
if question.lower() == "exit":
break
result = qa_chain.call_chain({"question": question, "chat_history": []}, None)
print_llm_response(result)
sys.exit(0)
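
# Batch mode: an optional command-line argument limits how many questions are evaluated.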
num_of_questions = 0
if len(sys.argv) > 1:
    num_of_questions = int(sys.argv[1])
# Create an empty DataFrame with column names
df = pd.DataFrame(
    columns=[
        "id",
        "question",
        "answer",
    ]
)
batch_size = int(os.getenv("BATCH_SIZE", "1"))
print(f"Batch size: {batch_size}")
questions_file_path = os.environ.get("QUESTIONS_FILE_PATH")
debug_retrieval = os.getenv("DEBUG_RETRIEVAL", "false").lower() == "true"
# Load the test questions from the JSON file
print(f"Reading questions from file: {questions_file_path}")
with open(questions_file_path) as f:
    test_data = json.load(f)
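
# The questions file is either a dict keyed by question id or a list of records
# that each include their own "id".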
if isinstance(test_data, dict):
    questions = [test_data[key] for key in test_data.keys()]
    ids = [key for key in test_data.keys()]
else:
    questions = test_data
    ids = [row["id"] for row in questions]

if num_of_questions > 0:
    questions = questions[:num_of_questions]
print(f"Number of questions: {len(questions)}")
if __name__ == "__main__":
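    # Run the questions through the QA chain in batches and time the whole run.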
    chat_start = timer()
    index = 0

    while index < len(questions):
        batch_ids = ids[index : index + batch_size]
        batch_questions = [q["question"] for q in questions[index : index + batch_size]]
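
        # The RAG chain (QAChain) expects chat history in its inputs; other chains
        # only take the question.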
        if isinstance(qa_chain, QAChain):
            inputs = [{"question": q, "chat_history": []} for q in batch_questions]
        else:
            inputs = [{"question": q} for q in batch_questions]

        start = timer()
        result = qa_chain.call_chain(inputs, None)
        end = timer()
        print(f"Completed in {end - start:.3f}s")
        # print("result:", result)
        batch_answers = [r["answer"] for r in result]
        for id, question, answer in zip(batch_ids, batch_questions, batch_answers):
            df.loc[len(df)] = {
                "id": id,
                "question": question,
                "answer": answer,
            }

        index += batch_size

        for r in result:
            print_llm_response(r, debug_retrieval)

    chat_end = timer()
    total_time = chat_end - chat_start
    print(f"Total time used: {total_time:.3f} s")
    df2 = pd.DataFrame(
        columns=[
            "id",
            "question",
            "answer",
            "word_count",
            "ground_truth",
        ]
    )

    for i in range(len(df)):
        question = questions[i]
        answer = df["answer"][i]
        query = df["question"][i]
        id = df["id"][i]
        ground_truth = question[
            "wellFormedAnswers" if "wellFormedAnswers" in question else "answers"
        ]
        word_count = len(nltk.word_tokenize(answer))
        df2.loc[len(df2)] = {
            "id": id,
            "question": query,
            "answer": answer,
            "word_count": word_count,
            "ground_truth": ground_truth,
        }
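
    # Expand the repetition scores computed for each answer into three separate columns.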
df2[["newline_score", "repetition_score", "total_repetitions"]] = df2[
"answer"
].apply(detect_repetition_scores)
pd.options.display.float_format = "{:.3f}".format
print(df2.describe())
word_count = df2["word_count"].sum()
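
    # Save the per-question results, prefixed with two comment lines that record the
    # chain type, questions file, model, and repetition penalty.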
    csv_file = (
        os.getenv("TEST_RESULTS_CSV_FILE") or f"qa_batch_{batch_size}_test_results.csv"
    )
    with open(csv_file, "w") as f:
        f.write(
            f"# RAG: {isinstance(qa_chain, QAChain)} questions: {questions_file_path}\n"
        )
        f.write(
            f"# model: {llm_loader.model_name} repetition_penalty: {llm_loader.repetition_penalty}\n"
        )
    df2.to_csv(csv_file, mode="a", index=False, header=True)
    print(f"test results saved to file: {csv_file}")
    scores = calc_metrics(df2)

    df = pd.DataFrame(
        {
            "model": [llm_loader.model_name],
            "repetition_penalty": [llm_loader.repetition_penalty],
            "word_count": [word_count],
            "inference_time": [total_time],
            "inference_speed": [word_count / total_time],
            "bleu1": [scores["bleu_scores"]["bleu"]],
            "rougeL": [scores["rouge_scores"]["rougeL"]],
        }
    )
print(f"Number of words generated: {word_count}")
print(f"Average generation speed: {word_count / total_time:.3f} words/s")
    csv_file = os.getenv("ALL_RESULTS_CSV_FILE") or "qa_chain_all_results.csv"
    file_existed = os.path.exists(csv_file) and os.path.getsize(csv_file) > 0
    df.to_csv(csv_file, mode="a", index=False, header=not file_existed)
    print(f"all results appended to file: {csv_file}")