"""Visualise call durations grouped by the number of active students.

Reads the semicolon-separated call history from ``call_history_bash.csv``,
derives an ``elapsed`` column (seconds between ``started`` and ``finished``),
writes the enriched table to ``output.csv`` and then shows, for every distinct
``active_students`` value, a box plot and a histogram of the elapsed times.
"""
from datetime import datetime

import pandas as pd

# Debugging aid: uncomment to print complete frames without truncation.
# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

INPUT_CSV = "call_history_bash.csv"
OUTPUT_CSV = "output.csv"
CSV_SEPARATOR = ";"
# Layout of the ``started`` / ``finished`` columns in the input file.
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S.%f"


def add_elapsed_seconds(df: pd.DataFrame) -> pd.DataFrame:
    """Add an ``elapsed`` column holding ``finished - started`` in seconds.

    Both timestamp columns are parsed in one vectorised pass with
    ``TIMESTAMP_FORMAT`` instead of one ``strptime`` call per cell.

    Args:
        df: Frame with string columns ``started`` and ``finished``.

    Returns:
        The same frame object, modified in place, for convenient chaining.
        Rows with a missing timestamp get ``NaN`` as their elapsed time.
    """
    started = pd.to_datetime(df["started"], format=TIMESTAMP_FORMAT)
    finished = pd.to_datetime(df["finished"], format=TIMESTAMP_FORMAT)
    df["elapsed"] = (finished - started).dt.total_seconds()
    return df


def plot_elapsed_by_load(df: pd.DataFrame) -> None:
    """Show a box plot and a histogram of ``elapsed`` per ``active_students``.

    The figure has one row per distinct ``active_students`` value (in order of
    first appearance): the box plot on the left, a 25-bin histogram on the
    right. Blocks until the figure window is closed.

    Args:
        df: Frame with ``active_students`` and numeric ``elapsed`` columns.
    """
    # Imported lazily: pyplot is only needed to draw the figure, so the
    # elapsed-time computation stays importable without a plotting backend.
    import matplotlib.pyplot as plt

    student_numbers = df["active_students"].unique()
    rows = len(student_numbers)

    plt.figure(figsize=(16, 10))
    for row, student_number in enumerate(student_numbers, start=1):
        in_group = df["active_students"] == student_number
        # NaN durations (missing timestamps) cannot be binned or boxed.
        elapsed = df.loc[in_group, "elapsed"].dropna().to_numpy()

        # Left column of this row: odd subplot indices 1, 3, 5, ...
        plt.subplot(rows, 2, 2 * row - 1)
        plt.title("y=seconds, x=active students", x=0.75, y=0.75)
        plt.boxplot(x=elapsed)
        # Label the single box (drawn at position 1) explicitly rather than
        # through boxplot's ``labels=`` keyword, which Matplotlib 3.9
        # deprecated in favour of ``tick_labels=``; xticks works with both.
        plt.xticks([1], [str(student_number)])

        # Right column of this row: even subplot indices 2, 4, 6, ...
        plt.subplot(rows, 2, 2 * row)
        plt.title("y=count of seconds, x=seconds", x=0.75, y=0.75)
        plt.hist(x=elapsed, bins=25, edgecolor="white")
    plt.show()


def main() -> None:
    """Load the call history, export it with ``elapsed`` and plot it."""
    df = pd.read_csv(filepath_or_buffer=INPUT_CSV, sep=CSV_SEPARATOR)
    add_elapsed_seconds(df)
    df.to_csv(OUTPUT_CSV, index=False, sep=CSV_SEPARATOR)
    plot_elapsed_by_load(df)


if __name__ == "__main__":
    main()