import matplotlib

# Select the non-interactive Agg backend before pyplot is imported, so the
# choice takes effect regardless of the installed matplotlib version.
matplotlib.use('agg')

import matplotlib.pyplot as plt

import plot_utils
from constants import *


class MatplotlibDataPlotter:
    """Draw bar plots for single-domain and domain-pair tables.

    Two figures are created once in the constructor and reused: every call
    to a ``plot_*`` method clears the matching figure and redraws it, then
    returns that same ``Figure`` object.
    """

    def __init__(self, single_df, pair_df, num_domains_in_region_df):
        """Store the input tables and create the two reusable figures.

        Args:
            single_df: Table used by ``plot_single_domains``. The columns
                ``cds_region_id``, ``biosyn_class_index``, ``profile_name``
                and ``cosine_similarity_<split_name>`` are read from it.
            pair_df: Table used by ``plot_pair_domains``; the same columns
                are read from it as from ``single_df``.
            num_domains_in_region_df: Table with the columns
                ``cds_region_id`` and ``num_domains``, used to select the
                regions that are plotted.
        """
        self.single_df = single_df
        self.pair_df = pair_df
        self.num_domains_in_region_df = num_domains_in_region_df
        self.single_domains_fig = plt.figure(figsize=(5, 10))
        self.pair_domains_fig = plt.figure(figsize=(5, 10))

    def plot_single_domains(self, num_domains, split_name):
        """Redraw and return the single-domain figure.

        Args:
            num_domains: Minimum ``num_domains`` value a region must have
                to be included.
            split_name: Suffix of the ``cosine_similarity_<split_name>``
                column used to pick one row per region.

        Returns:
            ``self.single_domains_fig``, cleared and redrawn.
        """
        return self._plot_best_rows(
            self.single_df, self.single_domains_fig, num_domains, split_name
        )

    def plot_pair_domains(self, num_domains, split_name):
        """Redraw and return the domain-pair figure.

        Args:
            num_domains: Minimum ``num_domains`` value a region must have
                to be included.
            split_name: Suffix of the ``cosine_similarity_<split_name>``
                column used to pick one row per region.

        Returns:
            ``self.pair_domains_fig``, cleared and redrawn.
        """
        return self._plot_best_rows(
            self.pair_df, self.pair_domains_fig, num_domains, split_name
        )

    def _plot_best_rows(self, df, fig, num_domains, split_name):
        """Plot, on ``fig``, the highest-similarity row of each region.

        Regions are kept when their ``num_domains`` is at least the given
        threshold. For each kept region the row of ``df`` with the largest
        ``cosine_similarity_<split_name>`` is selected, and the
        ``biosyn_class_index`` / ``profile_name`` values of those rows are
        handed to ``plot_utils.draw_barplots``.

        Returns:
            ``fig``. It is left cleared (blank) when no region qualifies.
        """
        region_counts = self.num_domains_in_region_df
        selected_region_ids = region_counts.loc[
            region_counts.num_domains >= num_domains, 'cds_region_id'
        ].values
        df_subset = df.loc[df.cds_region_id.isin(selected_region_ids)]

        if df_subset.empty:
            # Nothing to draw: hand back a blank figure instead of passing
            # empty arrays to the plotting helper.
            fig.clf()
            return fig

        column_name = f'cosine_similarity_{split_name}'
        # Index label of the best-scoring row within every region.
        best_row_index = (
            df_subset.groupby('cds_region_id')[column_name].idxmax().values
        )
        targets_list = df_subset.loc[best_row_index, 'biosyn_class_index'].values
        label_list = df_subset.loc[best_row_index, 'profile_name'].values

        fig.clf()
        ax = fig.gca()
        plot_utils.draw_barplots(
            targets_list,
            label_list=label_list,
            top_n=5,
            bin_width=1,
            hue_group_offset=0.5,
            hue_order=BIOSYN_CLASS_NAMES,
            hue2count={},
            width=0.9,
            ax=ax,
            show_legend=True,
        )
        # Lay out this figure explicitly; plt.tight_layout() would act on
        # pyplot's current figure, which is not necessarily ``fig``.
        fig.tight_layout()
        return fig