Source code for DigitalCellSorter.VisualizationFunctions

import copy
import functools
import os

import numpy as np
import pandas as pd

import scipy.stats
import scipy.signal
import scipy.ndimage
import scipy.cluster.hierarchy
from scipy.interpolate import UnivariateSpline
from scipy.ndimage import gaussian_filter

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patheffects as path_effects
from matplotlib import cm

import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import plot as plot_offline
from plotly.offline import plot_mpl
from adjustText import adjust_text

from .GenericFunctions import read, write

class VisualizationFunctions:

    '''Class of visualization functions for DigitalCellSorter'''

    def __init__(self, dataName = 'dataName', saveDir = os.path.join(''), matplotlibMode = 'Agg', safePlotting = True, verbose = 1):

        '''Store the plotting configuration

        Parameters:
            dataName: str, Default 'dataName'
                Prefix used in the names of the files written by the plotting methods

            saveDir: str, Default ''
                Directory where figures and tables are written

            matplotlibMode: str, Default 'Agg'
                Backend name passed to matplotlib.use(); None leaves the backend untouched

            safePlotting: boolean, Default True
                Whether methods decorated with tryExcept report errors instead of raising them

            verbose: int, Default 1
                Verbosity level checked before printing messages
        '''

        self.saveDir = saveDir
        self.dataName = dataName
        self.matplotlibMode = matplotlibMode
        self.safePlotting = safePlotting
        self.verbose = verbose

        return

    @property
    def matplotlibMode(self):

        '''Name of the matplotlib backend last assigned to this object'''

        return self._matplotlibMode

    @matplotlibMode.setter
    def matplotlibMode(self, value):

        # Assigning the property also switches the matplotlib backend, unless value is None
        self._matplotlibMode = value

        if self._matplotlibMode is not None:
            matplotlib.use(self._matplotlibMode)

        return

    def tryExcept(func):

        '''Decorator that makes a plotting method fail softly

        When self.safePlotting is True any Exception raised by the wrapped method
        is reported (if self.verbose >= 1) and None is returned instead.
        When self.safePlotting is False the exception propagates.

        The value returned by the wrapped method is passed through to the caller.

        Parameters:
            func: function
                Method taking self as the first argument

        Returns:
            function
                Wrapped method carrying the name and docstring of func
        '''

        @functools.wraps(func)
        def internal(self, *args, **kwargs):

            if not self.safePlotting:
                return func(self, *args, **kwargs)

            try:
                return func(self, *args, **kwargs)
            except Exception as exception:
                if self.verbose >= 1:
                    print('Something went wrong while making plot: %s' % (func))
                    print('\tError message: %s\n' % (exception))

            return None

        return internal

    def saveFigure(self, fig, saveDir, label = 'Figure', extension = 'png', dpi = 300, close = True, attemptSavingHTML = False):

        '''Function used internally to save and close figures

        Parameters:
            fig: matplotlib.figure.Figure
                Figure to save

            saveDir: str
                Path of directories to save the object to

            label: str, Default 'Figure'
                Name of the figure to save

            extension: str, Default 'png'
                Format of the figure file, with or without the leading dot.
                Supported: png, jpeg, tiff (rasterized), svg, eps, pdf (vector)

            dpi: int, Default 300
                Figure resolution if rasterized

            close: boolean: Default True
                Whether to close the figure after saving

            attemptSavingHTML: boolean, Default False
                Whether to also try exporting the figure to an HTML file via plotly

        Returns:
            None

        Usage:
            saveFigure(fig, saveDir, label, extension, dpi)
        '''

        if saveDir != os.path.join('') and not os.path.exists(saveDir):
            os.makedirs(saveDir)

        # Normalize the extension to the '.ext' form; an empty or non-string value is rejected
        try:
            if not extension[0] == '.':
                extension = ''.join(['.', extension])
        except Exception as exception:
            if self.verbose >= 1:
                print(exception)
                print('Figure extension/format error')
                print('Example of acceptable extension: \".png\"')

            return

        rasterFormats = ['.png', '.jpeg', '.tiff']
        vectorFormats = ['.svg', '.eps', '.pdf']

        if extension in rasterFormats or extension in vectorFormats:
            # Resolution is only passed along for rasterized formats
            options = {'dpi': dpi} if extension in rasterFormats else {}

            try:
                fig.savefig(os.path.join(saveDir, label + extension), **options)
            except Exception as exception:
                if self.verbose >= 1:
                    print(exception)
        else:
            if self.verbose >= 1:
                print('Unsupported format. Figure not saved')

        if attemptSavingHTML:
            # Best effort only: failure of the HTML export must not prevent closing the figure
            try:
                plot_mpl(fig, filename=os.path.join(saveDir, label + '.html'), auto_open=False)
            except Exception:
                if self.verbose >= 1:
                    print('Saving to iteractive HTML did not succeed')

        if close:
            try:
                plt.close(fig)
            except Exception as exception:
                if self.verbose >= 1:
                    print(exception)
                    print('Error while closing figure')

        return

    # MatPlotLib-powered figures

    @staticmethod
    def _splitNegativeMarkers(geneList):

        '''Separate gene names from the trailing '-' that flags a negative marker

        Parameters:
            geneList: iterable of str
                Gene names, optionally suffixed with '-'

        Returns:
            tuple of two lists
                Gene names without the suffix, and one boolean per gene that is
                True where the suffix was present
        '''

        names, flags = [], []

        for gene in geneList:
            isNegative = gene.endswith('-')
            names.append(gene[:-1] if isNegative else gene)
            flags.append(isNegative)

        return names, flags

    @tryExcept
    def makeHeatmapGeneExpressionPlot(self, df = None, genes = None, normalize = True, logScale=False, subtract = False, saveExcel = True, nameToAppend = 'heatmap', plotBy = 'cluster', figsize = (8, 4), convertGenes = False, orderGenes=False, orderClusters=False, dpi = 300, extension = 'png', fontsize = 10, labelsFontsize = 10, **kwargs):

        '''Make heatmap gene expression plot from a provided gene expression matrix.

        Expression is averaged over the cells of every group of column level plotBy.
        Genes given with a trailing '-' are drawn in red, the others in blue.
        The first row of the heatmap shows the mean over all cells.

        Parameters:
            df: pandas.DataFrame, Default None
                Gene expression matrix, genes in rows.
                If None then self.df_expr is used (loaded if necessary)

            genes: list, tuple, numpy.ndarray or dict, Default None
                Genes to plot. A dictionary maps a group label to a list of genes;
                groups are then separated and labeled on the figure.
                The provided object is not modified

            normalize: boolean, Default True
                Whether to divide each gene by its maximum across groups.
                Genes with zero maximum are left as zeros

            logScale: boolean, Default False
                Whether to plot log(1 + value)

            subtract: boolean, Default False
                Whether to subtract the minimum of each gene before normalization

            saveExcel: boolean, Default True
                Whether to save the plotted values to an Excel file

            nameToAppend: str, Default 'heatmap'
                String to append to figure file name

            plotBy: str, Default 'cluster'
                Name of the column level to group cells by

            figsize: tuple, Default (8, 4)
                Figure size

            convertGenes: boolean, Default False
                Whether to convert gene names with self.gnc.Convert from 'alias' to 'hugo'

            orderGenes: boolean, Default False
                Whether to order genes by hierarchical clustering.
                NOTE(review): with dictionary input the group separators are drawn
                at the original group sizes and do not follow the reordered genes

            orderClusters: boolean, Default False
                Whether to order groups by hierarchical clustering

            dpi: int, Default 300
                Resolution of the figure image

            extension: str, Default 'png'
                Format of the figure file

            fontsize: int, Default 10
                Font size of the tick labels

            labelsFontsize: int, Default 10
                Font size of the gene group labels

            Any other keyword arguments are passed to saveFigure

        Returns:
            matplotlib.figure.Figure, or None if nothing was plotted

        Usage:
            DCS = DigitalCellSorter.DigitalCellSorter()

            DCS.makeHeatmapGeneExpressionPlot()
        '''

        if genes is None:
            if self.verbose >= 1:
                print('Plotting all expressed genes not supported. Provide a smaller list of genes')

            return

        isGrouped = isinstance(genes, dict)

        if isGrouped:
            geneGroups = list(genes.items())
        elif isinstance(genes, (list, np.ndarray, tuple)):
            geneGroups = [(None, genes)]
        else:
            return

        if df is None:
            if self.df_expr is None:
                self.loadExpressionData()

            if self.df_expr is None:
                return

            df = self.df_expr

        targetIndex = df.index

        # Keep only genes present in the data, remembering which of them are negative markers
        common, negativeFlags = [], []
        lengthListGenes, labelsListGenes = [], []

        for key, geneList in geneGroups:
            names, flags = self._splitNegativeMarkers(geneList)

            if convertGenes:
                names = self.gnc.Convert(names, 'alias', 'hugo', returnUnknownString=False)

            present = np.isin(names, targetIndex)
            keptNames = np.array(names)[present]

            if isGrouped:
                if len(keptNames) == 0:
                    continue

                lengthListGenes.append(len(keptNames))
                labelsListGenes.append(key)

            common.extend(keptNames)
            negativeFlags.extend(np.array(flags, dtype=bool)[present])

        if len(common) == 0:
            if self.verbose >= 1:
                print('None of the provided genes were found in the expression data')

            return

        isNegativeGenes = np.array(negativeFlags, dtype=bool)

        df = df.loc[common]

        # Grouping is done on the transposed frame: groupby(axis=1) is deprecated in pandas
        counts = df.loc[[df.index[0]]].T.groupby(level=plotBy).count().T
        means = df.mean(axis=1)

        df = df.T.groupby(level=plotBy).mean().T
        df.columns = df.columns.get_level_values(plotBy)
        df.columns = list(zip(df.columns.values, counts.values[0]))

        if normalize:
            if subtract:
                df = df.sub(df.min(axis=1), axis=0)

            rowMax = df.max(axis=1)

            # A gene that is zero everywhere would otherwise become a row of NaN
            df = df.div(rowMax.where(rowMax != 0., 1.), axis=0)

        if logScale:
            df = np.log(df + 1.)

        # Linkage needs at least two observations
        if orderGenes and df.shape[0] > 1:
            leaves = scipy.cluster.hierarchy.dendrogram(scipy.cluster.hierarchy.linkage(df, 'ward'), no_plot=True, get_leaves=True)['leaves']
            df = df.iloc[leaves]

            # The negative-marker flags have to follow the genes they belong to
            isNegativeGenes = isNegativeGenes[leaves]

        if orderClusters and df.shape[1] > 1:
            leaves = scipy.cluster.hierarchy.dendrogram(scipy.cluster.hierarchy.linkage(df.T, 'ward'), no_plot=True, get_leaves=True)['leaves']
            df = df.T.iloc[leaves].T

        df.insert(0, ('Mean', 'All'), means)

        figureName = self.dataName + '_' + nameToAppend + '_genes_by_%s' % (plotBy)

        if saveExcel:
            df.T.to_excel(os.path.join(self.saveDir, figureName + '.xlsx'))

        fig, ax = plt.subplots(figsize=figsize)

        groupsExtent = (-0.5, df.shape[0] - 0.5, df.shape[1] - 0.5, +0.5)

        # All genes in blue
        ax.imshow(df.T.values[1:,:], cmap='Blues', interpolation='None', aspect='auto', extent=groupsExtent)

        # Negative markers are drawn over in red; all other genes are masked out
        data = df.T.values[1:,:].copy()
        data[:, ~isNegativeGenes] = np.nan
        data = np.ma.masked_where(np.isnan(data), data)
        ax.imshow(data, cmap='Reds', interpolation='None', aspect='auto', extent=groupsExtent)

        # Row of means over all cells
        ax.imshow(df.T.values[:1,:], cmap='Reds', interpolation='None', aspect='auto', extent=(-0.5, df.shape[0] - 0.5, -0.5, +0.5))
        ax.axhline(y=0.5, c='k', lw=1.5)

        if len(lengthListGenes) != 0:
            currPosition = 0

            for label, value in zip(labelsListGenes, lengthListGenes):
                currPosition += value
                ax.axvline(x=currPosition - 0.5, c='k', lw=1)
                ax.text(currPosition - 0.5*value - 0.5, df.shape[1], label, fontsize=labelsFontsize, c='k', ha='center', va='top')

            # Empty table of groups (rows) by gene group labels (columns)
            df_temp = pd.DataFrame(index=df.columns[1:], columns=labelsListGenes)
            df_temp.index = pd.MultiIndex.from_tuples(df_temp.index).get_level_values(0)[::-1]
            df_temp.to_excel(os.path.join(self.saveDir, self.dataName + '-' + nameToAppend + '.xlsx'))

        ax.set_xticks(range(df.shape[0]))
        ax.set_yticks(range(df.shape[1]))

        # NOTE(review): both branches of the conditional below yield the same padding as written
        ylabels = ['(' + str(col[1]) + ')%s#' % (' ' if len(col[0]) <= 3 else ' ') + str(col[0]) for col in df.columns]
        ylabels[0] = 'Mean across all cells'

        ax.set_xticklabels(df.index, rotation=90, fontsize=fontsize)
        ax.set_yticklabels(ylabels, rotation=0, fontsize=1.2*fontsize)

        ax.set_xlim([-0.5, df.shape[0] - 0.5])
        ax.set_ylim([-0.5, df.shape[1] - 0.5])

        fig.tight_layout()

        self.saveFigure(fig, self.saveDir, figureName, extension=extension, dpi=dpi, **kwargs)

        return fig
[docs] @tryExcept def makeMarkerExpressionPlot(self, fontscale = 1., dpi = 300, extension = 'png', **kwargs): '''Produce image on marker genes and their expression on all clusters. Uses files generated by function DCS.Vote Parameters: dpi: int, Default 600 Resolution of the figure image extension: str, Default 'png' Format of the figure file Returns: None Usage: DCS = DigitalCellSorter.DigitalCellSorter() DCS.makeMarkerExpressionPlot() ''' df_votingResults = pd.read_excel(os.path.join(self.saveDir, self.dataName + '_annotation.xlsx'), sheet_name='z-scores') votingResults = dict(zip(df_votingResults['cluster'].values, df_votingResults['Predicted cell type'].values)) predictedCelltypes = dict(zip(df_votingResults['cluster'].values, df_votingResults['Predicted cell type'].str.split(' #', expand=True)[0])) supportingMarkersList = dict(zip(df_votingResults['cluster'].values, df_votingResults['Supporting markers'].str.split(' // '))) allMarkersList = dict(zip(df_votingResults['cluster'].values, df_votingResults['All markers'].str.split(' // '))) df_markers_cluster_centroids = pd.read_excel(os.path.join(self.saveDir, self.dataName + '_annotation.xlsx'), sheet_name='Cluster centroids', index_col=0, header=0).T df_markers = pd.read_excel(os.path.join(self.saveDir, self.dataName + '_annotation.xlsx'), sheet_name='Marker cell type weight matrix', index_col=0) df_markers_weighted = df_markers.copy() df_markers[df_markers >= 0.] = np.nan df_markers[df_markers < 0.] = -1. 
# Y_mc.T X_markers_cluster_means_transpose = df_markers_cluster_centroids.values.T df_means = df_markers_cluster_centroids.copy() # Normalization for i in range(X_markers_cluster_means_transpose.shape[1]): X_markers_cluster_means_transpose[:,i] -= np.min(X_markers_cluster_means_transpose[:,i]) X_markers_cluster_means_transpose[:,i] /= np.max(X_markers_cluster_means_transpose[:,i]) ORDER = scipy.cluster.hierarchy.dendrogram(scipy.cluster.hierarchy.linkage(X_markers_cluster_means_transpose, 'ward'), no_plot=True, get_leaves=True)['leaves'] ORDER2 = scipy.cluster.hierarchy.dendrogram(scipy.cluster.hierarchy.linkage(X_markers_cluster_means_transpose.T, 'ward'), no_plot=True, get_leaves=True)['leaves'] df_markers_weighted = df_markers_weighted.iloc[:, ORDER2] X_markers_cluster_means_sorted = X_markers_cluster_means_transpose[ORDER,:][:,ORDER2] df_all_marker_hits = pd.DataFrame(data=np.zeros((df_markers_cluster_centroids.shape)), index=df_markers_cluster_centroids.index, columns=df_markers_cluster_centroids.columns) for cluster in allMarkersList: if not allMarkersList[cluster] is np.nan: for gene in allMarkersList[cluster]: df_all_marker_hits.loc[gene, cluster] = 1 df_supp_marker_hits = pd.DataFrame(data=np.zeros((df_markers_cluster_centroids.shape)), index=df_markers_cluster_centroids.index, columns=df_markers_cluster_centroids.columns) for cluster in supportingMarkersList: if not supportingMarkersList[cluster] is np.nan: for gene in supportingMarkersList[cluster]: df_supp_marker_hits.loc[gene, cluster] = 1 df_neg_supp_marker_hits = pd.DataFrame(data=np.zeros((df_markers_cluster_centroids.shape)), index=df_markers_cluster_centroids.index, columns=df_markers_cluster_centroids.columns) for cluster in supportingMarkersList: if not supportingMarkersList[cluster] is np.nan: for gene in allMarkersList[cluster]: #for gene in df_markers.columns: if df_markers.loc[predictedCelltypes[cluster], gene] == -1.: df_neg_supp_marker_hits.loc[gene, cluster] = 1 X_marker_hits = 
df_all_marker_hits.values.T[ORDER,:][:,ORDER2] X_supp_marker_hits = df_supp_marker_hits.values.T[ORDER,:][:,ORDER2] X_neg_supp_marker_hits = df_neg_supp_marker_hits.values.T[ORDER,:][:,ORDER2] _figsize = np.float_(X_markers_cluster_means_transpose.shape[::-1]) / \ np.max(X_markers_cluster_means_transpose.shape) * 15.0 + 2.0 _figsize[1] *= 1.5 height_ratio = df_markers_cluster_centroids.shape[1] / (1. * df_markers_weighted.shape[0]) gs = matplotlib.gridspec.GridSpec(2, 2, width_ratios=[20,1], height_ratios=[height_ratio,1], left=0.13, right=0.99, top=0.99, bottom=0.25, wspace=0.01, hspace=0.04) fig = plt.figure(figsize=_figsize) if True: ax = plt.subplot(gs[0]) cell_counts = df_votingResults['# cells in cluster'].values.copy()[ORDER] means = (df_means.iloc[ORDER2,ORDER] * cell_counts / cell_counts.sum()).sum(axis=1) ax.imshow(means.values[None, :], cmap='Reds', interpolation='None', aspect='auto', extent=(-0.5, means.shape[0] - 0.5, -0.5, +0.5)) ax.imshow(X_markers_cluster_means_sorted,cmap='Blues', interpolation='None', aspect='auto', extent=(-0.5, X_markers_cluster_means_sorted.shape[1] - 0.5, X_markers_cluster_means_sorted.shape[0] - 0.5 + 1.0, +0.5)) i_list,j_list = np.where(X_marker_hits.T > 0) color = 'w' #(1., 1., 0.7) ax.plot(i_list, j_list + 1., 'k*', mec=color, mew=0.5, markersize=4) i_list_supp, j_list_supp = np.where(X_supp_marker_hits.T > 0) i_list_neg_supp, j_list_neg_supp = np.where(X_neg_supp_marker_hits.T > 0) ax.plot(i_list_supp, j_list_supp + 1., 'k*', mec='lime', mew=0.7, markersize=4) #mec='k', alpha=0.5, markersize=6 ax.plot(i_list_neg_supp, j_list_neg_supp + 1., 'k*', mec='red', mew=0.7, markersize=4) #mec='k', alpha=0.5, markersize=6 ax.set_xticks([]) ax.set_yticks(range(X_markers_cluster_means_transpose.shape[0] + 1)) clusterNames = list(votingResults.values()) clusterIndices = list(votingResults.keys()) ax.set_yticklabels(['Mean'] + [str(clusterNames[i]) + ' (' + str(clusterIndices[i]) + ')' for i in ORDER], rotation=0, 
fontsize=6*fontscale) ax.set_xlim([-0.5,X_markers_cluster_means_transpose.shape[1] - 0.5]) ax.set_ylim([-0.5,X_markers_cluster_means_transpose.shape[0] + 1 - 0.5]) if True: ax2 = plt.subplot(gs[1]) fontsize = 5*fontscale cells_in_clusters = df_votingResults['# cells in cluster'].values.copy()[ORDER] numberOfCells = cells_in_clusters.sum() with open(os.path.join(self.saveDir, 'ColormapForCellTypes.txt'), 'r') as temp_file: colormap = {item.strip().split('\t')[0]:eval(item.strip().split('\t')[1]) for item in temp_file.readlines()} celltypes = df_votingResults['Predicted cell type'].str.split(' #', expand=True)[0].values.copy()[ORDER] ax2.barh(y=range(len(cells_in_clusters)), width=cells_in_clusters, height=0.8, align='center', color=[colormap[i] for i in celltypes]) for i in range(len(cells_in_clusters)): ax2.text(np.max(cells_in_clusters), i, cells_in_clusters[i], ha='right',va='top', color='k', weight='bold', fontsize=fontsize) ax2.text(0.02 * numberOfCells, i, str(round(100 * cells_in_clusters[i] / numberOfCells, 1)) + '%', ha='left',va='bottom', color='b', fontsize=fontsize) ax2.set_xticklabels(cells_in_clusters, fontsize=fontsize) ax2.set_yticklabels(cells_in_clusters, alpha=0) ax2.set_xticklabels(cells_in_clusters, alpha=0) ax2.set_xticks([]) ax2.set_yticks([]) ax2.set_xlabel('Number of\ncells in clusters', fontsize=fontsize) ax2.set_ylim(-1.5, len(cells_in_clusters) - 0.5) if True: ax3 = plt.subplot(gs[2]) masked = np.ma.array(df_markers_weighted.values, mask=(df_markers_weighted.values == 0.)) cmap = plt.cm.PiYG #cmap = matplotlib.colors.LinearSegmentedColormap.from_list('RedGreen', [(1, 0, 0), (0, #1, 0)], N=100) cmap.set_bad('white') value = 0.5 * np.abs(df_markers_weighted).max().max() ax3.imshow(masked, cmap=cmap, vmin=-value, vmax=+value, interpolation='None', aspect='auto') ax3.set_xticks(range(X_markers_cluster_means_transpose.shape[1])) ax3.set_yticks(range(df_markers_weighted.shape[0])) xtickslabels = 
np.array(df_markers_cluster_centroids.index[ORDER2]) for i in range(0,len(xtickslabels),2): xtickslabels[i] += " ─────────" ax3.set_xticklabels(xtickslabels, rotation=90, fontsize=5*fontscale) ax3.set_yticklabels(df_markers_weighted.index.values, rotation=0, fontsize=8*fontscale) ax3.set_xlim([-0.5, df_markers_weighted.shape[1] - 0.5]) ax3.set_ylim([-0.5, df_markers_weighted.shape[0] - 0.5]) self.saveFigure(fig, self.saveDir, self.dataName + '_marker_expression', extension=extension, dpi=dpi, **kwargs) return fig
[docs] @tryExcept def internalMakeMarkerSubplots(self, df, X_projection, hugo_cd_dict, NoFrameOnFigures = False, HideClusterLabels = False, outlineClusters = True, analyzeBy = 'cluster', saveSubDir = 'marker_subplots', dpi = 300, extension = 'png', **kwargs): '''Produce subplots on each marker and its expression on all clusters Parameters: df: pandas.DataFrame Data with marker genes expression X_projection: 2d numpy.array 2D coordinates for each cell hugo_cd_dict: dictionary With aliases for hugo names of genes NoFrameOnFigures: boolean, Default False Whether to include frame on the figure HideClusterLabels: boolean, Default False Whether to print cluster labels on the figure outlineClusters: boolean, Default True Whether to outline the clusters with circles analyzeBy: str, Default 'cluster' What level of lablels to include. Other possible option is 'label' Returns: None Usage: Function used internally DCS = DigitalCellSorter.DigitalCellSorter() DCS.internalMakeMarkerSubplots(df_markers_expr, projection, hugo_cd_dict) ''' def MarkerSubplot(counter, marker, df, analyzeBy, X_projection, cellClusterIndexLabel, hugo_cd_dict, dataName, saveDir, NoFrameOnFigures, HideClusterLabels, XLIM, YLIM, directory, circles): fig,ax = plt.subplots(figsize=(8,8)) ax.cla() suffix = '(' + str(hugo_cd_dict[marker]).replace('\"', '').replace('\'', '').replace('(', '').replace(')', '').replace(' ','') + ')' ax.plot(np.nan,np.nan,'*',markersize=15,c=cm.seismic(1.0),label=marker + '\n' + suffix.replace(',','\n')) circleIndices = np.where(df.loc[marker].values == 0)[0] # cells that don't have this marker starIndices = np.where(df.loc[marker].values > 0)[0] # cells that have this marker starIndices = starIndices[np.argsort(df.loc[marker].values[starIndices])] args1 = [X_projection[0,circleIndices], X_projection[1,circleIndices]] kwargs1 = {'marker':'o', 'c':'b', 'alpha':0.1, 's':6 * 3, 'linewidth':0,} args2 = [X_projection[0,starIndices], X_projection[1,starIndices]] kwargs2 = {'marker':'*', 
'c':cm.seismic(df.loc[marker].values[starIndices] / np.max(df.loc[marker].values[starIndices])), 's':30 * 4, 'linewidth':0.0,} ax.scatter(*args1,**kwargs1) ax.scatter(*args2,**kwargs2) for label in set(cellClusterIndexLabel): # cells with this label X_projection2_cluster = X_projection[:,cellClusterIndexLabel == label] x_mean = np.mean(X_projection2_cluster[0,:]) y_mean = np.mean(X_projection2_cluster[1,:]) _text_label = label if not HideClusterLabels else '' ax.text(x_mean,y_mean, _text_label. replace('-','-\n').replace(' ','\n'). replace('T\n','T ').replace('B\n','B '). replace('\n#',' #').replace('/','/\n'). replace('NK\n','NK ').replace('Stem\n','Stem '), fontsize=10, ha='center',va='center',#alpha=0.75, ).set_path_effects([path_effects.Stroke(linewidth=3, foreground='white'),path_effects.Normal()]) if circles: radius = np.sqrt(X_projection2_cluster.shape[1]) * 300.0 ax.scatter(x_mean,y_mean,s=radius * 1,facecolors='none',edgecolors='k') ax.set_xlim(XLIM) ax.set_ylim(YLIM) ax.legend(loc='upper right', frameon=False, fontsize=14) #loc='best',numpoints=1,fontsize=12 ax.set_xticks([]) ax.set_yticks([]) if NoFrameOnFigures: #fig.patch.set_visible(False) ax.axis('off') fig.tight_layout() if self.saveDir is not None: self.saveFigure(fig, directory, '%s_%s_%s_%s' % (self.dataName,marker,suffix.replace(',','_').replace('/','_'),analyzeBy), extension=extension, dpi=dpi, **kwargs) if self.verbose >= 2: print(marker, end=" ", flush=True) return maxs = np.max(X_projection,axis=1) mins = np.min(X_projection,axis=1) maxDiffs = maxs - mins deltas = maxDiffs * 0.05 XLIM = [mins[0] - deltas[0],maxs[0] + deltas[0]] YLIM = [mins[1] - deltas[1],maxs[1] + deltas[1]] if len(df.index) > 1: if self.verbose >= 2: print('\nSaving marker expression plots:\n') else: if self.verbose >= 2: print('Saving expression plot of:', end=' ', flush=True) if analyzeBy == 'celltype': try: index = df.columns.get_level_values('label').str.split(' #', expand=True).get_level_values(0).values except: index 
= df.columns.get_level_values('celltype').values else: index = df.columns.get_level_values(analyzeBy).values for counter,marker in enumerate(df.index.values): MarkerSubplot(counter, marker, pd.DataFrame(data=np.reshape(np.array(df.loc[marker]), (1,len(df.loc[marker]))), columns=df.columns, index=[marker]), analyzeBy, X_projection, index, hugo_cd_dict, self.dataName, self.saveDir, NoFrameOnFigures, HideClusterLabels, XLIM, YLIM, os.path.join(self.saveDir, saveSubDir, ''), outlineClusters) if self.verbose >= 1: print() return
[docs] @tryExcept def makeAnnotationResultsMatrixPlot(self, dpi = 300, extension = 'png', **kwargs): '''Produce voting results voting matrix plot Parameters: dpi: int, Default 600 Resolution of the figure image extension: str, Default 'png' Format of the figure file Returns: None Usage: DCS = DigitalCellSorter.DigitalCellSorter() DCS.makeAnnotationResultsMatrixPlot() ''' df_votingResults = pd.read_excel(os.path.join(self.saveDir, self.dataName + '_annotation.xlsx'), sheet_name='z-scores') cellTypes = sorted([x for x in df_votingResults.columns.values.tolist() if x not in ['cluster', 'Predicted cell type', '# cells in cluster', 'Winning score', 'Supporting markers', 'Contradicting markers', 'All markers']]) #df_votingResults['order'] = #scipy.cluster.hierarchy.dendrogram(scipy.cluster.hierarchy.linkage(df_votingResults[cellTypes].values, #method='ward', metric='euclidean', optimal_ordering=True), # no_plot=True,get_leaves=True)['leaves'] df_votingResults['order'] = np.argsort(np.argsort(df_votingResults['Predicted cell type'])) df_votingResults = df_votingResults.sort_values(by='order', axis=0, ascending=False) numberOfCells = np.sum(df_votingResults['# cells in cluster']) num_of_cell_types = len(cellTypes) num_of_clusters = np.unique(df_votingResults['cluster']).shape[0] indicis_of_clusters = df_votingResults['cluster'] assigned_names_of_clusters = df_votingResults['Predicted cell type'] label_max = 0.5 * max([len(assigned_names_of_clusters[i]) for i in range(len(assigned_names_of_clusters))]) _figsize = np.float_((num_of_cell_types,num_of_clusters)) / np.max(num_of_clusters) * 15.0 _figsize[0] += 1.0 + label_max _figsize[1] += 2.0 gs = matplotlib.gridspec.GridSpec(1, 2, width_ratios=[3,1]) fig = plt.figure(figsize=_figsize) ax = fig.add_axes([0.15, 0.125, 0.65, 0.85]) axx = fig.add_axes([0.76, 0.125, 0.19, 0.85]) zscores = df_votingResults[cellTypes].values ax.imshow(zscores, aspect=1, cmap='Greens', vmin=0, vmax=0.5, interpolation='None') for i in 
range(num_of_clusters): for j in range(num_of_cell_types): if np.round(zscores[i,j],1) > 0: if zscores[i,j] == np.max(zscores[i,:]): ax.text(j,i,np.round(zscores[i,j],1), color='w', fontsize=125 * 4 / max([num_of_cell_types, num_of_clusters]),ha='center',va='center').set_path_effects([path_effects.Stroke(linewidth=4, foreground='red'),path_effects.Normal()]) else: ax.text(j,i,np.round(zscores[i,j],1), color='w', fontsize=125 * 3 / max([num_of_cell_types, num_of_clusters]),ha='center',va='center').set_path_effects([path_effects.Stroke(linewidth=2, foreground='black'),path_effects.Normal()]) ax.set_xticks(range(num_of_cell_types)) ax.set_yticks(range(num_of_clusters)) ytickslabels = copy.deepcopy(assigned_names_of_clusters) for i in range(len(ytickslabels)): ytickslabels[i] = str(assigned_names_of_clusters[i]) + ' (' + str(indicis_of_clusters[i]) + ')' xtickslabels = np.array(copy.deepcopy(cellTypes)) #for i in range(len(xtickslabels)): # if i % 3 == 1: xtickslabels[i] = '\n' + xtickslabels[i] # if i % 3 == 2: xtickslabels[i] = '\n\n' + xtickslabels[i] ax.set_xticklabels(xtickslabels, rotation=30, fontsize=20, ha='right') ax.set_yticklabels(ytickslabels, fontsize=20, rotation=0) ax.set_xlim([-0.5,num_of_cell_types - 0.5]) ax.set_ylim([-0.5,num_of_clusters - 0.5]) cells_in_clusters = df_votingResults['# cells in cluster'].values with open(os.path.join(self.saveDir, 'ColormapForCellTypes.txt'), 'r') as temp_file: colormap = {item.strip().split('\t')[0]:eval(item.strip().split('\t')[1]) for item in temp_file.readlines()} celltypes = df_votingResults['Predicted cell type'].str.split(' #', expand=True)[0].values.copy() axx.barh(y=range(len(cells_in_clusters)), width=cells_in_clusters, height=0.8, align='center', color=[colormap[i] for i in celltypes]) for i in range(len(cells_in_clusters)): axx.text(np.max(cells_in_clusters), i, cells_in_clusters[i], ha='right',va='top', color='k', weight='bold', fontsize = 20) axx.text(0.02 * numberOfCells, i, str(round(100 * 
cells_in_clusters[i] / numberOfCells, 1)) + '%', ha='left',va='bottom', color='b', fontsize = 20) axx.set_xticklabels(cells_in_clusters, fontsize=20) axx.set_yticklabels(cells_in_clusters, alpha=0) axx.set_xticklabels(cells_in_clusters, alpha=0) axx.set_xlabel('Number of\ncells in clusters', fontsize=20) axx.set_ylim(-0.5, num_of_clusters - 0.5) self.saveFigure(fig, self.saveDir, self.dataName + '_scores_matrix', extension=extension, dpi=dpi, **kwargs) return fig
[docs] @tryExcept def makeHistogramNullDistributionPlot(self, dpi = 600, extension = 'png', **kwargs): '''Produce histogram plot of the voting null distributions Parameters: dpi: int, Default 600 Resolution of the figure image extension: str, Default 'png' Format of the figure file Returns: None Usage: DCS = DigitalCellSorter.DigitalCellSorter() DCS.makeHistogramNullDistributionPlot() ''' try: df_noise_dict = pd.read_excel(os.path.join(self.saveDir, self.dataName + '_annotation.xlsx'), sheet_name='Null distributions', index_col=0, header=[0,1], skiprows=[2]) except Exception as exception: if self.verbose >= 2: print(exception) print('Error loading distributions from the results file') return if len(df_noise_dict) == 0: if self.verbose >= 1: print('Null distribution is empty in the results file') return df_votingResultsV = pd.read_excel(os.path.join(self.saveDir, self.dataName + '_annotation.xlsx'), sheet_name='Voting scores', dtype={'cluster':str}).reset_index().set_index('cluster') df_votingResultsZ = pd.read_excel(os.path.join(self.saveDir, self.dataName + '_annotation.xlsx'), sheet_name='z-scores', dtype={'cluster':str}).reset_index().set_index('cluster') predicted_cell_type_cluster = df_votingResultsZ['Predicted cell type'].values predicted_cell_type = df_votingResultsZ['Predicted cell type'].str.split(' #', expand=True)[0].values cellTypes = sorted([x for x in df_votingResultsV.columns.values.tolist() if x not in ['cluster', 'Predicted cell type', '# cells in cluster', 'Winning score', 'Supporting markers', 'All markers']]) df_votingResultsV = df_votingResultsV[cellTypes] df_votingResultsZ = df_votingResultsZ[cellTypes] cell_types = np.unique(df_noise_dict.columns.get_level_values(0).values) num_of_cell_types = cell_types.shape[0] clusters = np.unique(df_noise_dict.columns.get_level_values(1).values).astype(str) num_of_clusters = clusters.shape[0] maxy = np.round(np.nanmax(df_noise_dict.values), 2) with open(os.path.join(self.saveDir, 
'ColormapForCellTypes.txt'), 'r') as temp_file: colormap = {item.strip().split('\t')[0]:eval(item.strip().split('\t')[1]) for item in temp_file.readlines()} origWidth = matplotlib.rcParams['axes.linewidth'] matplotlib.rcParams['axes.linewidth'] = 0.1 gs = matplotlib.gridspec.GridSpec(num_of_clusters, num_of_cell_types, hspace=0.45, wspace=0.1, bottom=0.04, top=0.96, left=0.05, right=0.99) fig = plt.figure(figsize=(num_of_cell_types, num_of_clusters * 0.4)) for i in range(num_of_cell_types): try: minx, maxx = np.round(df_noise_dict.index.values[np.where(df_noise_dict.xs(key=cell_types[i], level=0, axis=1).values.sum(axis=1) != 0.)[0][[0,-1]]], 3) #maxx += 0.1 maxx = np.round(maxx, 3) except Exception as exception: print(exception) minx = np.round(df_noise_dict.index.values[0], 3) maxx = np.round(df_noise_dict.index.values[-1], 3) if self.verbose >= 2: print(cell_types[i], end=':\t') for j in range(num_of_clusters): if self.verbose >= 2: print(j, end=',', flush=True) fontsize = 3 ax = plt.subplot(gs[i + num_of_cell_types * j]) ax.bar(df_noise_dict.index.values, df_noise_dict[(cell_types[i], clusters[j])].values, width=df_noise_dict.index.values[2] - df_noise_dict.index.values[1], align='center', color=colormap[cell_types[i]]) valueV = df_votingResultsV.loc[clusters[j], cell_types[i]] valueZ = df_votingResultsZ.loc[clusters[j], cell_types[i]] ax.axvline(x=valueV, ymin=0, ymax=1, color='k', lw=0.2) color = 'k' if predicted_cell_type[j] != cell_types[i] else 'r' xloc = 0.02 * minx + minx #valueV ax.text(xloc, maxy - 0.02 * maxy, r'$V_{%s,%s}=$' % (i,j) + str(np.round(valueV,2)), fontsize=fontsize, va='top', ha='left', color=color, zorder=np.inf) ax.text(xloc, maxy - 0.2 * maxy, r'$\Lambda_{%s,%s}=$' % (i,j) + str(np.round(valueZ,2)), fontsize=fontsize, va='top', ha='left', color=color, zorder=np.inf) if j == 0: ax.set_title(cell_types[i], fontdict={'color': 'b', 'size':'6'}) if i == 0: ax.text(0. 
* maxx, maxy + 0.05 * maxy, predicted_cell_type_cluster[j] + ' (Cluster %s)' % clusters[j], rotation=0, fontsize=fontsize, weight='bold', va='bottom', ha='left', color='k') ax.set_ylabel('Probability', fontsize=fontsize) ax.set_yticklabels([0., maxy], fontsize=fontsize) else: ax.set_yticklabels([], fontsize=fontsize) if j == num_of_clusters - 1: #ax.set_xlabel('Voting score', fontsize=fontsize) ax.set_xticklabels([minx, maxx], fontsize=fontsize) if maxx > 0.0 and minx < 0.0: ax.text(0.0, -0.2 * maxy, '0.0', fontsize=fontsize, va='top', ha='center', color='k') else: ax.set_xticklabels([], fontsize=fontsize) ax.set_xticks([minx, maxx]) ax.set_yticks([0., maxy]) ax.set_xlim(minx, maxx) ax.set_ylim(0., maxy) ax.tick_params(direction='in', length=1, width=0.1, colors='k') if self.verbose >= 1: print() self.saveFigure(fig, self.saveDir, self.dataName + '_null_distributions', extension=extension, dpi=dpi, **kwargs) matplotlib.rcParams['axes.linewidth'] = origWidth return fig
@tryExcept
def makeProjectionPlot(self, Xprojection, cellClusterIndexLabel, suffix = '', colormap = cm.jet, legend = True, labels = True, colorbar = False, fontsize = 10, plotNaNs = True, rightShift = 0.3, dpi = 300, extension = 'png', **kwargs):

    '''Produce projection plot (2D layout) with a specified coloring scheme

    Parameters:
        Xprojection: 2d array-like
            2D coordinates for each cell, indexed as [axis, cell]

        cellClusterIndexLabel: 1d array-like
            Cluster index (label) for each cell. NaN entries are treated
            as missing labels

        suffix: str, Default ''
            Text label to append to the figure name

        colormap: matplotlib colormap, str or dict, Default cm.jet
            Cell coloring sequence. A colormap (or its string name) is sampled
            at evenly spaced positions, one per label. Any other object is
            indexed with the part of the label that precedes ' #'

        legend: boolean, Default True
            Whether to print legend

        labels: boolean, Default True
            Whether to print labels at the median position of each cluster

        colorbar: boolean, Default False
            Whether to show colorbar
            Use with non-numerical values will raise an error

        fontsize: int, Default 10
            Labels and legend font size

        plotNaNs: boolean, Default True
            Whether to plot cells with NaN labels (in grey)

        rightShift: float, Default 0.3
            Fraction of space to leave on the right-hand side of the plot.
            This parameter is useful for adjusting legend overlap with data points.

        dpi: int, Default 300
            Resolution of the figure image

        extension: str, Default 'png'
            Format of the figure file

        Any additional keyword arguments are forwarded to saveFigure

    Returns:
        matplotlib.figure.Figure
            The figure that was drawn. Note that the tryExcept decorator
            applied to this method does not forward return values

    Usage:
        DCS = DigitalCellSorter.DigitalCellSorter()

        DCS.makeProjectionPlot(projection, cellClusterIndexLabel, suffix)
    '''

    def add_colorbar(fig, labels, cmap = matplotlib.colors.LinearSegmentedColormap.from_list('GR', [(0, 1, 0), (1, 0, 0)], N=100), fontsize = 10):

        '''Add a vertical colorbar spanning the range of the (numerical) labels'''

        mapp = cm.ScalarMappable(norm=matplotlib.colors.Normalize(vmin=np.min(labels), vmax=np.max(labels)), cmap=cmap)
        mapp.set_array(labels)

        sp = np.linspace(np.max(labels), np.min(labels), num=6, endpoint=True)

        axisColor = fig.add_axes([0.9,0.5,0.01,0.4])
        fig.colorbar(mapp, cax=axisColor, ticks=sp)

        axisColor.tick_params(labelsize=fontsize)
        axisColor.set_yticklabels(np.round(sp,2))

        return None

    # A plain list compared with itself yields a single bool instead of an
    # elementwise mask, so normalize both inputs to arrays first. Going through
    # pandas keeps NaN as a real missing value in mixed str/NaN label lists
    Xprojection = np.asarray(Xprojection)
    cellClusterIndexLabel = np.asarray(pd.Series(cellClusterIndexLabel))

    fig = plt.figure(figsize=(8,8))
    ax = fig.add_axes([0.05,0.05,0.9,0.9])

    # Axis limits are taken from all cells, including the ones with missing labels
    maxs, mins = np.max(Xprojection,axis=1), np.min(Xprojection,axis=1)

    # NaN is the only value that is not equal to itself
    missing = np.where(cellClusterIndexLabel != cellClusterIndexLabel)[0]
    if plotNaNs and len(missing) > 0:
        ax.plot(Xprojection[0, missing], Xprojection[1, missing], 'o', color='grey', mew=0.5, alpha=0.2, markeredgecolor='k', label='NaN')

    nonMissing = np.where(cellClusterIndexLabel == cellClusterIndexLabel)[0]
    cellClusterIndexLabel = cellClusterIndexLabel[nonMissing]
    Xprojection = Xprojection[:, nonMissing]

    possible_cluster_labels = np.sort(np.unique(cellClusterIndexLabel))

    if labels:
        if self.verbose >= 3:
            print(possible_cluster_labels)

    # Resolve a colormap name once; any Colormap subclass is then sampled by position
    if isinstance(colormap, str):
        colormap = plt.get_cmap(colormap)
    colormapIsCallable = isinstance(colormap, matplotlib.colors.Colormap)

    texts = []
    for ilabel, label in enumerate(possible_cluster_labels):
        if colormapIsCallable:
            color = colormap(ilabel / len(possible_cluster_labels))
        else:
            color = colormap[label.split(' #')[0]]

        XprojectionC = Xprojection[:,cellClusterIndexLabel == label]
        ax.plot(XprojectionC[0,:], XprojectionC[1,:], 'o', color=color, mew=0.5, alpha=0.3, markeredgecolor='k', label=label)

        if labels:
            text = ax.text(np.median(XprojectionC[0,:]), np.median(XprojectionC[1,:]), label, fontsize=fontsize, ha='center',va='center')
            text.set_path_effects([path_effects.Stroke(linewidth=3, foreground='white'), path_effects.Normal()])
            texts.append(text)

    # Nothing to de-overlap when labels are switched off
    if len(texts) > 0:
        adjust_text(texts,
                    # arrowprops=dict(arrowstyle='-', color='k', lw=0.3, alpha=0.75),
                    expand_text=(0.9, 0.9), expand_points=(0.91, 0.9), force_text=(0.01, 0.01), force_points=(0.01, 0.01))

    ax.set_xticks([])
    ax.set_yticks([])

    ax.set_xlim([mins[0] - (maxs[0] - mins[0]) * 0.05, (1 + rightShift) * (maxs[0] + (maxs[0] - mins[0]) * 0.05)])
    ax.set_ylim([mins[1] - (maxs[1] - mins[1]) * 0.05, maxs[1] + (maxs[1] - mins[1]) * 0.05])

    if legend:
        ax.legend(loc='lower right', frameon=False, fontsize=fontsize)

    #fig.patch.set_visible(False)
    ax.axis('off')

    if colorbar:
        add_colorbar(fig, possible_cluster_labels, cmap=colormap, fontsize=fontsize)

    self.saveFigure(fig, self.saveDir, '%s_clusters_%s' % (self.dataName, suffix), extension=extension, dpi=dpi, **kwargs)

    return fig
@tryExcept
def makeStackedBarplot(self, clusterName = None, legendStyle = False, includeLowQC = True, fontsize = 12, dpi = 300, extension = 'png', **kwargs):

    '''Produce stacked barplot with cell fractions

    Parameters:
        clusterName: str, Default None
            Label used in the figure file name.
            If None the self.dataName value will be used

        legendStyle: boolean, Default False
            Use one out of two styles of this figure: a legend box (True)
            or per-segment text labels connected with lines (False)

        includeLowQC: boolean, Default True
            Whether to include low quality cells

        fontsize: int, Default 12
            Font size of the per-segment text labels (used when legendStyle is False)

        dpi: int, Default 300
            Resolution of the figure image

        extension: str, Default 'png'
            Format of the figure file

        Any additional keyword arguments are forwarded to saveFigure

    Returns:
        matplotlib.figure.Figure
            The figure that was drawn. Note that the tryExcept decorator
            applied to this method does not forward return values

    Usage:
        DCS = DigitalCellSorter.DigitalCellSorter()

        DCS.makeStackedBarplot(clusterName)
    '''

    def get_stacked_data_and_colors(saveDir):

        '''Read cell type colors and cluster sizes, return percentages per cell type and the colors'''

        # Each line of the colormap file is "<cell type>\t(<r>, <g>, <b>, <a>)"
        with open(os.path.join(saveDir, 'ColormapForCellTypes.txt'), 'r') as temp_file:
            colors = temp_file.readlines()

        colors = np.vstack([(color.strip('\n').split('\t')) for color in colors])

        # Builtin float is used because the np.float_ alias was removed in NumPy 2.0
        colors = pd.DataFrame(colors.T[1], index=colors.T[0]).apply(lambda x: tuple(float(value) for value in x[0][1:][:-1].split(',')), axis=1)

        df = pd.read_excel(os.path.join(self.saveDir, self.dataName + '_annotation.xlsx'), sheet_name='z-scores')

        barName = self.dataName

        # Cut the ' #<n>' cluster suffix (if any) and strip marker characters
        index = []
        for name in df['Predicted cell type']:
            position = name.find('#')
            end = len(name) if position == -1 else position - 1
            index.append(name[:end].strip('*').strip('#').strip(' '))

        df_BM_temp = pd.DataFrame(data=df['# cells in cluster'].values, index=index, columns=[barName])

        # Rows are grouped by default; the explicit axis keyword is deprecated in pandas
        df_BM_temp = df_BM_temp.groupby(level=0, sort=False).sum()

        df_main = pd.DataFrame(data=np.zeros((len(colors),1)), index=colors.index, columns=[barName])

        for item, count in df_BM_temp[barName].items():
            df_main.loc[item,barName] += count

        # Helper column used only for ordering the segments; it is dropped below
        s = 'sums'
        df_main[s] = np.array(np.sum(df_main, axis=1))
        df_main.loc[self.nameForUnknown, s] = 0

        if includeLowQC:
            try:
                cells_all = pd.read_hdf(self.fileHDFpath, key='df_projection_pre_QC', mode='r').columns.get_level_values('cell')
                cells_high = pd.read_hdf(self.fileHDFpath, key='df_projection', mode='r').columns.get_level_values('cell')
                cells_low_count = len(cells_all.difference(cells_high))
                del cells_all, cells_high

                df_main.loc[self.nameForLowQC, df_main.columns[0]] = cells_low_count

                # Negative sort key places the low quality segment first (bottom of the bar)
                df_main.loc[self.nameForLowQC, df_main.columns[1]] = -1

                colors[self.nameForLowQC] = (0.6, 0.6, 0.6, 1.)
            except Exception as exception:
                if self.verbose >= 1:
                    print(exception)
                    print('QC data not found')

        df_main = df_main.apply(lambda x: 100. * x / np.sum(df_main, axis=0), axis=1).loc[np.sum(df_main, axis=1) > 0].sort_values(by=[s]).drop(columns=[s])

        return df_main, colors

    if clusterName is None:
        clusterName = self.dataName

    df_Main, colors = get_stacked_data_and_colors(self.saveDir)

    if legendStyle:
        fig,ax = plt.subplots(figsize=(4.5,8)) #4.15
    else:
        fig = plt.figure(figsize=(4.5,8))
        ax = fig.add_axes([0.2, 0.05, 0.1, 0.9])

    barWidth = 1.0
    cellTypes = df_Main.index
    bottom = np.zeros((len(df_Main.columns)))
    centers = []
    fractions = []
    for i in range(len(cellTypes)):
        # Stack each segment on top of the previous one
        bottom += df_Main.loc[cellTypes[i - 1]].values if i > 0 else 0
        ax.bar(range(len(df_Main.columns)), list(df_Main.loc[cellTypes[i]]), bottom=list(bottom), color=colors.loc[cellTypes[i]], edgecolor='white', width=barWidth, label=cellTypes[i])
        centers.append(bottom + 0.5 * df_Main.loc[cellTypes[i]].values[0])
        fractions.append(df_Main.loc[cellTypes[i]].values[0])

    plt.xticks(range(len(df_Main.columns)), list(df_Main.columns), fontsize=12)
    plt.yticks([0,20,40,60,80,100], ['0','20%','40%','60%','80%','100%'], fontsize=12)

    handles, labels = ax.get_legend_handles_labels()

    # Only the longest labels get wrapped onto several lines
    ms = np.max([len(item) for item in labels]) - len('cell')
    labels = [item.replace(' ','\n').replace('\nCD4', ' CD4').replace('CD4\n', 'CD4 ').replace('\ncell', ' cell').replace('B\n', 'B ').replace('T\n', 'T ') if len(item) >= ms else item for item in labels[::-1]]

    if legendStyle:
        ax.legend(handles[::-1], labels, loc='upper left', bbox_to_anchor=(1,1), ncol=1, frameon=False, fontsize=14, labelspacing=1, title = ''.join([' ' for _ in range(60)]))
    else:
        fractions = np.round(np.array(fractions)[::-1], 1)
        centers = np.round(np.array(centers).T[0][::-1], 0)
        centers_orig = centers.copy()

        # Push labels upwards so that neighbours are at least `step` apart
        # NOTE(review): index 0 (topmost segment) is never adjusted by this loop - confirm that is intended
        step = 5.
        for i in range(len(centers) - 2,0,-1):
            if (centers[i] - centers[i + 1]) < step:
                centers[i] = centers[i + 1] + step

        for i in range(len(centers)):
            ax.text(1.3, centers[i], '%s%%  ' % (fractions[i]) + labels[i], fontsize=fontsize, va='center', ha='left')
            ax.plot([0.65, 1.2], [centers_orig[i], centers[i]], c='k', lw=0.75, clip_on=False)

    plt.xlim((-0.5, len(df_Main.columns) - 0.5))
    plt.ylim((0, 100))

    for spine in plt.gca().spines.values():
        spine.set_visible(False)

    if legendStyle:
        fig.tight_layout()

    # clusterName is never None here: it falls back to self.dataName above
    saveName = "%s_subclustering_stacked_barplot_%s" % (self.dataName, clusterName.replace(' ', '_').replace('*', ''))
    self.saveFigure(fig, self.saveDir, saveName, extension=extension, dpi=dpi, **kwargs)

    if self.verbose >= 2:
        print('Saved stacked bar plot: %s' % (clusterName))

    return fig
@tryExcept
def makeQualityControlHistogramPlot(self, subset, cutoff, plotPathAndName = None, N_bins = 100, mito = False, displayMeasures = True, precision = 4, quantilePlotCutoff = 0.95, dpi = 300, extension = 'png', fontScale = 1.5, includeTitle = False, **kwargs):

    '''Function to visualize a QC measure distribution and its cutoff on a histogram

    Parameters:
        subset: pandas.Series
            Data to analyze

        cutoff: float
            Cutoff to display

        plotPathAndName: str, Default None
            Path and name of the figure. Its base name is used as the figure title
            and, with '_histogram' appended, as the file name. If None then 'QC_Plot' is used

        N_bins: int, Default 100
            Number of bins of the histogram

        mito: boolean, Default False
            Whether the analysis of mitochondrial genes fraction

        displayMeasures: boolean, Default True
            Print vertical dashed lines along with mean, median, and standard deviation

        precision: int, Default 4
            Number of digits after decimal

        quantilePlotCutoff: float, Default 0.95
            Distributions are cut to display the range from 0 to quantilePlotCutoff

        dpi: int, Default 300
            Resolution of the figure image

        extension: str, Default 'png'
            Format of the figure file

        fontScale: float, Default 1.5
            Scale most of the figure fonts

        includeTitle: boolean, Default False
            Whether to include title on the figure

        Any additional keyword arguments are forwarded to saveFigure

    Returns:
        matplotlib.figure.Figure
            The figure that was drawn. Note that the tryExcept decorator
            applied to this method does not forward return values

    Usage:
        DCS = DigitalCellSorter.DigitalCellSorter()

        DCS.makeQualityControlHistogramPlot(subset, cutoff)
    '''

    if plotPathAndName is None:
        plotPathAndName = 'QC_Plot'

    range_min = 0. #np.min(subset)
    if mito:
        range_max = max(1.1 * cutoff, np.quantile(subset, quantilePlotCutoff) + 0.05)
    else:
        range_max = np.quantile(subset, quantilePlotCutoff)

    # Normalize the counts to a probability density (the same normalization that
    # scipy.stats.rv_histogram applies internally) without relying on its private
    # attributes. The leading zero keeps one height per bin edge
    counts, hist_bins = np.histogram(subset, bins=N_bins, range=(range_min, range_max))
    density = counts / float(np.sum(counts * np.diff(hist_bins)))
    hist_heights = np.hstack([0., density]) / N_bins

    fig, ax = plt.subplots(figsize=(8,8))

    bar_bin_width = range_max / N_bins
    ax.bar(hist_bins, hist_heights, width=0.9 * bar_bin_width, color='b', align='center')

    try:
        title = os.path.basename(plotPathAndName)
    except Exception as exception:
        if self.verbose >= 1:
            print(exception)

        title = plotPathAndName

    if includeTitle:
        ax.set_title(title, fontdict={'color': 'b'})

    ax.set_xlabel('Fraction' if mito else 'Count', fontsize=10 * fontScale)
    ax.set_ylabel('Density', fontsize=10 * fontScale)
    ax.set_ylim(0.,ax.get_ylim()[1])
    ax.set_xlim(range_min - 0.5 * bar_bin_width, range_max + 0.5 * bar_bin_width)
    ax.tick_params(labelsize=8 * fontScale)

    # Smooth outline of the histogram: interpolating spline followed by a Savitzky-Golay filter
    xs = np.linspace(hist_bins[0], hist_bins[-1], 1000)
    spline_data = np.vstack((xs, UnivariateSpline(hist_bins, hist_heights, k=5, s=0)(xs))).T
    sg = scipy.signal.savgol_filter(spline_data.T[1], 101, 3)
    ax.plot(spline_data.T[0], sg, 'r', lw=3, alpha=0.95)

    # Height of the smoothed curve at the cutoff; zero if the cutoff is beyond the plotted range
    try:
        x, y = cutoff, sg[np.where(spline_data.T[0] >= cutoff)[0][0]]
    except IndexError as exception:
        if self.verbose >= 1:
            print(exception)

        x, y = cutoff, 0.

    ax.plot([x,x], [0,y], 'k', lw=2)
    ax.plot(x, y, 'ko', ms=10, alpha=0.8)
    ax.plot(x, y, 'ro', ms=7)
    ax.text(x, -0.04 * spline_data.T[1].max(), str(np.round(cutoff, precision)), fontsize=8 * fontScale, va='top', ha='center', color='r')

    ax.ticklabel_format(axis='y', style='sci', scilimits=(0,0), useMathText=False)

    # Shade the rejected side of the cutoff
    ax.axvspan(cutoff, 1.5 * range_max if mito else -1.5 * range_min, alpha=0.1, color='red', hatch='\\', linewidth=0.1)

    fig.tight_layout()

    if displayMeasures:
        texts = []

        dist_std, dist_median, dist_mean = np.round(np.std(subset),precision), np.round(np.median(subset),precision), np.round(np.mean(subset),precision)

        if self.verbose >= 2:
            print(plotPathAndName, '\tstd:', dist_std, '\tmedian:', dist_median, '\tmean:', dist_mean)

        xspan = ax.get_xlim()[1] - ax.get_xlim()[0]
        yspan = ax.get_ylim()[1] - ax.get_ylim()[0]

        def addMeasureLabel(position, height, caption, va = 'top'):

            '''Write a caption slightly to the right of position, at a fraction of the y-axis span'''

            text = ax.text(position + 0.02 * xspan, height * yspan, caption, fontsize=fontScale * 10, va=va, ha='left', color='k')
            text.set_path_effects([path_effects.Stroke(linewidth=1, foreground='white'),path_effects.Normal()])
            texts.append(text)

            return

        ax.axvline(x=dist_mean, color='k', lw=1.0, ls='--')
        addMeasureLabel(dist_mean, 0.98, r'$\mu=%s$' % (dist_mean))

        ax.axvline(x=dist_median, color='k', lw=1.0, ls='--')
        addMeasureLabel(dist_median, 0.94, r'$M=%s$' % (dist_median))

        ax.axvline(x=dist_median - dist_std, color='k', lw=1.0, ls='--')
        addMeasureLabel(dist_median - dist_std, 0.90, r'$M-\sigma=%s$' % (np.round(dist_median - dist_std,precision)))

        ax.axvline(x=dist_median + dist_std, color='k', lw=1.0, ls='--')
        addMeasureLabel(dist_median + dist_std, 0.90, r'$M+\sigma=%s$' % (np.round(dist_median + dist_std,precision)))

        # Standard deviation is shown as a double-headed arrow from the median
        addMeasureLabel(dist_median, 0.76, r'$\sigma=%s$' % (np.round(dist_std,precision)), va='bottom')
        ax.annotate('', (dist_median + dist_std, 0.75 * yspan), (dist_median, 0.75 * yspan), arrowprops={'arrowstyle':'<|-|>'})

        # NOTE(review): source indentation was lost; this note is assumed to belong to the displayMeasures branch - confirm
        if not mito:
            text = ax.text(0.98, 0.65, '%s%%\nof distribution \nis shown' % (100. * quantilePlotCutoff), va='top', ha='right', fontsize=fontScale * 10, transform=ax.transAxes)
            text.set_path_effects([path_effects.Stroke(linewidth=0.5, foreground='white'),path_effects.Normal()])

        adjust_text(texts)

    self.saveFigure(fig, os.path.dirname(plotPathAndName), label=os.path.basename(plotPathAndName) + '_histogram', extension=extension, dpi=dpi, **kwargs)

    return fig
@tryExcept
def makePlotOfNewMarkers(self, df_marker_cell_type, df_new_marker_cell_type, dpi = 300, extension = 'png', **kwargs):

    '''Produce plot of the new markers extracted from the annotated clusters

    Parameters:
        df_marker_cell_type: pandas.DataFrame
            Known markers per cell types, with markers in rows and cell types in columns.
            Positive values mark positive markers, negative values mark negative markers

        df_new_marker_cell_type: pandas.DataFrame
            New markers per cell types, with cell types in rows and markers in columns

        dpi: int, Default 300
            Resolution of the figure image

        extension: str, Default 'png'
            Format of the figure file

        Any additional keyword arguments are forwarded to saveFigure

    Returns:
        matplotlib.figure.Figure
            The figure that was drawn. Note that the tryExcept decorator
            applied to this method does not forward return values

    Usage:
        DCS = DigitalCellSorter.DigitalCellSorter()

        DCS.makePlotOfNewMarkers(df_marker_cell_type, df_new_marker_cell_type)
    '''

    # Order both axes by Ward hierarchical clustering so that similar rows/columns are adjacent
    ORDERx = scipy.cluster.hierarchy.dendrogram(scipy.cluster.hierarchy.linkage(df_new_marker_cell_type.values.T, 'ward'), no_plot=True, get_leaves=True)['leaves']
    ORDERy = scipy.cluster.hierarchy.dendrogram(scipy.cluster.hierarchy.linkage(df_new_marker_cell_type.values, 'ward'), no_plot=True, get_leaves=True)['leaves']

    genes = df_new_marker_cell_type.columns.values[ORDERx]
    celltypes = df_new_marker_cell_type.index.values[ORDERy]

    df_marker_cell_type = df_marker_cell_type[[celltype for celltype in celltypes if celltype in df_marker_cell_type.columns]]

    fig, ax = plt.subplots(figsize=(13,3))
    ax.imshow(df_new_marker_cell_type.values[ORDERy,:][:,ORDERx], cmap='Blues', interpolation='None', aspect='auto')

    ax.set_xticks([])
    ax.set_yticks(range(len(celltypes)))
    ax.set_yticklabels(celltypes, rotation=0, fontsize=8)

    ax.set_xlim([-0.5,df_new_marker_cell_type.shape[1] - 0.5])
    ax.set_ylim([-0.5,df_new_marker_cell_type.shape[0] - 0.5])

    # Column position of the first occurrence of every gene: one pass over the
    # genes instead of two linear scans per known marker
    genePosition = {}
    for position, gene in enumerate(genes):
        genePosition.setdefault(gene, position)

    def overlayKnownMarkers(row, celltype, known_markers, kind, style):

        '''Mark the known markers of one cell type that are also present among the new markers'''

        xy = np.array([np.array([genePosition[marker], row]) for marker in known_markers if marker in genePosition])

        if self.verbose >= 3:
            print('Overlapping %s markers of %s: %s (%s)' % (kind, celltype, len(xy), len(known_markers)))

        if len(xy) > 0:
            ax.plot(xy.T[0], xy.T[1], style, markeredgecolor='r', ms=1.0, markeredgewidth=0.2)

        return

    for i, celltype in enumerate(celltypes):
        if celltype in df_marker_cell_type.columns:
            weights = df_marker_cell_type[celltype]

            # Positive markers are drawn in green, negative markers in red
            overlayKnownMarkers(i, celltype, weights[weights > 0.].index.values, 'positive', 'go')
            overlayKnownMarkers(i, celltype, weights[weights < 0.].index.values, 'negative', 'ro')

    #ax.set_title('Additional markers along with the overlapping part of the input (red)')

    self.saveFigure(fig, self.saveDir, self.dataName + '_new_markers', extension=extension, dpi=dpi, **kwargs)

    return fig
@tryExcept
def makeTtestPlot(self, df, dfp, label = None, reorder = True, p_value_cutoff = 0.05, dpi = 300, extension = 'png', **kwargs):

    '''Produce heatmap plot of t-test p-Values calculated gene-pair-wise
    from the annotated clusters.

    Parameters:
        df: pandas.DataFrame
            t-test statistic values

        dfp: pandas.DataFrame
            t-test p-Values calculated gene-pair-wise

        label: str, Default None
            Label to include in the plot and in the figure file name

        reorder: boolean, Default True
            Reorder values to group similar

        p_value_cutoff: float, Default 0.05
            p-Value cutoff; cells at or below it are marked with a star

        dpi: int, Default 300
            Resolution of the figure image

        extension: str, Default 'png'
            Format of the figure file

        Any additional keyword arguments are forwarded to saveFigure

    Returns:
        matplotlib.figure.Figure
            The figure that was drawn. Note that the tryExcept decorator
            applied to this method does not forward return values

    Usage:
        DCS = DigitalCellSorter.DigitalCellSorter()

        DCS.makeTtestPlot(df, dfp)
    '''

    if reorder:
        def metricCommonEuclidean(u,v):

            '''Euclidean distance over the positions where both vectors are not NaN'''

            where_common = (~np.isnan(u)) & (~np.isnan(v))

            return np.sqrt(((u[where_common] - v[where_common]) ** 2).sum())

        order = scipy.cluster.hierarchy.dendrogram(scipy.cluster.hierarchy.linkage(df.values, method='average', metric=metricCommonEuclidean), no_plot=True, get_leaves=True)['leaves']

        # The same leaf order is applied to columns and rows of both tables
        df = df[df.columns.values[order]]
        dfp = dfp[dfp.columns.values[order]]
        df = df.loc[df.index.values[order]]
        dfp = dfp.loc[dfp.index.values[order]]

    # NOTE(review): source indentation was lost; the column reversal is assumed to apply regardless of reorder - confirm
    df = df[df.columns[::-1]]
    dfp = dfp[dfp.columns[::-1]]

    fig = plt.figure(figsize=(5,5))
    ax = fig.add_axes([0.35,0.02,0.6,0.6])

    # Work on a copy: set_bad on plt.cm.PuOr_r would modify the shared colormap instance
    cmap = copy.copy(plt.cm.PuOr_r) #BrBG #PiYG #seismic
    cmap.set_bad('grey')

    ax.imshow(df.values.astype(float), cmap=cmap, interpolation='None', aspect='auto')

    # Transposed so that the first index is the column (x) and the second is the row (y)
    wh = np.where(dfp.values.T <= p_value_cutoff)
    ax.plot(wh[0], wh[1], '*k')

    ax.set_xticks(range(df.shape[1]))
    ax.set_yticks(range(df.shape[0]))

    ax.set_xticklabels(df.columns.values, rotation=90, fontsize=8)
    ax.set_yticklabels(df.index.values, rotation=0, fontsize=8)

    ax.set_xlim([-0.5, df.shape[1] - 0.5])
    ax.set_ylim([-0.5, df.shape[0] - 0.5])

    ax.xaxis.tick_top()

    if label is not None:
        ax.text(-0.5, 1.5, label, transform=ax.transAxes, fontsize=10, color='k', ha='left', va='top').set_path_effects([path_effects.Stroke(linewidth=0.5, foreground='blue'),path_effects.Normal()])

    ax.set_title('Two-tailed p-Value (t-test)')

    data = df.values.flatten().astype(float)
    data = data[np.where(~np.isnan(data))]
    dataMin = np.min(data)
    dataMax = np.max(data)

    axisColor = fig.add_axes([0.22,0.75,0.08,0.02])

    norm = matplotlib.colors.Normalize(vmin=dataMin, vmax=dataMax)
    mapp = cm.ScalarMappable(norm=norm, cmap=cmap)
    mapp.set_array(data)
    fig.colorbar(mapp, cax=axisColor, ticks=[dataMin,dataMax], orientation='horizontal')
    axisColor.tick_params(labelsize=4)
    axisColor.set_xlabel('Statistic\n*p-Value < %s' % (p_value_cutoff), fontsize=5)

    # The colorbar is horizontal, so its ticks live on the x-axis
    axisColor.set_xticklabels([np.round(dataMin,2), np.round(dataMax,2)])

    # label defaults to None, which has no replace method
    labelSuffix = '' if label is None else label.replace('\n', '_')

    self.saveFigure(fig, self.saveDir, self.dataName + '_ttest_%s' % (labelSuffix), extension=extension, dpi=dpi, **kwargs)

    return fig
@tryExcept
def makeCellMarkersPiePlot(self, type1, type2, df_marker_cell_type = 'all', nameToAppend = None, listUnexpressedMarkers = True, orthogonalSectorsShift = 0.1, rotationAngle = 0, dpi = 300, extension = 'png', **kwargs):

    '''Make summary of markers comparison between two cell types.

    Parameters:
        type1: str
            Name of the first cell type to compare

        type2: str
            Name of the second cell type to compare

        df_marker_cell_type: pandas.DataFrame or str, Default 'all'
            Celltypes/Markers matrix.
            If 'expressed', then only expressed markers will be used.
            If 'all' then all markers of the input marker list will be used.
            If an instance of a pandas.DataFrame is passed, then its all markers will be used.
            Any other string is reported and the function returns without plotting.

        nameToAppend: str, Default None
            String to append to the figure file name.

        listUnexpressedMarkers: boolean, Default True
            List (highlight) markers that are not expressed.
            This option is ignored unless df_marker_cell_type=='all'

        orthogonalSectorsShift: float, Default 0.1
            Sectors marked as '+/-' and '-/+' are shifted off-center.
            Set this parameter to zero to have round continuous pie chart.

        rotationAngle: int or float, Default 0
            Angle in degrees that will rotate the whole pie chart counterclockwise.

        dpi: int, Default 300
            Resolution of the figure image

        extension: str, Default 'png'
            Format of the figure file

        Any additional keyword arguments are forwarded to saveFigure

    Returns:
        dict, matplotlib.figure.Figure
            Marker sets keyed by category ('+/*', '+/-', '*/-', '-/-', '-/*', '-/+', '*/+', '+/+')
            and the figure. None is returned if the input data is unavailable.
            Note that the tryExcept decorator applied to this method does not forward return values

    Usage:
        DCS = DigitalCellSorter.DigitalCellSorter()

        DCS.makeCellMarkersPiePlot('T cells', 'B cells')
    '''

    try:
        df_marker_expression = pd.read_excel(os.path.join(self.saveDir, self.dataName + '_annotation.xlsx'), sheet_name='Marker cell type weight matrix', index_col=0, header=0).T
    except Exception as exception:
        if self.verbose >= 1:
            print(exception)
            print('Marker expression data unavailable')

        df_marker_expression = None
        listUnexpressedMarkers = False

    if isinstance(df_marker_cell_type, str):
        if df_marker_cell_type == 'expressed':
            listUnexpressedMarkers = False

            if df_marker_expression is None:
                if self.verbose >= 1:
                    print("Try using option 'all'")

                return

            df_marker_cell_type = df_marker_expression
            additional_name = 'expressed'

        elif df_marker_cell_type == 'all':
            df_marker_cell_type = self.readMarkerFile()
            additional_name = 'all'

            if nameToAppend is None:
                nameToAppend = '.'.join(os.path.basename(self.geneListFileName).split('.')[:-1])

        else:
            # Any other string cannot be used as a marker matrix
            if self.verbose >= 1:
                print("Unknown option '%s'. Use 'all', 'expressed' or a pandas.DataFrame" % (df_marker_cell_type))

            return
    else:
        additional_name = 'custom'

        if nameToAppend is None:
            nameToAppend = ''

    df_marker_cell_type = df_marker_cell_type.fillna(0.)

    def getSet(df, celltype):

        '''Return sets of positive and negative markers of a cell type, or None if it is not found'''

        try:
            pos = set(df.index[(df.loc[:, celltype] > 0.)].values)
            neg = set(df.index[(df.loc[:, celltype] < 0.)].values)
        except Exception as exception:
            if self.verbose >= 1:
                print(exception)
                print('Cell type %s not found' % (celltype))
                print('Available celltypes are: %s' % (df.columns.values.tolist()))

            return None

        return pos, neg

    def getEightAll(t1p, t2p, t1n, t2n):

        '''Split markers of two cell types into eight categories, and return them with the union of all markers'''

        p1 = t1p.intersection(t2n)
        p3 = t1n.intersection(t2n)
        p5 = t1n.intersection(t2p)
        p7 = t1p.intersection(t2p)

        p0 = t1p - p1 - p7
        p2 = t2n - p1 - p3
        p4 = t1n - p3 - p5
        p6 = t2p - p5 - p7

        union = t1p.union(t2p).union(t1n).union(t2n)

        return [p0, p1, p2, p3, p4, p5, p6, p7], union

    def wrapMarkers(markers, limit = 75):

        '''Join sorted marker names with commas, starting a new line once the current line exceeds limit'''

        markers = [str(marker) for marker in sorted(markers)]

        if len(markers) == 0:
            return ''

        wrapped = current = markers[0]
        for marker in markers[1:]:
            if len(current) > limit:
                current = '\n' + marker
                wrapped += '\n' + marker
            else:
                wrapped += ', ' + marker
                current += ', ' + marker

        return wrapped

    markers1 = getSet(df_marker_cell_type, type1)
    if markers1 is None:
        return

    markers2 = getSet(df_marker_cell_type, type2)
    if markers2 is None:
        return

    sets, allMarkers = getEightAll(markers1[0], markers2[0], markers1[1], markers2[1])

    if listUnexpressedMarkers:
        expressed1 = getSet(df_marker_expression, type1)
        expressed2 = getSet(df_marker_expression, type2) if expressed1 is not None else None

        if expressed2 is None:
            listUnexpressedMarkers = False
        else:
            setsE, allE = getEightAll(expressed1[0], expressed2[0], expressed1[1], expressed2[1])

    # Category labels: state in type1 / state in type2, where '*' stands for "not a marker"
    categories = ('+/*', '+/-', '*/-', '-/-', '-/*', '-/+', '*/+', '+/+')
    colors = ['limegreen', 'thistle', 'lightcoral', 'red', 'lightcoral', 'thistle', 'limegreen', 'green']
    titles = ['Positive in %s:' % (type1),
            'Positive in %s, Negative in %s:' % (type1, type2),
            'Negative in %s:' % (type2),
            'Negative in both:',
            'Negative in %s:' % (type1),
            'Negative in %s, Positive in %s:' % (type1, type2),
            'Positive in %s:' % (type2),
            'Positive in both:']

    sizes = [len(item) for item in sets]

    if sum(sizes) == 0:
        if self.verbose >= 1:
            print('No markers found for %s and %s' % (type1, type2))

        return

    # Empty wedges get no label on the chart; the returned dict keeps the full category names
    labels = [(label if size > 0 else '') for label, size in zip(categories, sizes)]
    explode = (0.0, orthogonalSectorsShift, 0.0, 0.0, 0.0, orthogonalSectorsShift, 0.0, 0.0)

    # Text of the annotation box of each wedge
    str_sets = []
    for j in range(8):
        if listUnexpressedMarkers:
            caption = titles[j] + ' (%s):' % (len(setsE[j])) + '\n' + wrapMarkers(setsE[j])

            notExpressed = sets[j].difference(setsE[j])
            if len(notExpressed) > 0:
                caption += '\n' + 'Not expressed (%s):' % (len(notExpressed)) + '\n' + wrapMarkers(notExpressed)
        else:
            caption = titles[j] + '\n' + wrapMarkers(sets[j])

        str_sets.append(caption)

    fig = plt.figure(figsize=(8,4))
    ax = fig.add_axes([0.25,0.25,0.5,0.5])

    # The autopct callback receives only the percentage, so the wedge index is tracked by a counter
    currentWedge = 0
    def autopctFunc(value):

        '''Format wedge text: number of markers and, in brackets, number of unexpressed ones'''

        nonlocal currentWedge

        n = int(np.round((float(value) / 100. * float(np.sum(sizes))), 0))

        if listUnexpressedMarkers:
            u = len(sets[currentWedge].difference(setsE[currentWedge]))
        else:
            u = 0

        if n > 0:
            if u > 0:
                wedgeText = "{:d}\n({:d})".format(n,u)
            else:
                wedgeText = "{:d}".format(n)
        else:
            wedgeText = ""

        currentWedge += 1

        return wedgeText

    wedges, texts, autotexts = ax.pie(sizes, explode=explode, labels=labels, colors=colors, labeldistance=1.05, textprops={'size':6, 'weight':'semibold', 'color':'b'},
                                      autopct=autopctFunc, wedgeprops={'linewidth': 0.5, 'edgecolor':'aqua', 'width': 0.7}, shadow=False, startangle=-180 + rotationAngle, frame=False, rotatelabels=False)

    plt.setp(autotexts, size=6, weight="semibold", color='k')

    bbox_props = dict(boxstyle="square,pad=0.3", fc="w", ec="k", lw=0.5)
    kw = dict(arrowprops=dict(arrowstyle="-"), bbox=bbox_props, zorder=0, va="center")

    for i, p in enumerate(wedges):
        if len(sets[i]) == 0:
            continue

        # Annotation points at the middle of the wedge arc
        ang = (p.theta2 - p.theta1) / 2. + p.theta1
        y = np.sin(np.deg2rad(ang))
        x = np.cos(np.deg2rad(ang))
        horizontalalignment = "left" if x >= 0 else "right"
        connectionstyle = "angle,angleA=0,angleB={}".format(ang)
        kw["arrowprops"].update({"connectionstyle": connectionstyle})
        ax.annotate(str_sets[i], xy=(x, y), xytext=(1.6 * np.sign(x), 1.8 * y), fontsize=3.5, ha=horizontalalignment, **kw)

    fig.suptitle('%s & %s' % (type1, type2), fontsize=11, color='b')

    ax.axis('equal')

    # Total number of markers and, in brackets, number of unexpressed ones
    if listUnexpressedMarkers:
        centerText = '%s\n(%s)' % (len(allMarkers), len(allMarkers.difference(allE)))
    else:
        centerText = '%s' % (len(allMarkers))

    ax.text(0., 0., centerText, color='k', fontsize=8, ha='center', va='center').set_path_effects([path_effects.Stroke(linewidth=1, foreground='blue'),path_effects.Normal()])

    saveName = 'Markers_of_%s_vs_%s_(%s)_%s' % (type1.replace('/',''), type2.replace('/',''), nameToAppend, additional_name)
    self.saveFigure(fig, self.saveDir, self.dataName + saveName, extension=extension, dpi=dpi, **kwargs)

    return dict(zip(categories, list(sets))), fig
@tryExcept
def makeHopfieldPCplot(self, colormap = cm.hot_r, plotTrLines = False, clusterid = 1, trID = 0, axisOff = False, fontscale = 1., trPath = None, dpi = 300, extension = 'png', **kwargs):

    '''Make radar plot of the attractors in their principal components coordinates

    Parameters:
        colormap: matplotlib.colormap or str, Default cm.hot_r
            Colormap or its string name. Accepted for interface
            compatibility; this plot does not use it

        plotTrLines: boolean, Default False
            Whether to plot trajectories

        clusterid: int, Default 1
            Identifier of the cluster to plot trajectories of

        trID: int, Default 0
            Identifier of the trajectories to plot

        axisOff: boolean, Default False
            Whether to hide the axes lines

        fontscale: float, Default 1.
            Multiplier applied to the font sizes of the text labels

        trPath: str, Default None
            Path to trajectories files. When None, the sub-directory
            'HopfieldTrajectories' of self.saveDir is used

        dpi: int, Default 300
            Resolution of the figure

        extension: str, Default 'png'
            Format extension of the figure

        Any other keyword arguments are forwarded to saveFigure

    Returns:
        matplotlib.figure.Figure
            The figure, or None when the trajectories directory is missing.
            Note that the tryExcept decorator of this class calls the method
            without passing its result on

    Usage:
        DCS = DigitalCellSorter.DigitalCellSorter()

        DCS.makeHopfieldPCplot()
    '''

    if trPath is None:
        trPath = os.path.join(self.saveDir, 'HopfieldTrajectories')

    if not os.path.exists(trPath):
        if self.verbose >= 1:
            print('Data not found', flush=True)

        return

    fig = plt.figure(figsize=(8,8))
    ax = plt.subplot(111, polar=True, theta_direction=-1, theta_offset=0.5*np.pi)

    # The axes can only be hidden once they exist
    if axisOff:
        ax.axis('off')

    attrs, attrs_names = read(os.path.join(trPath, 'attrs'))

    # The first N rows of attrs are used as attractors coordinates, and row N
    # supplies the per-component values shown (as percents) in the PC labels
    N = attrs.shape[1]
    columns = ['PC%s\n%s%%' % (i + 1, int(100. * attrs[N][i])) for i in range(N)]
    df = pd.DataFrame(data=attrs[:N], index=attrs_names, columns=columns)

    # Keep only the components whose row-N value exceeds the threshold
    wherePC = attrs[N] > 0.001
    df = df[df.columns[wherePC]]
    N = df.shape[1]

    vmaxAt = df.max(axis=0).max()
    vminAt = df.min(axis=0).min()
    ax.set_ylim(vminAt, vmaxAt)

    # One spoke per component; the trailing 0. closes each polygon
    angles = [n / float(N) * 2 * np.pi for n in range(N)] + [0.]

    ax.set_xticklabels([])
    ax.set_xticks(angles)

    for i, celltype in enumerate(df.index):
        values = df.loc[celltype].values.flatten().tolist()
        values.append(values[0])

        color = cm.jet(i / len(attrs_names))
        ax.plot(angles, values, color=color, linewidth=1.75, linestyle='solid', label=celltype)
        ax.fill_between(angles, 0, values, alpha=0.2, facecolor=color)

        # Label each attractor at its largest coordinate
        peak = np.argmax(values)
        temp_texts = ax.text(angles[peak], values[peak], celltype, color=color, fontsize=12.*fontscale, ha='center', va='center')
        temp_texts.set_path_effects([path_effects.Stroke(linewidth=1., foreground='k'), path_effects.Normal()])

    if plotTrLines:
        # Format only the file name, so a '%' in trPath cannot break the path
        trajectories = read(os.path.join(trPath, 'trajectories%s' % (trID)))
        initial, final, typesNames, clusterNames = read(os.path.join(trPath, 'additional'))

        thisTr = trajectories[:, clusterid, wherePC].T

        vmax = max(thisTr.max(), vmaxAt)
        vmin = min(thisTr.min(), vminAt)
        ax.set_ylim(vmin, vmax)

        inId = initial[clusterid]
        outId = final[clusterid]

        if self.verbose >= 3:
            print('ClusterID: %s, Initial state: %s (%s), Final state: %s (%s)'%(clusterNames[clusterid], typesNames[inId], inId, typesNames[outId], outId))

        suffix = clusterNames[clusterid]

        # An identifier of -1 is displayed as 'Unknown'
        fig.suptitle('Cluster %s: %s -> %s' % (clusterNames[clusterid],
                                               typesNames[inId] if inId!=-1 else 'Unknown',
                                               typesNames[outId] if outId!=-1 else 'Unknown'), fontsize=11, color='b')

        timeLimit = thisTr.shape[1]
        for t in range(timeLimit):
            values = thisTr.T[t].tolist() + [thisTr.T[t][0]]

            # First time point is highlighted in blue
            if t == 0:
                ax.plot(angles, values, 'o-', ms=4.0, color='b', alpha=1.0, clip_on=False)

            ax.plot(angles, values, color='b', linewidth=0.5, alpha=0.04, linestyle='solid')
            ax.fill(angles, values, alpha=0.005, color='b')

            # Last time point is highlighted in red
            if t == timeLimit - 1:
                ax.plot(angles, values, 'o-', ms=4.0, color='r', alpha=1.0, clip_on=False)

        ax.set_rlabel_position(0)
    else:
        vmax = vmaxAt
        vmin = vminAt
        suffix = 'attractors'

    # Component names are placed just outside of the radial range
    for i, pc in enumerate(df.columns):
        temp_texts = ax.text(angles[i], 1.15 * (vmax - vmin) + vmin, pc, color='k', fontsize=14*fontscale, ha='center', va='center')
        temp_texts.set_path_effects([path_effects.Stroke(linewidth=0.5*fontscale, foreground='w'), path_effects.Normal()])

    ax.set_axisbelow(True)

    # Draw once so the radial tick labels get their final positions, then
    # replace them with ordinary texts placed on top of everything else
    fig.canvas.draw()
    ylabels = ax.get_yticklabels()
    ax.set_yticklabels([])
    for label in ylabels:
        x, y = label.get_position()
        ax.text(x, y, label.get_text(), zorder=np.inf)

    self.saveFigure(fig, self.saveDir, self.dataName + '_polar_%s'%(suffix), extension=extension, dpi=dpi, **kwargs)

    return fig

@tryExcept
def HopfieldLandscapePlot(self, legend = False, labels = False, PCx = 0, PCy = 1, colorbar = True, fontsize = 10, plotMesh = True, plotAttractors = True, adjustText = True, axisOff = True, colorbarva = 0.75, colorbarha = 0.85, trPath = None, colormap = matplotlib.colors.LinearSegmentedColormap.from_list('cmap', [(1, 1, 1), (0, 1, 1), (0, 0, 1), (1, 0, 0)], N=1000), dpi = 300, extension = 'png', **kwargs):

    '''Make heatmap plot of the attractors in their two principal components coordinates

    Parameters:
        legend: boolean, Default False
            Accepted for interface compatibility; not used by this plot

        labels: boolean, Default False
            Accepted for interface compatibility; not used by this plot

        PCx: int, Default 0
            Principal component for x-coordinate of the plot

        PCy: int, Default 1
            Principal component for y-coordinate of the plot

        colorbar: boolean, Default True
            Whether to add colorbar (only drawn when plotMesh is True)

        fontsize: int, Default 10
            Text labels font size

        plotMesh: boolean, Default True
            Whether to plot landscape heatmap

        plotAttractors: boolean, Default True
            Whether to plot attractor stars

        adjustText: boolean, Default True
            Whether to minimize text labels overlap

        axisOff: boolean, Default True
            Whether to hide the axes lines

        colorbarva: float, Default 0.75
            Vertical position of the bottom of the colorbar

        colorbarha: float, Default 0.85
            Horizontal position of the colorbar

        trPath: str, Default None
            Path to trajectories files. When None, the sub-directory
            'HopfieldTrajectories' of self.saveDir is used

        colormap: matplotlib.colormap or str, Default matplotlib.colors.LinearSegmentedColormap.from_list('cmap', [(1, 1, 1), (0, 1, 1), (0, 0, 1), (1, 0, 0)], N=1000)
            Colormap or its string name

        dpi: int, Default 300
            Resolution of the figure

        extension: str, Default 'png'
            Format extension of the figure

        Any other keyword arguments are forwarded to saveFigure

    Returns:
        matplotlib.figure.Figure
            The figure, or None when the trajectories directory is missing.
            Note that the tryExcept decorator of this class calls the method
            without passing its result on

    Usage:
        DCS = DigitalCellSorter.DigitalCellSorter()

        DCS.HopfieldLandscapePlot()
    '''

    np.random.seed(0)

    if trPath is None:
        trPath = os.path.join(self.saveDir, 'HopfieldTrajectories')

    # Check for the data before creating a figure, so that nothing is left open
    if not os.path.exists(trPath):
        if self.verbose >= 1:
            print('Data not found', flush=True)

        return

    # Resolve a colormap name, and copy the colormap so that set_bad does not
    # modify the shared default argument or an object owned by the caller
    colormap = copy.copy(plt.get_cmap(colormap))
    colormap.set_bad('white')

    def add_colorbar(fig, labels, cmap = matplotlib.colors.LinearSegmentedColormap.from_list('GR', [(0, 1, 0), (1, 0, 0)], N=100), fontsize = 10):

        '''Add a small colorbar with six ticks spanning the range of labels'''

        mapp = cm.ScalarMappable(norm=matplotlib.colors.Normalize(vmin=np.min(labels), vmax=np.max(labels)), cmap=cmap)
        sp = np.linspace(np.max(labels), np.min(labels), num=6, endpoint=True)
        mapp.set_array(sp)

        axisColor = fig.add_axes([colorbarha, colorbarva, 0.01, 0.2])
        fig.colorbar(mapp, cax=axisColor, ticks=sp)
        axisColor.tick_params(labelsize=fontsize)
        axisColor.set_yticklabels(sp.astype(int))

        return None

    fig = plt.figure(figsize=(8,8))
    ax = fig.add_axes([0.05,0.05,0.9,0.9])

    if axisOff:
        ax.axis('off')

    attrs_xpca, attrs_names = read(os.path.join(trPath, 'attrs'))
    attrs_xpca = attrs_xpca[:attrs_xpca.shape[1]]

    # The last column of the mesh holds the energy, the rest are coordinates
    mesh_xpca = read(os.path.join(trPath, 'mesh'))
    mesh_energy = mesh_xpca[:, -1]
    mesh_xpca = mesh_xpca[:, :-1]

    # Select the two requested components for attractors and mesh points alike
    nAttrs = attrs_xpca.shape[1]
    coords = np.vstack([attrs_xpca, mesh_xpca]).T[[PCx, PCy], :]
    attrs2D, mesh2D = coords[:, :nAttrs].T, coords[:, nAttrs:].T

    if plotMesh:
        vmin, vmax = min(mesh_energy), 0.

        # Bounding box of the mesh, padded by 5% on each side
        xmin, xmax = mesh2D.T[0].min(), mesh2D.T[0].max()
        ymin, ymax = mesh2D.T[1].min(), mesh2D.T[1].max()
        dx = (xmax - xmin) * 0.05
        dy = (ymax - ymin) * 0.05
        xmin -= dx
        xmax += dx
        ymin -= dy
        ymax += dy

        # Grid cells without any mesh point keep the value vmin
        ngrid = 100
        grid = np.full((ngrid + 1, ngrid + 1), vmin, dtype=float)

        i = (ngrid * (mesh2D.T[0] - xmin) / (xmax - xmin)).astype(int)
        j = (ngrid * (mesh2D.T[1] - ymin) / (ymax - ymin)).astype(int)

        # Minimum energy among the mesh points that fall into the same cell
        se = pd.Series(mesh_energy, index=pd.MultiIndex.from_arrays([i, j])).groupby(level=[0, 1]).min()
        grid[(se.index.get_level_values(0).values, se.index.get_level_values(1).values)] = se.values

        maskedArray = np.ma.array(grid.T, mask=np.isnan(grid.T))

        ax.imshow(maskedArray[::-1], vmin=vmin, vmax=vmax, cmap=colormap, alpha=0.8, extent=[xmin, xmax, ymin, ymax],
                  interpolation='quadric', zorder=-10**8, clip_on=False)

        # Contours are drawn from a smoothed copy of the grid
        smoothed = scipy.ndimage.gaussian_filter(grid.T, 1.5)
        xgrid = np.linspace(xmin, xmax, num=(ngrid+1))
        ygrid = np.linspace(ymin, ymax, num=(ngrid+1))

        ax.contour(xgrid, ygrid, smoothed, levels=10, cmap=colormap, linewidths=1.0, zorder=-10**8+1)
        ax.contour(xgrid, ygrid, smoothed, levels=10, colors='k', linestyles='solid', linewidths=0.25, zorder=-10**8+2)

    if plotAttractors:
        texts = []
        ax.plot(attrs2D.T[0], attrs2D.T[1], '*', ms=14, color='k', alpha=1.0, zorder=-10**7, clip_on=False)

        for attr in range(attrs2D.T[0].shape[0]):
            temp_texts = ax.text(attrs2D.T[0][attr], attrs2D.T[1][attr], attrs_names[attr], fontsize=fontsize,
                                 fontweight=550, ha='left',va='center', zorder=10 ** 10, clip_on=False)
            temp_texts.set_path_effects([path_effects.Stroke(linewidth=2.5, foreground='white'), path_effects.Normal()])
            texts.append(temp_texts)

        if adjustText:
            adjust_text(texts, arrowprops=dict(arrowstyle='-', color='k', lw=0.3, alpha=0.5), force_text=(0.05, 0.05))

    if plotMesh and colorbar:
        add_colorbar(fig, [vmax, vmin], cmap=colormap, fontsize=fontsize)

    self.saveFigure(fig, self.saveDir, self.dataName + '_energy_landscape_PC%s_vs_PC%s'%(PCy, PCx), extension=extension, dpi=dpi, **kwargs)

    return fig

# Plotly-powered figures
@tryExcept
def makeSankeyDiagram(self, df, colormapForIndex = None, colormapForColumns = None, linksColor = 'rgba(100,100,100,0.6)', title = '', attemptSavingHTML = False, quality = 4, width = 400, height = 400, border = 20, nodeLabelsFontSize = 15, nameAppend = '_Sankey_diagram'):

    '''Make a Sankey diagram, also known as 'river plot' with two groups of nodes

    Parameters:
        df: pandas.DataFrame
            With counts (overlaps). The DataFrame passed in is not modified

        colormapForIndex: dictionary, Default None
            Colors to use for nodes specified in the DataFrame index

        colormapForColumns: dictionary, Default None
            Colors to use for nodes specified in the DataFrame columns

        linksColor: str, Default 'rgba(100,100,100,0.6)'
            Color of the non-overlapping links

        title: str, Default ''
            Title to print on the diagram

        attemptSavingHTML: boolean, Default False
            Whether to save the interactive html version of the diagram.
            The html is also saved when the static image cannot be written

        quality: int, Default 4
            Proportional to the resolution of the figure to save

        width: int, Default 400
            Width passed to the static image export

        height: int, Default 400
            Height passed to the static image export

        border: int, Default 20
            Margin on each side of the static image

        nodeLabelsFontSize: int, Default 15
            Font size for node labels

        nameAppend: str, Default '_Sankey_diagram'
            Name to append to the figure file

    Returns:
        plotly.graph_objects.Figure
            The diagram. Note that the tryExcept decorator of this class
            calls the method without passing its result on

    Usage:
        DCS = DigitalCellSorter.DigitalCellSorter()

        DCS.makeSankeyDiagram(df)
    '''

    # The axes are replaced below; a copy keeps the caller's DataFrame intact
    df = df.copy()

    try:
        temp_index = pd.MultiIndex.from_arrays([df.index, [colormapForIndex[item] for item in df.index]], names=['label', 'color'])
        temp_columns = pd.MultiIndex.from_arrays([df.columns, [colormapForColumns[item] for item in df.columns]], names=['label', 'color'])
        df.index = temp_index
        df.columns = temp_columns
    except Exception as exception:
        # Best effort: any problem with the color maps falls back to defaults
        if self.verbose >= 2:
            print(exception)
            print('Using default node colors')

        colormapForIndex = None
        colormapForColumns = None

    if (colormapForIndex is None) or (colormapForColumns is None):
        nodeColors = ['rgba(150,0,10,0.8)'] * len(df.index) + ['rgba(10,0,150,0.8)'] * len(df.columns)
        nodeLabels = df.index.to_list() + df.columns.to_list()
    else:
        nodeLabels = df.index.get_level_values('label').to_list() + df.columns.get_level_values('label').to_list()
        nodeColors = df.index.get_level_values('color').to_list() + df.columns.get_level_values('color').to_list()

    # Index nodes are numbered first, columns nodes follow them.
    # Rows are taken by position, so repeated labels cannot return several rows
    nRows, nCols = df.shape
    sources, targets, values, labels = [], [], [], []
    for i, item in enumerate(df.index):
        sources.extend([i] * nCols)
        targets.extend(range(nRows, nRows + nCols))
        values.extend(df.iloc[i].values.tolist())

        # With (label, color) tuples only the label goes into the link name
        if isinstance(item, tuple):
            labels.extend([str(item[0]) + ' -> ' + str(jtem[0]) for jtem in df.columns])
        else:
            labels.extend([str(item) + ' -> ' + str(jtem) for jtem in df.columns])

    colorscales = [dict(label=label, colorscale=[[0, linksColor], [1, linksColor]]) for label in labels]

    # A link between two nodes of the same color takes that color, with the
    # last comma-separated component replaced by 0.6 (expects 'rgba(r,g,b,a)')
    for k, (source, target) in enumerate(zip(sources, targets)):
        if nodeColors[source] == nodeColors[target]:
            newColor = ','.join(nodeColors[source].split(',')[:3] + ['0.6)'])
            colorscales[k] = dict(label=labels[k], colorscale=[[0, newColor], [1, newColor]])

    fig = go.Figure(data=[go.Sankey(valueformat = '', valuesuffix = '',
                                    textfont = dict(color = 'rgb(255,0,0)', size = nodeLabelsFontSize, family = 'Arial'),
                                    node = dict(pad = 20, thickness = 40, line = dict(color = 'white', width = 0.0), label = nodeLabels, color = nodeColors),
                                    link = dict(source = sources, target = targets, value = values, label = labels, colorscales = colorscales, hoverinfo='all'),
                                    )],)

    if title is not None:
        fig.update_layout(title_text=title, font_size=10)

    fig.update_layout(margin=dict(l=border, r=border, t=border, b=border))

    # Same directory handling as in saveFigure
    if self.saveDir != os.path.join('') and not os.path.exists(self.saveDir):
        os.makedirs(self.saveDir)

    try:
        fig.write_image(os.path.join(self.saveDir, self.dataName + nameAppend + '.png'), width=width, height=height, scale=quality)
    except Exception as exception:
        if self.verbose >= 2:
            print('Cannot save static image (likely due to missing orca). Saving to interactive html')

        # Fall back to html regardless of the verbosity level
        attemptSavingHTML = True

    if attemptSavingHTML:
        fig.update_layout(margin=dict(l=200, r=200, t=100, b=100))
        plot_offline(fig, filename=os.path.join(self.saveDir, self.dataName + nameAppend + '.html'), auto_open=False)

    return fig
@tryExcept
def HopfieldLandscapePlot3D(self, PCx = 0, PCy = 1, colorbar = True, fontsize = 12, plotMesh = True, plotAttractors = True, trPath = None, attemptSavingHTML=False, nameAppend = '', quality = 4, **kwargs):

    '''Make 3D surface plot of the energy landscape, with the attractors,
    in two principal components coordinates

    Parameters:
        PCx: int, Default 0
            Principal component for x-coordinate of the plot

        PCy: int, Default 1
            Principal component for y-coordinate of the plot

        colorbar: boolean, Default True
            Whether to show the color scale of the surface

        fontsize: int, Default 12
            Attractors labels font size

        plotMesh: boolean, Default True
            Whether to plot the landscape surface

        plotAttractors: boolean, Default True
            Whether to plot attractors markers and labels

        trPath: str, Default None
            Path to trajectories files. When None, the sub-directory
            'HopfieldTrajectories' of self.saveDir is used

        attemptSavingHTML: boolean, Default False
            Whether to save the interactive html version of the figure.
            The html is also saved when the static image cannot be written

        nameAppend: str, Default ''
            Name to append to the figure file

        quality: int, Default 4
            Proportional to the resolution of the figure to save

        Any other keyword arguments are accepted but not used

    Returns:
        plotly.graph_objects.Figure
            The figure, or None when the trajectories directory is missing.
            Note that the tryExcept decorator of this class calls the method
            without passing its result on

    Usage:
        DCS = DigitalCellSorter.DigitalCellSorter()

        DCS.HopfieldLandscapePlot3D()
    '''

    if trPath is None:
        trPath = os.path.join(self.saveDir, 'HopfieldTrajectories')

    if not os.path.exists(trPath):
        if self.verbose >= 1:
            print('Data not found', flush=True)

        return

    attrs_xpca, attrs_names = read(os.path.join(trPath, 'attrs'))
    attrs_xpca = attrs_xpca[:attrs_xpca.shape[1]]

    # The last column of the mesh holds the energy, the rest are coordinates
    mesh_xpca = read(os.path.join(trPath, 'mesh'))
    mesh_energy = mesh_xpca[:, -1]
    mesh_xpca = mesh_xpca[:, :-1]

    # Select the two requested components for attractors and mesh points alike
    nAttrs = attrs_xpca.shape[1]
    coords = np.vstack([attrs_xpca, mesh_xpca]).T[[PCx, PCy], :]
    attrs2D, mesh2D = coords[:, :nAttrs].T, coords[:, nAttrs:].T

    vmin, vmax = min(mesh_energy), 0.

    # Bounding box of the mesh, padded by 5% on each side
    xmin, xmax = mesh2D.T[0].min(), mesh2D.T[0].max()
    ymin, ymax = mesh2D.T[1].min(), mesh2D.T[1].max()
    dx = (xmax - xmin) * 0.05
    dy = (ymax - ymin) * 0.05
    xmin -= dx
    xmax += dx
    ymin -= dy
    ymax += dy

    ngrid = 100
    i = (ngrid * (mesh2D.T[0] - xmin) / (xmax - xmin)).astype(int)
    j = (ngrid * (mesh2D.T[1] - ymin) / (ymax - ymin)).astype(int)

    # Minimum energy among the mesh points that fall into the same cell.
    # Only rows and columns holding at least one point appear in the table;
    # cells without points are filled with vmin
    se = pd.Series(mesh_energy, index=pd.MultiIndex.from_arrays([i, j])).groupby(level=[0, 1]).min()
    df = se.unstack(fill_value=vmin)

    fig = go.Figure()

    if plotMesh:
        fig.add_trace(go.Surface(x=np.linspace(xmin, xmax, df.shape[0]),
                                 y=np.linspace(ymin, ymax, df.shape[1]),
                                 z=gaussian_filter(df.values.T, sigma=0.75),
                                 opacity=1., colorscale="blackbody_r", showscale=colorbar, hoverinfo='none',
                                 contours={'x': {'highlight': False}, 'y': {'highlight': False}, 'z': {'highlight': False}},))

        fig.update_traces(contours_z=dict(show=True, width=3., highlightwidth=3., usecolormap=False, highlightcolor="limegreen",
                                          project=dict(x=True,y=True,z=True), highlight=True, color='grey', size=(vmax-vmin)/10.))

    annotations = []

    if plotAttractors:
        for k, point in enumerate(zip(attrs2D.T[0], attrs2D.T[1])):
            # Vertical line from the bottom of the energy range to the marker
            fig.add_trace(go.Scatter3d(x=[point[0], point[0]], y=[point[1], point[1]], z=[vmin, 0.5*vmin], mode='lines',
                                       hoverinfo='none', line=dict(width=2, color='blue'), showlegend=False))

            annotations.append(dict(showarrow=False, x=point[0], y=point[1], z=0.4*vmin, text=attrs_names[k],
                                    xanchor="center", xshift=10, opacity=1, font=dict(color='black', size=fontsize)))

        # All the markers are placed at half of the energy range
        fig.add_trace(go.Scatter3d(x=attrs2D.T[0], y=attrs2D.T[1], z=0. * attrs2D.T[0] + 0.5*vmin, mode='markers',
                                   hovertext=attrs_names, hoverinfo='text', marker=dict(size=5, color='blue'),
                                   projection=dict(z=dict(show=True)), showlegend=False))

    fig.update_layout(title='Hopfield Attractors', autosize=False, width=700, height=700, margin=dict(l=75, r=75, b=75, t=90))

    # Axis titles follow the requested components, numbered from 1
    hidden = {'nticks': 10, 'showspikes': False, 'showbackground': False, 'showline': False, 'showticklabels': False, 'showaxeslabels': False}
    fig.update_layout(scene = {'xaxis': dict(hidden, title_text='PC%s' % (PCx + 1), spikesides=False),
                               'yaxis': dict(hidden, title_text='PC%s' % (PCy + 1), spikesides=False),
                               'zaxis': dict(hidden, title_text='Energy', range=(vmin, 0.)),
                               'aspectratio': {'x': 1, 'y': 1, 'z': 0.33},
                               'annotations': annotations})

    fig.update_layout(scene_camera=dict(up=dict(x=0, y=0, z=2), center=dict(x=0, y=0, z=0), eye=dict(x=0, y=-0.25, z=1.25)))

    fileName = self.dataName + '_energy_landscape_PC%s_vs_PC%s'%(PCy, PCx) + nameAppend

    # Same directory handling as in saveFigure
    if self.saveDir != os.path.join('') and not os.path.exists(self.saveDir):
        os.makedirs(self.saveDir)

    try:
        fig.write_image(os.path.join(self.saveDir, fileName + '.png'), width=700, height=700, scale=quality)
    except Exception as exception:
        if self.verbose >= 2:
            print('Cannot save static image (likely due to missing orca). Saving to interactive html')

        # Fall back to html regardless of the verbosity level
        attemptSavingHTML = True

    if attemptSavingHTML:
        plot_offline(fig, filename=os.path.join(self.saveDir, fileName + '.html'), auto_open=False)

    return fig