import os
import copy
import numpy as np
import pandas as pd
import scipy.stats
import scipy.signal
import scipy.ndimage
import scipy.cluster.hierarchy
from scipy.interpolate import UnivariateSpline
from scipy.ndimage import gaussian_filter
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patheffects as path_effects
from matplotlib import cm
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import plot as plot_offline
from plotly.offline import plot_mpl
from adjustText import adjust_text
from .GenericFunctions import read, write
[docs]class VisualizationFunctions:
'''Class of visualization functions for DigitalCellSorter'''
def __init__(self, dataName = 'dataName', saveDir = os.path.join(''), matplotlibMode = 'Agg', safePlotting = True, verbose = 1):
'''Function called automatically'''
self.saveDir = saveDir
self.dataName = dataName
self.matplotlibMode = matplotlibMode
self.safePlotting = safePlotting
self.verbose = verbose
return
@property
def matplotlibMode(self):
return self._matplotlibMode
@matplotlibMode.setter
def matplotlibMode(self, value):
self._matplotlibMode = value
if not self.matplotlibMode is None:
matplotlib.use(self.matplotlibMode)
return
def tryExcept(func):
def internal(self, *args, **kwargs):
if self.safePlotting:
try:
func(self, *args, **kwargs)
except Exception as exception:
if self.verbose >= 1:
print('Something went wrong while making plot: %s' % (func))
print('\tError message: %s\n' % (exception))
else:
func(self, *args, **kwargs)
return internal
def saveFigure(self, fig, saveDir, label = 'Figure', extension = 'png', dpi = 300, close = True, attemptSavingHTML = False):
'''Function used internally to save and close figures
Parameters:
saveDir: str
Path of directories to save the object to
label: str, Default 'Figure'
Name of the figure to save
extension: str, Default '.png'
Path of directories to save the object to
dpi: int, Default 300
Figure resolution if rasterized
close: boolean: Default True
Whether to close the figure after saving
Returns:
None
Usage:
saveFigure(fig, saveDir, label, extension, dpi)
'''
if saveDir != os.path.join('') and not os.path.exists(saveDir):
os.makedirs(saveDir)
try:
if not extension[0] == '.':
extension = ''.join(['.', extension])
except Exception as exception:
if self.verbose >= 1:
print(exception)
print('Figure extension/format error')
print('Example of acceptable extension: \".png\"')
return
if extension in ['.png', '.jpeg', '.tiff']:
try:
fig.savefig(os.path.join(saveDir, label + extension), dpi=dpi)
except Exception as exception:
if self.verbose >= 1:
print(exception)
elif extension in ['.svg', '.eps', '.pdf']:
try:
fig.savefig(os.path.join(saveDir, label + extension))
except Exception as exception:
if self.verbose >= 1:
print(exception)
else:
if self.verbose >= 1:
print('Unsupported format. Figure not saved')
if attemptSavingHTML:
try:
plot_mpl(fig, filename=os.path.join(saveDir, label + '.html'), auto_open=False)
except Exception as exception:
if self.verbose >= 1:
print('Saving to iteractive HTML did not succeed')
if close:
try:
plt.close(fig)
except Exception as exception:
if self.verbose >= 1:
print(exception)
print('Error while closing figure')
return
# MatPlotLib-powered figures
@tryExcept
def makeHeatmapGeneExpressionPlot(self, df = None, genes = None, normalize = True, logScale=False, subtract = False, saveExcel = True, nameToAppend = 'heatmap', plotBy = 'cluster', figsize = (8, 4), convertGenes = False, orderGenes=False, orderClusters=False, dpi = 300, extension = 'png', fontsize = 10, labelsFontsize = 10, **kwargs):
'''Make heatmap gene expression plot from a provided gene expression matrix.
Parameters:
df: pandas.DataFrame
Gene expression matrix
genes: list, Default None
List of genes to plot
nameToAppend: str, Default ''
String to append to fifure file name
dpi: int, Default 300
Resolution of the figure image
extension: str, Default 'png'
Format of the figure file
Returns:
None
Usage:
DCS = DigitalCellSorter.DigitalCellSorter()
DCS.makeHeatmapGeneExpressionPlot()
'''
if not genes is None:
if type(genes) in [list, np.ndarray, tuple]:
isNegativeGenes = [True if gene[-1]=='-' else False for gene in genes]
genes = [gene[:-1] if gene[-1]=='-' else gene for gene in genes]
if convertGenes:
genes = self.gnc.Convert(genes, 'alias', 'hugo', returnUnknownString=False)
elif type(genes) in [dict]:
isNegativeGenes = dict()
for key in genes.keys():
isNegativeGenes[key] = [True if gene[-1]=='-' else False for gene in genes[key]]
genes[key] = [gene[:-1] if gene[-1]=='-' else gene for gene in genes[key]]
if convertGenes:
for key in genes.keys():
genes[key] = self.gnc.Convert(genes[key], 'alias', 'hugo', returnUnknownString=False)
else:
if self.verbose >= 1:
print('Plotting all expressed genes not supported. Provide a smaller list of genes')
return
lengthListGenes = []
labelsListGenes = []
if df is None:
if self.df_expr is None:
self.loadExpressionData()
if self.df_expr is None:
return
targetIndex = self.df_expr.index
if type(genes) in [list, np.ndarray, tuple]:
ind = np.isin(genes, targetIndex)
common = np.array(genes)[ind]
isNegativeGenes = np.array(isNegativeGenes)[ind]
elif type(genes) in [dict]:
common = []
temp_negative = []
for key in genes.keys():
ind = np.isin(genes[key], targetIndex)
temp_common = np.array(genes[key])[ind]
isNegativeGenes_common = np.array(isNegativeGenes[key])[ind]
if len(temp_common) > 0:
common.extend(temp_common)
temp_negative.extend(isNegativeGenes_common)
lengthListGenes.append(len(temp_common))
labelsListGenes.append(key)
isNegativeGenes = np.array(temp_negative)
else:
return
df = self.df_expr.loc[common].copy()
else:
targetIndex = df.index
if type(genes) in [list, np.ndarray, tuple]:
ind = np.isin(genes, targetIndex)
common = np.array(genes)[ind]
isNegativeGenes = np.array(isNegativeGenes)[ind]
elif type(genes) in [dict]:
common = []
temp_negative = []
for key in genes.keys():
ind = np.isin(genes[key], targetIndex)
temp_common = np.array(genes[key])[ind]
isNegativeGenes_common = np.array(isNegativeGenes[key])[ind]
if len(temp_common) > 0:
common.extend(temp_common)
temp_negative.extend(isNegativeGenes_common)
lengthListGenes.append(len(temp_common))
labelsListGenes.append(key)
isNegativeGenes = np.array(temp_negative)
else:
return
df = df.loc[common]
counts = df.loc[[df.index[0]]].groupby(axis=1, level=plotBy).count()
means = df.mean(axis=1)
df = df.groupby(axis=1, level=plotBy).mean()
df.columns = df.columns.get_level_values(plotBy)
df.columns = list(zip(df.columns.values, counts.values[0]))
if normalize:
for i in range(df.shape[0]):
if subtract:
df.iloc[i,:] -= np.min(df.iloc[i,:])
df.iloc[i,:] /= np.max(df.iloc[i,:])
if logScale:
df += 1.
df = np.log(df)
if orderGenes:
df = df.iloc[scipy.cluster.hierarchy.dendrogram(scipy.cluster.hierarchy.linkage(df, 'ward'), no_plot=True, get_leaves=True)['leaves']]
if orderClusters:
df = df.T.iloc[scipy.cluster.hierarchy.dendrogram(scipy.cluster.hierarchy.linkage(df.T, 'ward'), no_plot=True, get_leaves=True)['leaves']].T
df.insert(0, ('Mean', 'All'), means)
if saveExcel:
df.T.to_excel(os.path.join(self.saveDir, self.dataName + '_' + nameToAppend + '_genes_by_%s' % (plotBy) + '.xlsx'))
fig, ax = plt.subplots(figsize=figsize)
ax.imshow(df.T.values[1:,:], cmap='Blues', interpolation='None', aspect='auto',
extent=(-0.5, df.shape[0] - 0.5, df.shape[1] - 0.5, +0.5))
data = df.T.values[1:,:].copy()
data[:, ~isNegativeGenes] = np.nan
data = np.ma.masked_where(np.isnan(data), data)
ax.imshow(data, cmap='Reds', interpolation='None', aspect='auto',
extent=(-0.5, df.shape[0] - 0.5, df.shape[1] - 0.5, +0.5))
ax.imshow(df.T.values[:1,:], cmap='Reds', interpolation='None', aspect='auto',
extent=(-0.5, df.shape[0] - 0.5, -0.5, +0.5))
ax.axhline(y=0.5, c='k', lw=1.5)
if len(lengthListGenes) != 0:
currPosition = 0
for label, value in zip(labelsListGenes, lengthListGenes):
currPosition += value
ax.axvline(x=currPosition - 0.5, c='k', lw=1)
ax.text(currPosition - 0.5*value - 0.5, df.shape[1], label, fontsize=labelsFontsize, c='k', ha='center', va='top')
df_temp = pd.DataFrame(index=df.columns[1:], columns=labelsListGenes)
df_temp.index = pd.MultiIndex.from_tuples(df_temp.index).get_level_values(0)[::-1]
df_temp.to_excel(os.path.join(self.saveDir, self.dataName + '-' + nameToAppend + '.xlsx'))
ax.set_xticks(range(df.shape[0]))
ax.set_yticks(range(df.shape[1]))
ylabels = ['(' + str(col[1]) + ')%s#' % (' ' if len(col[0]) <= 3 else ' ') + str(col[0]) for col in df.columns]
ylabels[0] = 'Mean across all cells'
ax.set_xticklabels(df.index, rotation=90, fontsize=fontsize)
ax.set_yticklabels(ylabels, rotation=0, fontsize=1.2*fontsize)
ax.set_xlim([-0.5, df.shape[0] - 0.5])
ax.set_ylim([-0.5, df.shape[1] - 0.5])
fig.tight_layout()
self.saveFigure(fig, self.saveDir, self.dataName + '_' + nameToAppend + '_genes_by_%s' % (plotBy), extension=extension, dpi=dpi, **kwargs)
return fig
[docs] @tryExcept
def makeMarkerExpressionPlot(self, fontscale = 1., dpi = 300, extension = 'png', **kwargs):
'''Produce image on marker genes and their expression on all clusters.
Uses files generated by function DCS.Vote
Parameters:
dpi: int, Default 600
Resolution of the figure image
extension: str, Default 'png'
Format of the figure file
Returns:
None
Usage:
DCS = DigitalCellSorter.DigitalCellSorter()
DCS.makeMarkerExpressionPlot()
'''
df_votingResults = pd.read_excel(os.path.join(self.saveDir, self.dataName + '_annotation.xlsx'), sheet_name='z-scores')
votingResults = dict(zip(df_votingResults['cluster'].values, df_votingResults['Predicted cell type'].values))
predictedCelltypes = dict(zip(df_votingResults['cluster'].values, df_votingResults['Predicted cell type'].str.split(' #', expand=True)[0]))
supportingMarkersList = dict(zip(df_votingResults['cluster'].values, df_votingResults['Supporting markers'].str.split(' // ')))
allMarkersList = dict(zip(df_votingResults['cluster'].values, df_votingResults['All markers'].str.split(' // ')))
df_markers_cluster_centroids = pd.read_excel(os.path.join(self.saveDir, self.dataName + '_annotation.xlsx'), sheet_name='Cluster centroids', index_col=0, header=0).T
df_markers = pd.read_excel(os.path.join(self.saveDir, self.dataName + '_annotation.xlsx'), sheet_name='Marker cell type weight matrix', index_col=0)
df_markers_weighted = df_markers.copy()
df_markers[df_markers >= 0.] = np.nan
df_markers[df_markers < 0.] = -1.
# Y_mc.T
X_markers_cluster_means_transpose = df_markers_cluster_centroids.values.T
df_means = df_markers_cluster_centroids.copy()
# Normalization
for i in range(X_markers_cluster_means_transpose.shape[1]):
X_markers_cluster_means_transpose[:,i] -= np.min(X_markers_cluster_means_transpose[:,i])
X_markers_cluster_means_transpose[:,i] /= np.max(X_markers_cluster_means_transpose[:,i])
ORDER = scipy.cluster.hierarchy.dendrogram(scipy.cluster.hierarchy.linkage(X_markers_cluster_means_transpose, 'ward'), no_plot=True, get_leaves=True)['leaves']
ORDER2 = scipy.cluster.hierarchy.dendrogram(scipy.cluster.hierarchy.linkage(X_markers_cluster_means_transpose.T, 'ward'), no_plot=True, get_leaves=True)['leaves']
df_markers_weighted = df_markers_weighted.iloc[:, ORDER2]
X_markers_cluster_means_sorted = X_markers_cluster_means_transpose[ORDER,:][:,ORDER2]
df_all_marker_hits = pd.DataFrame(data=np.zeros((df_markers_cluster_centroids.shape)), index=df_markers_cluster_centroids.index, columns=df_markers_cluster_centroids.columns)
for cluster in allMarkersList:
if not allMarkersList[cluster] is np.nan:
for gene in allMarkersList[cluster]:
df_all_marker_hits.loc[gene, cluster] = 1
df_supp_marker_hits = pd.DataFrame(data=np.zeros((df_markers_cluster_centroids.shape)), index=df_markers_cluster_centroids.index, columns=df_markers_cluster_centroids.columns)
for cluster in supportingMarkersList:
if not supportingMarkersList[cluster] is np.nan:
for gene in supportingMarkersList[cluster]:
df_supp_marker_hits.loc[gene, cluster] = 1
df_neg_supp_marker_hits = pd.DataFrame(data=np.zeros((df_markers_cluster_centroids.shape)), index=df_markers_cluster_centroids.index, columns=df_markers_cluster_centroids.columns)
for cluster in supportingMarkersList:
if not supportingMarkersList[cluster] is np.nan:
for gene in allMarkersList[cluster]:
#for gene in df_markers.columns:
if df_markers.loc[predictedCelltypes[cluster], gene] == -1.:
df_neg_supp_marker_hits.loc[gene, cluster] = 1
X_marker_hits = df_all_marker_hits.values.T[ORDER,:][:,ORDER2]
X_supp_marker_hits = df_supp_marker_hits.values.T[ORDER,:][:,ORDER2]
X_neg_supp_marker_hits = df_neg_supp_marker_hits.values.T[ORDER,:][:,ORDER2]
_figsize = np.float_(X_markers_cluster_means_transpose.shape[::-1]) / \
np.max(X_markers_cluster_means_transpose.shape) * 15.0 + 2.0
_figsize[1] *= 1.5
height_ratio = df_markers_cluster_centroids.shape[1] / (1. * df_markers_weighted.shape[0])
gs = matplotlib.gridspec.GridSpec(2, 2, width_ratios=[20,1], height_ratios=[height_ratio,1],
left=0.13, right=0.99, top=0.99, bottom=0.25, wspace=0.01, hspace=0.04)
fig = plt.figure(figsize=_figsize)
if True:
ax = plt.subplot(gs[0])
cell_counts = df_votingResults['# cells in cluster'].values.copy()[ORDER]
means = (df_means.iloc[ORDER2,ORDER] * cell_counts / cell_counts.sum()).sum(axis=1)
ax.imshow(means.values[None, :], cmap='Reds', interpolation='None', aspect='auto',
extent=(-0.5, means.shape[0] - 0.5, -0.5, +0.5))
ax.imshow(X_markers_cluster_means_sorted,cmap='Blues', interpolation='None', aspect='auto',
extent=(-0.5, X_markers_cluster_means_sorted.shape[1] - 0.5, X_markers_cluster_means_sorted.shape[0] - 0.5 + 1.0, +0.5))
i_list,j_list = np.where(X_marker_hits.T > 0)
color = 'w' #(1., 1., 0.7)
ax.plot(i_list, j_list + 1., 'k*', mec=color, mew=0.5, markersize=4)
i_list_supp, j_list_supp = np.where(X_supp_marker_hits.T > 0)
i_list_neg_supp, j_list_neg_supp = np.where(X_neg_supp_marker_hits.T > 0)
ax.plot(i_list_supp, j_list_supp + 1., 'k*', mec='lime', mew=0.7, markersize=4) #mec='k', alpha=0.5, markersize=6
ax.plot(i_list_neg_supp, j_list_neg_supp + 1., 'k*', mec='red', mew=0.7, markersize=4) #mec='k', alpha=0.5, markersize=6
ax.set_xticks([])
ax.set_yticks(range(X_markers_cluster_means_transpose.shape[0] + 1))
clusterNames = list(votingResults.values())
clusterIndices = list(votingResults.keys())
ax.set_yticklabels(['Mean'] + [str(clusterNames[i]) + ' (' + str(clusterIndices[i]) + ')' for i in ORDER], rotation=0, fontsize=6*fontscale)
ax.set_xlim([-0.5,X_markers_cluster_means_transpose.shape[1] - 0.5])
ax.set_ylim([-0.5,X_markers_cluster_means_transpose.shape[0] + 1 - 0.5])
if True:
ax2 = plt.subplot(gs[1])
fontsize = 5*fontscale
cells_in_clusters = df_votingResults['# cells in cluster'].values.copy()[ORDER]
numberOfCells = cells_in_clusters.sum()
with open(os.path.join(self.saveDir, 'ColormapForCellTypes.txt'), 'r') as temp_file:
colormap = {item.strip().split('\t')[0]:eval(item.strip().split('\t')[1]) for item in temp_file.readlines()}
celltypes = df_votingResults['Predicted cell type'].str.split(' #', expand=True)[0].values.copy()[ORDER]
ax2.barh(y=range(len(cells_in_clusters)), width=cells_in_clusters, height=0.8, align='center', color=[colormap[i] for i in celltypes])
for i in range(len(cells_in_clusters)):
ax2.text(np.max(cells_in_clusters), i, cells_in_clusters[i], ha='right',va='top', color='k', weight='bold', fontsize=fontsize)
ax2.text(0.02 * numberOfCells, i, str(round(100 * cells_in_clusters[i] / numberOfCells, 1)) + '%', ha='left',va='bottom', color='b', fontsize=fontsize)
ax2.set_xticklabels(cells_in_clusters, fontsize=fontsize)
ax2.set_yticklabels(cells_in_clusters, alpha=0)
ax2.set_xticklabels(cells_in_clusters, alpha=0)
ax2.set_xticks([])
ax2.set_yticks([])
ax2.set_xlabel('Number of\ncells in clusters', fontsize=fontsize)
ax2.set_ylim(-1.5, len(cells_in_clusters) - 0.5)
if True:
ax3 = plt.subplot(gs[2])
masked = np.ma.array(df_markers_weighted.values, mask=(df_markers_weighted.values == 0.))
cmap = plt.cm.PiYG
#cmap = matplotlib.colors.LinearSegmentedColormap.from_list('RedGreen', [(1, 0, 0), (0,
#1, 0)], N=100)
cmap.set_bad('white')
value = 0.5 * np.abs(df_markers_weighted).max().max()
ax3.imshow(masked, cmap=cmap, vmin=-value, vmax=+value, interpolation='None', aspect='auto')
ax3.set_xticks(range(X_markers_cluster_means_transpose.shape[1]))
ax3.set_yticks(range(df_markers_weighted.shape[0]))
xtickslabels = np.array(df_markers_cluster_centroids.index[ORDER2])
for i in range(0,len(xtickslabels),2):
xtickslabels[i] += " ─────────"
ax3.set_xticklabels(xtickslabels, rotation=90, fontsize=5*fontscale)
ax3.set_yticklabels(df_markers_weighted.index.values, rotation=0, fontsize=8*fontscale)
ax3.set_xlim([-0.5, df_markers_weighted.shape[1] - 0.5])
ax3.set_ylim([-0.5, df_markers_weighted.shape[0] - 0.5])
self.saveFigure(fig, self.saveDir, self.dataName + '_marker_expression', extension=extension, dpi=dpi, **kwargs)
return fig
[docs] @tryExcept
def internalMakeMarkerSubplots(self, df, X_projection, hugo_cd_dict, NoFrameOnFigures = False, HideClusterLabels = False, outlineClusters = True, analyzeBy = 'cluster', saveSubDir = 'marker_subplots', dpi = 300, extension = 'png', **kwargs):
'''Produce subplots on each marker and its expression on all clusters
Parameters:
df: pandas.DataFrame
Data with marker genes expression
X_projection: 2d numpy.array
2D coordinates for each cell
hugo_cd_dict: dictionary
With aliases for hugo names of genes
NoFrameOnFigures: boolean, Default False
Whether to include frame on the figure
HideClusterLabels: boolean, Default False
Whether to print cluster labels on the figure
outlineClusters: boolean, Default True
Whether to outline the clusters with circles
analyzeBy: str, Default 'cluster'
What level of lablels to include.
Other possible option is 'label'
Returns:
None
Usage:
Function used internally
DCS = DigitalCellSorter.DigitalCellSorter()
DCS.internalMakeMarkerSubplots(df_markers_expr, projection, hugo_cd_dict)
'''
def MarkerSubplot(counter, marker, df, analyzeBy, X_projection, cellClusterIndexLabel, hugo_cd_dict, dataName, saveDir, NoFrameOnFigures, HideClusterLabels, XLIM, YLIM, directory, circles):
fig,ax = plt.subplots(figsize=(8,8))
ax.cla()
suffix = '(' + str(hugo_cd_dict[marker]).replace('\"', '').replace('\'', '').replace('(', '').replace(')', '').replace(' ','') + ')'
ax.plot(np.nan,np.nan,'*',markersize=15,c=cm.seismic(1.0),label=marker + '\n' + suffix.replace(',','\n'))
circleIndices = np.where(df.loc[marker].values == 0)[0] # cells that don't have this marker
starIndices = np.where(df.loc[marker].values > 0)[0] # cells that have this marker
starIndices = starIndices[np.argsort(df.loc[marker].values[starIndices])]
args1 = [X_projection[0,circleIndices],
X_projection[1,circleIndices]]
kwargs1 = {'marker':'o',
'c':'b',
'alpha':0.1,
's':6 * 3,
'linewidth':0,}
args2 = [X_projection[0,starIndices],
X_projection[1,starIndices]]
kwargs2 = {'marker':'*',
'c':cm.seismic(df.loc[marker].values[starIndices] / np.max(df.loc[marker].values[starIndices])),
's':30 * 4,
'linewidth':0.0,}
ax.scatter(*args1,**kwargs1)
ax.scatter(*args2,**kwargs2)
for label in set(cellClusterIndexLabel):
# cells with this label
X_projection2_cluster = X_projection[:,cellClusterIndexLabel == label]
x_mean = np.mean(X_projection2_cluster[0,:])
y_mean = np.mean(X_projection2_cluster[1,:])
_text_label = label if not HideClusterLabels else ''
ax.text(x_mean,y_mean,
_text_label.
replace('-','-\n').replace(' ','\n').
replace('T\n','T ').replace('B\n','B ').
replace('\n#',' #').replace('/','/\n').
replace('NK\n','NK ').replace('Stem\n','Stem '),
fontsize=10,
ha='center',va='center',#alpha=0.75,
).set_path_effects([path_effects.Stroke(linewidth=3, foreground='white'),path_effects.Normal()])
if circles:
radius = np.sqrt(X_projection2_cluster.shape[1]) * 300.0
ax.scatter(x_mean,y_mean,s=radius * 1,facecolors='none',edgecolors='k')
ax.set_xlim(XLIM)
ax.set_ylim(YLIM)
ax.legend(loc='upper right', frameon=False, fontsize=14) #loc='best',numpoints=1,fontsize=12
ax.set_xticks([])
ax.set_yticks([])
if NoFrameOnFigures:
#fig.patch.set_visible(False)
ax.axis('off')
fig.tight_layout()
if self.saveDir is not None:
self.saveFigure(fig, directory, '%s_%s_%s_%s' % (self.dataName,marker,suffix.replace(',','_').replace('/','_'),analyzeBy), extension=extension, dpi=dpi, **kwargs)
if self.verbose >= 2:
print(marker, end=" ", flush=True)
return
maxs = np.max(X_projection,axis=1)
mins = np.min(X_projection,axis=1)
maxDiffs = maxs - mins
deltas = maxDiffs * 0.05
XLIM = [mins[0] - deltas[0],maxs[0] + deltas[0]]
YLIM = [mins[1] - deltas[1],maxs[1] + deltas[1]]
if len(df.index) > 1:
if self.verbose >= 2:
print('\nSaving marker expression plots:\n')
else:
if self.verbose >= 2:
print('Saving expression plot of:', end=' ', flush=True)
if analyzeBy == 'celltype':
try:
index = df.columns.get_level_values('label').str.split(' #', expand=True).get_level_values(0).values
except:
index = df.columns.get_level_values('celltype').values
else:
index = df.columns.get_level_values(analyzeBy).values
for counter,marker in enumerate(df.index.values):
MarkerSubplot(counter, marker, pd.DataFrame(data=np.reshape(np.array(df.loc[marker]), (1,len(df.loc[marker]))), columns=df.columns, index=[marker]), analyzeBy, X_projection, index, hugo_cd_dict, self.dataName, self.saveDir, NoFrameOnFigures, HideClusterLabels, XLIM, YLIM, os.path.join(self.saveDir, saveSubDir, ''), outlineClusters)
if self.verbose >= 1:
print()
return
[docs] @tryExcept
def makeAnnotationResultsMatrixPlot(self, dpi = 300, extension = 'png', **kwargs):
'''Produce voting results voting matrix plot
Parameters:
dpi: int, Default 600
Resolution of the figure image
extension: str, Default 'png'
Format of the figure file
Returns:
None
Usage:
DCS = DigitalCellSorter.DigitalCellSorter()
DCS.makeAnnotationResultsMatrixPlot()
'''
df_votingResults = pd.read_excel(os.path.join(self.saveDir, self.dataName + '_annotation.xlsx'), sheet_name='z-scores')
cellTypes = sorted([x for x in df_votingResults.columns.values.tolist() if x not in ['cluster', 'Predicted cell type', '# cells in cluster', 'Winning score', 'Supporting markers', 'Contradicting markers', 'All markers']])
#df_votingResults['order'] =
#scipy.cluster.hierarchy.dendrogram(scipy.cluster.hierarchy.linkage(df_votingResults[cellTypes].values,
#method='ward', metric='euclidean', optimal_ordering=True),
# no_plot=True,get_leaves=True)['leaves']
df_votingResults['order'] = np.argsort(np.argsort(df_votingResults['Predicted cell type']))
df_votingResults = df_votingResults.sort_values(by='order', axis=0, ascending=False)
numberOfCells = np.sum(df_votingResults['# cells in cluster'])
num_of_cell_types = len(cellTypes)
num_of_clusters = np.unique(df_votingResults['cluster']).shape[0]
indicis_of_clusters = df_votingResults['cluster']
assigned_names_of_clusters = df_votingResults['Predicted cell type']
label_max = 0.5 * max([len(assigned_names_of_clusters[i]) for i in range(len(assigned_names_of_clusters))])
_figsize = np.float_((num_of_cell_types,num_of_clusters)) / np.max(num_of_clusters) * 15.0
_figsize[0] += 1.0 + label_max
_figsize[1] += 2.0
gs = matplotlib.gridspec.GridSpec(1, 2, width_ratios=[3,1])
fig = plt.figure(figsize=_figsize)
ax = fig.add_axes([0.15, 0.125, 0.65, 0.85])
axx = fig.add_axes([0.76, 0.125, 0.19, 0.85])
zscores = df_votingResults[cellTypes].values
ax.imshow(zscores, aspect=1, cmap='Greens', vmin=0, vmax=0.5, interpolation='None')
for i in range(num_of_clusters):
for j in range(num_of_cell_types):
if np.round(zscores[i,j],1) > 0:
if zscores[i,j] == np.max(zscores[i,:]):
ax.text(j,i,np.round(zscores[i,j],1), color='w', fontsize=125 * 4 / max([num_of_cell_types, num_of_clusters]),ha='center',va='center').set_path_effects([path_effects.Stroke(linewidth=4, foreground='red'),path_effects.Normal()])
else:
ax.text(j,i,np.round(zscores[i,j],1), color='w', fontsize=125 * 3 / max([num_of_cell_types, num_of_clusters]),ha='center',va='center').set_path_effects([path_effects.Stroke(linewidth=2, foreground='black'),path_effects.Normal()])
ax.set_xticks(range(num_of_cell_types))
ax.set_yticks(range(num_of_clusters))
ytickslabels = copy.deepcopy(assigned_names_of_clusters)
for i in range(len(ytickslabels)):
ytickslabels[i] = str(assigned_names_of_clusters[i]) + ' (' + str(indicis_of_clusters[i]) + ')'
xtickslabels = np.array(copy.deepcopy(cellTypes))
#for i in range(len(xtickslabels)):
# if i % 3 == 1: xtickslabels[i] = '\n' + xtickslabels[i]
# if i % 3 == 2: xtickslabels[i] = '\n\n' + xtickslabels[i]
ax.set_xticklabels(xtickslabels, rotation=30, fontsize=20, ha='right')
ax.set_yticklabels(ytickslabels, fontsize=20, rotation=0)
ax.set_xlim([-0.5,num_of_cell_types - 0.5])
ax.set_ylim([-0.5,num_of_clusters - 0.5])
cells_in_clusters = df_votingResults['# cells in cluster'].values
with open(os.path.join(self.saveDir, 'ColormapForCellTypes.txt'), 'r') as temp_file:
colormap = {item.strip().split('\t')[0]:eval(item.strip().split('\t')[1]) for item in temp_file.readlines()}
celltypes = df_votingResults['Predicted cell type'].str.split(' #', expand=True)[0].values.copy()
axx.barh(y=range(len(cells_in_clusters)), width=cells_in_clusters, height=0.8, align='center', color=[colormap[i] for i in celltypes])
for i in range(len(cells_in_clusters)):
axx.text(np.max(cells_in_clusters), i, cells_in_clusters[i], ha='right',va='top', color='k', weight='bold', fontsize = 20)
axx.text(0.02 * numberOfCells, i, str(round(100 * cells_in_clusters[i] / numberOfCells, 1)) + '%', ha='left',va='bottom', color='b', fontsize = 20)
axx.set_xticklabels(cells_in_clusters, fontsize=20)
axx.set_yticklabels(cells_in_clusters, alpha=0)
axx.set_xticklabels(cells_in_clusters, alpha=0)
axx.set_xlabel('Number of\ncells in clusters', fontsize=20)
axx.set_ylim(-0.5, num_of_clusters - 0.5)
self.saveFigure(fig, self.saveDir, self.dataName + '_scores_matrix', extension=extension, dpi=dpi, **kwargs)
return fig
[docs] @tryExcept
def makeHistogramNullDistributionPlot(self, dpi = 600, extension = 'png', **kwargs):
'''Produce histogram plot of the voting null distributions
Parameters:
dpi: int, Default 600
Resolution of the figure image
extension: str, Default 'png'
Format of the figure file
Returns:
None
Usage:
DCS = DigitalCellSorter.DigitalCellSorter()
DCS.makeHistogramNullDistributionPlot()
'''
try:
df_noise_dict = pd.read_excel(os.path.join(self.saveDir, self.dataName + '_annotation.xlsx'), sheet_name='Null distributions', index_col=0, header=[0,1], skiprows=[2])
except Exception as exception:
if self.verbose >= 2:
print(exception)
print('Error loading distributions from the results file')
return
if len(df_noise_dict) == 0:
if self.verbose >= 1:
print('Null distribution is empty in the results file')
return
df_votingResultsV = pd.read_excel(os.path.join(self.saveDir, self.dataName + '_annotation.xlsx'), sheet_name='Voting scores', dtype={'cluster':str}).reset_index().set_index('cluster')
df_votingResultsZ = pd.read_excel(os.path.join(self.saveDir, self.dataName + '_annotation.xlsx'), sheet_name='z-scores', dtype={'cluster':str}).reset_index().set_index('cluster')
predicted_cell_type_cluster = df_votingResultsZ['Predicted cell type'].values
predicted_cell_type = df_votingResultsZ['Predicted cell type'].str.split(' #', expand=True)[0].values
cellTypes = sorted([x for x in df_votingResultsV.columns.values.tolist() if x not in ['cluster', 'Predicted cell type', '# cells in cluster', 'Winning score', 'Supporting markers', 'All markers']])
df_votingResultsV = df_votingResultsV[cellTypes]
df_votingResultsZ = df_votingResultsZ[cellTypes]
cell_types = np.unique(df_noise_dict.columns.get_level_values(0).values)
num_of_cell_types = cell_types.shape[0]
clusters = np.unique(df_noise_dict.columns.get_level_values(1).values).astype(str)
num_of_clusters = clusters.shape[0]
maxy = np.round(np.nanmax(df_noise_dict.values), 2)
with open(os.path.join(self.saveDir, 'ColormapForCellTypes.txt'), 'r') as temp_file:
colormap = {item.strip().split('\t')[0]:eval(item.strip().split('\t')[1]) for item in temp_file.readlines()}
origWidth = matplotlib.rcParams['axes.linewidth']
matplotlib.rcParams['axes.linewidth'] = 0.1
gs = matplotlib.gridspec.GridSpec(num_of_clusters, num_of_cell_types, hspace=0.45, wspace=0.1, bottom=0.04, top=0.96, left=0.05, right=0.99)
fig = plt.figure(figsize=(num_of_cell_types, num_of_clusters * 0.4))
for i in range(num_of_cell_types):
try:
minx, maxx = np.round(df_noise_dict.index.values[np.where(df_noise_dict.xs(key=cell_types[i], level=0, axis=1).values.sum(axis=1) != 0.)[0][[0,-1]]], 3)
#maxx += 0.1
maxx = np.round(maxx, 3)
except Exception as exception:
print(exception)
minx = np.round(df_noise_dict.index.values[0], 3)
maxx = np.round(df_noise_dict.index.values[-1], 3)
if self.verbose >= 2:
print(cell_types[i], end=':\t')
for j in range(num_of_clusters):
if self.verbose >= 2:
print(j, end=',', flush=True)
fontsize = 3
ax = plt.subplot(gs[i + num_of_cell_types * j])
ax.bar(df_noise_dict.index.values, df_noise_dict[(cell_types[i], clusters[j])].values,
width=df_noise_dict.index.values[2] - df_noise_dict.index.values[1], align='center', color=colormap[cell_types[i]])
valueV = df_votingResultsV.loc[clusters[j], cell_types[i]]
valueZ = df_votingResultsZ.loc[clusters[j], cell_types[i]]
ax.axvline(x=valueV, ymin=0, ymax=1, color='k', lw=0.2)
color = 'k' if predicted_cell_type[j] != cell_types[i] else 'r'
xloc = 0.02 * minx + minx #valueV
ax.text(xloc, maxy - 0.02 * maxy, r'$V_{%s,%s}=$' % (i,j) + str(np.round(valueV,2)), fontsize=fontsize, va='top', ha='left', color=color, zorder=np.inf)
ax.text(xloc, maxy - 0.2 * maxy, r'$\Lambda_{%s,%s}=$' % (i,j) + str(np.round(valueZ,2)), fontsize=fontsize, va='top', ha='left', color=color, zorder=np.inf)
if j == 0:
ax.set_title(cell_types[i], fontdict={'color': 'b', 'size':'6'})
if i == 0:
ax.text(0. * maxx, maxy + 0.05 * maxy, predicted_cell_type_cluster[j] + ' (Cluster %s)' % clusters[j], rotation=0,
fontsize=fontsize, weight='bold', va='bottom', ha='left', color='k')
ax.set_ylabel('Probability', fontsize=fontsize)
ax.set_yticklabels([0., maxy], fontsize=fontsize)
else:
ax.set_yticklabels([], fontsize=fontsize)
if j == num_of_clusters - 1:
#ax.set_xlabel('Voting score', fontsize=fontsize)
ax.set_xticklabels([minx, maxx], fontsize=fontsize)
if maxx > 0.0 and minx < 0.0:
ax.text(0.0, -0.2 * maxy, '0.0', fontsize=fontsize, va='top', ha='center', color='k')
else:
ax.set_xticklabels([], fontsize=fontsize)
ax.set_xticks([minx, maxx])
ax.set_yticks([0., maxy])
ax.set_xlim(minx, maxx)
ax.set_ylim(0., maxy)
ax.tick_params(direction='in', length=1, width=0.1, colors='k')
if self.verbose >= 1:
print()
self.saveFigure(fig, self.saveDir, self.dataName + '_null_distributions', extension=extension, dpi=dpi, **kwargs)
matplotlib.rcParams['axes.linewidth'] = origWidth
return fig
[docs] @tryExcept
def makeProjectionPlot(self, Xprojection, cellClusterIndexLabel, suffix = '', colormap = cm.jet, legend = True, labels = True, colorbar = False, fontsize = 10, plotNaNs = True, rightShift = 0.3, dpi = 300, extension = 'png', **kwargs):
'''Produce projection plot (2D layout) with a specified coloring scheme
Parameters:
Xprojection: 2D coordinates for each cell
cellClusterIndexLabel: cluster index for each cell
suffix: str
Text label to append to the figure name
colormap: cell coloring sequence, can be a dictionary or cm.colormap,
Default matplotlib.colors.LinearSegmentedColormap.jet
legend: boolean, Default True
Whether to print legend
labels: boolean, Default True
Whether to print labels
colorbar: boolean, Default False
Whether to show colorbar
Use with non-numerical values will raise an error
fontsize: int, Default 10
Labels and legend font size
plotNaNs: boolean, Default True
Whether to plot NaN labels (in grey)
rightShift: float, Default 0.3
Fraction of space to leave on the right-hand side of the plot.
This parameter is useful for adjusting legend overlap with data points.
dpi: int, Default 600
Resolution of the figure image
extension: str, Default 'png'
Format of the figure file
Returns:
None
Usage:
DCS = DigitalCellSorter.DigitalCellSorter()
DCS.makeProjectionPlot(projection, cellClusterIndexLabel, suffix)
'''
def add_colorbar(fig, labels, cmap = matplotlib.colors.LinearSegmentedColormap.from_list('GR', [(0, 1, 0), (1, 0, 0)], N=100), fontsize = 10):
mapp = cm.ScalarMappable(norm=matplotlib.colors.Normalize(vmin=np.min(labels), vmax=np.max(labels)), cmap=cmap)
mapp.set_array(labels)
sp = np.linspace(np.max(labels), np.min(labels), num=6, endpoint=True)
axisColor = fig.add_axes([0.9,0.5,0.01,0.4])
fig.colorbar(mapp, cax=axisColor, ticks=sp)
axisColor.tick_params(labelsize=fontsize)
axisColor.set_yticklabels(np.round(sp,2))
return None
fig = plt.figure(figsize=(8,8))
ax = fig.add_axes([0.05,0.05,0.9,0.9])
maxs, mins = np.max(Xprojection,axis=1), np.min(Xprojection,axis=1)
missing = np.where(cellClusterIndexLabel != cellClusterIndexLabel)[0]
if len(missing) > 0:
ax.plot(Xprojection[0, missing], Xprojection[1, missing], 'o', color='grey', mew=0.5, alpha=0.2, markeredgecolor='k', label='NaN')
nonMissing = np.where(cellClusterIndexLabel == cellClusterIndexLabel)[0]
cellClusterIndexLabel = np.array(cellClusterIndexLabel)[nonMissing]
Xprojection = Xprojection[:, nonMissing]
possible_cluster_labels = np.sort(np.unique(cellClusterIndexLabel))
if labels:
if self.verbose >= 3:
print(possible_cluster_labels)
texts = []
for ilabel, label in enumerate(possible_cluster_labels):
if type(colormap) is matplotlib.colors.LinearSegmentedColormap:
color = colormap(ilabel / len(possible_cluster_labels))
elif type(colormap) is str:
color = plt.get_cmap(colormap)(ilabel / len(possible_cluster_labels))
else:
color = colormap[label.split(' #')[0]]
XprojectionC = Xprojection[:,cellClusterIndexLabel == label]
ax.plot(XprojectionC[0,:], XprojectionC[1,:], 'o', color=color, mew=0.5, alpha=0.3, markeredgecolor='k', label=label)
if labels:
text = ax.text(np.median(XprojectionC[0,:]), np.median(XprojectionC[1,:]),
label, fontsize=fontsize, ha='center',va='center')
text.set_path_effects([path_effects.Stroke(linewidth=3, foreground='white'), path_effects.Normal()])
texts.append(text)
adjust_text(texts, # arrowprops=dict(arrowstyle='-', color='k', lw=0.3, alpha=0.75),
expand_text=(0.9, 0.9), expand_points=(0.91, 0.9),
force_text=(0.01, 0.01), force_points=(0.01, 0.01))
ax.set_xticks([])
ax.set_yticks([])
ax.set_xlim([mins[0] - (maxs[0] - mins[0]) * 0.05, (1 + rightShift) * (maxs[0] + (maxs[0] - mins[0]) * 0.05)])
ax.set_ylim([mins[1] - (maxs[1] - mins[1]) * 0.05, maxs[1] + (maxs[1] - mins[1]) * 0.05])
if legend:
plt.legend(loc='lower right', frameon=False, fontsize=fontsize)
#fig.patch.set_visible(False)
ax.axis('off')
if colorbar:
add_colorbar(fig, possible_cluster_labels, cmap=colormap, fontsize=fontsize)
self.saveFigure(fig, self.saveDir, '%s_clusters_%s' % (self.dataName, suffix), extension=extension, dpi=dpi, **kwargs)
return fig
[docs] @tryExcept
def makeStackedBarplot(self, clusterName = None, legendStyle = False, includeLowQC = True, fontsize = 12, dpi = 300, extension = 'png', **kwargs):
'''Produce stacked barplot with cell fractions
Parameters:
clusterName: str, Deafult None
Label to include at the bar bottom.
If None the self.dataName value will be used
legendStyle: boolean, Default False
Use one out of two styles of this figure
includeLowQC: boolean, Default True
Wether to include low quality cells
dpi: int, Default 600
Resolution of the figure image
extension: str, Default 'png'
Format of the figure file
Returns:
None
Usage:
DCS = DigitalCellSorter.DigitalCellSorter()
DCS.makeStackedBarplot(clusterName)
'''
def get_stacked_data_and_colors(saveDir):
with open(os.path.join(saveDir, 'ColormapForCellTypes.txt'), 'r') as temp_file:
colors = temp_file.readlines()
colors = np.vstack([(color.strip('\n').split('\t')) for color in colors])
colors = pd.DataFrame(colors.T[1], index=colors.T[0]).apply(lambda x: tuple(np.float_(x[0][1:][:-1].split(','))), axis=1)
df = pd.read_excel(os.path.join(self.saveDir, self.dataName + '_annotation.xlsx'), sheet_name='z-scores')
index = df['Predicted cell type']
if not clusterName is None:
barName = self.dataName # + ': ' + clusterName
else:
barName = self.dataName
index = [index[i][:len(index[i]) if index[i].find('#') - 1 == -2 else index[i].find('#') - 1].strip('*').strip('#').strip(' ') for i in range(len(index))]
df_BM_temp = pd.DataFrame(data=df['# cells in cluster'].values, index=index, columns=[barName])
df_BM_temp = df_BM_temp.groupby(level=0, axis=0, sort=False).sum()
df_main = pd.DataFrame(data=np.zeros((len(colors),1)), index=colors.index, columns=[barName])
for i, item in enumerate(df_BM_temp.index):
df_main.loc[item,barName] += df_BM_temp.iloc[i][barName] if df_BM_temp.index[i] == item else 0
s = 'sums'
df_main[s] = np.array(np.sum(df_main, axis=1))
df_main.loc[self.nameForUnknown, s] = 0
if includeLowQC:
try:
cells_all = pd.read_hdf(self.fileHDFpath, key='df_projection_pre_QC', mode='r').columns.get_level_values('cell')
cells_high = pd.read_hdf(self.fileHDFpath, key='df_projection', mode='r').columns.get_level_values('cell')
cells_low_count = len(cells_all.difference(cells_high))
del cells_all, cells_high
df_main.loc[self.nameForLowQC, df_main.columns[0]] = cells_low_count
df_main.loc[self.nameForLowQC, df_main.columns[1]] = -1
colors[self.nameForLowQC] = (0.6, 0.6, 0.6, 1.)
except Exception as exception:
if self.verbose >= 1:
print(exception)
print('QC data not found')
df_main = df_main.apply(lambda x: 100. * x / np.sum(df_main, axis=0), axis=1).loc[np.sum(df_main, axis=1) > 0].sort_values(by=[s]).drop(columns=[s])
return df_main, colors, clusterName
if clusterName is None:
clusterName = self.dataName
df_Main, colors, clusterName = get_stacked_data_and_colors(self.saveDir)
if legendStyle:
fig,ax = plt.subplots(figsize=(4.5,8)) #4.15
else:
fig = plt.figure(figsize=(4.5,8))
ax = fig.add_axes([0.2, 0.05, 0.1, 0.9])
barWidth = 1.0
cellTypes = df_Main.index
bottom = np.zeros((len(df_Main.columns)))
centers = []
fractions = []
for i in range(len(cellTypes)):
bottom += df_Main.loc[cellTypes[i - 1]].values if i > 0 else 0
ax.bar(range(len(df_Main.columns)), list(df_Main.loc[cellTypes[i]]), bottom=list(bottom), color=colors.loc[cellTypes[i]], edgecolor='white', width=barWidth, label=cellTypes[i])
centers.append(bottom + 0.5 * df_Main.loc[cellTypes[i]].values[0])
fractions.append(df_Main.loc[cellTypes[i]].values[0])
plt.xticks(range(len(df_Main.columns)), list(df_Main.columns), fontsize=12)
plt.yticks([0,20,40,60,80,100], ['0','20%','40%','60%','80%','100%'], fontsize=12)
handles, labels = ax.get_legend_handles_labels()
ms = np.max([len(item) for item in labels]) - len('cell')
labels = [item.replace(' ','\n').replace('\nCD4', ' CD4').replace('CD4\n', 'CD4 ').replace('\ncell', ' cell').replace('B\n', 'B ').replace('T\n', 'T ') if len(item) >= ms else item for item in labels[::-1]]
if legendStyle:
ax.legend(handles[::-1], labels, loc='upper left', bbox_to_anchor=(1,1), ncol=1, frameon=False, fontsize=14, labelspacing=1, title = ''.join([' ' for _ in range(60)]))
else:
fractions = np.round(np.array(fractions)[::-1], 1)
centers = np.round(np.array(centers).T[0][::-1], 0)
centers_orig = centers.copy()
step = 5.
for i in range(len(centers) - 2,0,-1):
if (centers[i] - centers[i + 1]) < step:
centers[i] = centers[i + 1] + step
for i in range(len(centers)):
ax.text(1.3, centers[i], '%s%% ' % (fractions[i]) + labels[i], fontsize=fontsize, va='center', ha='left')
ax.plot([0.65, 1.2], [centers_orig[i], centers[i]], c='k', lw=0.75, clip_on=False)
plt.xlim((-0.5, len(df_Main.columns) - 0.5))
plt.ylim((0, 100))
for spine in plt.gca().spines.values():
spine.set_visible(False)
if legendStyle:
fig.tight_layout()
saveName = "%s_subclustering_stacked_barplot_%s" % (self.dataName, ('All cell clusters' if clusterName == None else clusterName).replace(' ', '_').replace('*', ''))
self.saveFigure(fig, self.saveDir, saveName, extension=extension, dpi=dpi, **kwargs)
if self.verbose >= 2:
print('Saved stacked bar plot: %s' % ('All cell clusters' if clusterName == None else clusterName))
return fig
[docs] @tryExcept
def makeQualityControlHistogramPlot(self, subset, cutoff, plotPathAndName = None, N_bins = 100, mito = False, displayMeasures = True, precision = 4, quantilePlotCutoff = 0.95, dpi = 300, extension = 'png', fontScale = 1.5, includeTitle = False, **kwargs):
'''Function to calculate QC quality cutoff and visualize it on a histogram
Parameters:
subset: pandas.Series
Data to analyze
cutoff: float
Cutoff to display
plotPathAndName: str, Default None
Text to include in the figure title and file name
N_bins: int, Default 100
Number of bins of the histogram
mito: boolean, Default False
Whether the analysis of mitochondrial genes fraction
displayMeasures: boolean, Default True
Print vertical dashed lines along with mean, median, and standard deviation
precision: int, Default 4
Number of digits after decimal
quantilePlotCutoff: float, Default 0.99
Distributions are cut to display the range from 0 to quantilePlotCutoff
dpi: int, Default 600
Resolution of the figure image
extension: str, Default 'png'
Format of the figure file
fontScale: float, Default 1.5
Scale most of the figure fonts
includeTitle: boolean, Default False
Whether to include title on the figure
Returns:
None
Usage:
DCS = DigitalCellSorter.DigitalCellSorter()
cutoff = DCS.makeQualityControlHistogramPlot(subset, cutoff)
'''
if plotPathAndName is None:
plotPathAndName = 'QC_Plot'
range_min = 0. #np.min(subset)
if mito:
range_max = max(1.1 * cutoff, np.quantile(subset, quantilePlotCutoff) + 0.05)
else:
range_max = np.quantile(subset, quantilePlotCutoff)
hist_of_subset = scipy.stats.rv_histogram(np.histogram(subset, bins=N_bins, range=(range_min, range_max)))
hist_data = hist_of_subset._hpdf / N_bins
hist_bins = hist_of_subset._hbins
fig, ax = plt.subplots(figsize=(8,8))
bar_bin_width = range_max / N_bins
ax.bar(hist_bins, hist_data[:-1], width=0.9 * bar_bin_width, color='b', align='center')
try:
title = os.path.basename(plotPathAndName)
except Exception as exception:
if self.verbose >= 1:
print(exception)
title = plotPathAndName
if includeTitle:
ax.set_title(title, fontdict={'color': 'b'})
ax.set_xlabel('Fraction' if mito else 'Count', fontsize=10 * fontScale)
ax.set_ylabel('Density', fontsize=10 * fontScale)
ax.set_ylim(0.,ax.get_ylim()[1])
ax.set_xlim(range_min - 0.5 * bar_bin_width, range_max + 0.5 * bar_bin_width)
ax.tick_params(labelsize=8 * fontScale)
xs = np.linspace(hist_bins[0], hist_bins[-1], 1000)
spline_data = np.vstack((xs, UnivariateSpline(hist_bins, hist_data[:-1], k=5, s=0)(xs))).T
sg = scipy.signal.savgol_filter(spline_data.T[1], 101, 3)
ax.plot(spline_data.T[0], sg, 'r', lw=3, alpha=0.95)
try:
x, y = cutoff, sg[np.where(spline_data.T[0] >= cutoff)[0][0]]
except Exception as exception:
if self.verbose >= 1:
print(exception)
x, y = cutoff, 0.
ax.plot([x,x], [0,y], 'k', lw=2)
ax.plot(x, y, 'ko', ms=10, alpha=0.8)
ax.plot(x, y, 'ro', ms=7)
ax.text(x, -0.04 * spline_data.T[1].max(), str(np.round(cutoff, precision)), fontsize=8 * fontScale, va='top', ha='center', color='r')
ax.ticklabel_format(axis='y', style='sci', scilimits=(0,0), useMathText=False)
ax.axvspan(cutoff, 1.5 * range_max if mito else -1.5 * range_min, alpha=0.1, color='red', hatch='\\', linewidth=0.1)
fig.tight_layout()
if displayMeasures:
texts = []
dist_std, dist_median, dist_mean = np.round(np.std(subset),precision), np.round(np.median(subset),precision), np.round(np.mean(subset),precision)
if self.verbose >= 2:
print(plotPathAndName, '\tstd:', dist_std, '\tmedian:', dist_median, '\tmean:', dist_mean)
xspan = ax.get_xlim()[1] - ax.get_xlim()[0]
yspan = ax.get_ylim()[1] - ax.get_ylim()[0]
ax.axvline(x=dist_mean, color='k', lw=1.0, ls='--')
text = ax.text(dist_mean + 0.02 * xspan, 0.98 * yspan, r'$\mu=%s$' % (dist_mean), fontsize=fontScale * 10, va='top', ha='left', color='k')
text.set_path_effects([path_effects.Stroke(linewidth=1, foreground='white'),path_effects.Normal()])
texts.append(text)
ax.axvline(x=dist_median, color='k', lw=1.0, ls='--')
text = ax.text(dist_median + 0.02 * xspan, 0.94 * yspan, r'$M=%s$' % (dist_median), fontsize=fontScale * 10, va='top', ha='left', color='k')
text.set_path_effects([path_effects.Stroke(linewidth=1, foreground='white'),path_effects.Normal()])
texts.append(text)
ax.axvline(x=dist_median - dist_std, color='k', lw=1.0, ls='--')
text = ax.text(dist_median - dist_std + 0.02 * xspan, 0.90 * yspan, r'$M-\sigma=%s$' % (np.round(dist_median - dist_std,precision)), fontsize=fontScale * 10, va='top', ha='left', color='k')
text.set_path_effects([path_effects.Stroke(linewidth=1, foreground='white'),path_effects.Normal()])
texts.append(text)
ax.axvline(x=dist_median + dist_std, color='k', lw=1.0, ls='--')
text = ax.text(dist_median + dist_std + 0.02 * xspan, 0.90 * yspan, r'$M+\sigma=%s$' % (np.round(dist_median + dist_std,precision)), fontsize=fontScale * 10, va='top', ha='left', color='k')
text.set_path_effects([path_effects.Stroke(linewidth=1, foreground='white'),path_effects.Normal()])
texts.append(text)
text = ax.text(dist_median + 0.02 * xspan, 0.76 * yspan, r'$\sigma=%s$' % (np.round(dist_std,precision)), fontsize=fontScale * 10, va='bottom', ha='left', color='k')
text.set_path_effects([path_effects.Stroke(linewidth=1, foreground='white'),path_effects.Normal()])
texts.append(text)
ax.annotate('', (dist_median + dist_std, 0.75 * yspan), (dist_median, 0.75 * yspan), arrowprops={'arrowstyle':'<|-|>'})
if not mito:
text = ax.text(0.98, 0.65,
'%s%%\nof distribution \nis shown' % (100. * quantilePlotCutoff),
va='top', ha='right', fontsize=fontScale * 10, transform=ax.transAxes)
text.set_path_effects([path_effects.Stroke(linewidth=0.5, foreground='white'),path_effects.Normal()])
adjust_text(texts)
self.saveFigure(fig, os.path.dirname(plotPathAndName), label=os.path.basename(plotPathAndName) + '_histogram', extension=extension, dpi=dpi, **kwargs)
return fig
[docs] @tryExcept
def makePlotOfNewMarkers(self, df_marker_cell_type, df_new_marker_cell_type, dpi = 300, extension = 'png', **kwargs):
'''Produce plot of the new markers extracted from the annotated clusters
Parameters:
df_marker_cell_type: pandas.DataFrame
Known markers per cell types
df_new_marker_cell_type: pandas.DataFrame
New markers per cell types
dpi: int, Default 600
Resolution of the figure image
extension: str, Default 'png'
Format of the figure file
Returns:
None
Usage:
DCS = DigitalCellSorter.DigitalCellSorter()
DCS.makePlotOfNewMarkers(df_marker_cell_type, df_new_marker_cell_type)
'''
ORDERx = scipy.cluster.hierarchy.dendrogram(scipy.cluster.hierarchy.linkage(df_new_marker_cell_type.values.T, 'ward'), no_plot=True, get_leaves=True)['leaves']
ORDERy = scipy.cluster.hierarchy.dendrogram(scipy.cluster.hierarchy.linkage(df_new_marker_cell_type.values, 'ward'), no_plot=True, get_leaves=True)['leaves']
genes = df_new_marker_cell_type.columns.values[ORDERx]
celltypes = df_new_marker_cell_type.index.values[ORDERy]
df_marker_cell_type = df_marker_cell_type[[celltype for celltype in celltypes if celltype in df_marker_cell_type.columns]]
fig, ax = plt.subplots(figsize=(13,3))
ax.imshow(df_new_marker_cell_type.values[ORDERy,:][:,ORDERx], cmap='Blues', interpolation='None', aspect='auto')
ax.set_xticks([])
ax.set_yticks(range(len(celltypes)))
ax.set_yticklabels(celltypes, rotation=0, fontsize=8)
ax.set_xlim([-0.5,df_new_marker_cell_type.shape[1] - 0.5])
ax.set_ylim([-0.5,df_new_marker_cell_type.shape[0] - 0.5])
for i, celltype in enumerate(celltypes):
if celltype in df_marker_cell_type.columns:
known_markers = df_marker_cell_type[celltype][df_marker_cell_type[celltype] > 0.].index.values
xy = np.array([np.array([np.where(genes == marker)[0][0], i]) for marker in known_markers if marker in genes])
if self.verbose >= 3:
print('Overlapping positive markers of %s: %s (%s)' % (celltype, len(xy), len(known_markers)))
if len(xy) > 0:
ax.plot(xy.T[0], xy.T[1], 'go', markeredgecolor='r', ms=1.0, markeredgewidth=0.2)
known_markers = df_marker_cell_type[celltype][df_marker_cell_type[celltype] < 0.].index.values
xy = np.array([np.array([np.where(genes == marker)[0][0], i]) for marker in known_markers if marker in genes])
if self.verbose >= 3:
print('Overlapping negative markers of %s: %s (%s)' % (celltype, len(xy), len(known_markers)))
if len(xy) > 0:
ax.plot(xy.T[0], xy.T[1], 'ro', markeredgecolor='r', ms=1.0, markeredgewidth=0.2)
#ax.set_title('Additional markers along with the overlapping part of the input (red)')
self.saveFigure(fig, self.saveDir, self.dataName + '_new_markers', extension=extension, dpi=dpi, **kwargs)
return fig
[docs] @tryExcept
def makeTtestPlot(self, df, dfp, label = None, reorder = True, p_value_cutoff = 0.05, dpi = 300, extension = 'png', **kwargs):
'''Produce heatmap plot of t-test p-Values calculated gene-pair-wise
from the annotated clusters.
Parameters:
df: pandas.DataFrame
t-test statistic values
dfp: pandas.DataFrame
t-test p-Values calculated gene-pair-wise
label: str, Default None
Lebel to include in the plot
reorder: boolean, Default True
Reorder values to group similar
p_value_cutoff: float, Default 0.05
p-Value cutoff
dpi: int, Default 600
Resolution of the figure image
extension: str, Default 'png'
Format of the figure file
Returns:
None
Usage:
DCS = DigitalCellSorter.DigitalCellSorter()
DCS.makeTtestPlot(df)
'''
if reorder:
def metricCommonEuclidean(u,v):
where_common = (~np.isnan(u)) * (~np.isnan(v))
return np.sqrt(((u[where_common] - v[where_common]) ** 2).sum())
order = scipy.cluster.hierarchy.dendrogram(scipy.cluster.hierarchy.linkage(df.values, method='average', metric=metricCommonEuclidean), no_plot=True, get_leaves=True)['leaves']
df = df[df.columns.values[order]]
dfp = dfp[dfp.columns.values[order]]
df = df.loc[df.index.values[order]]
dfp = dfp.loc[dfp.index.values[order]]
df = df[df.columns[::-1]]
dfp = dfp[dfp.columns[::-1]]
fig = plt.figure(figsize=(5,5))
ax = fig.add_axes([0.35,0.02,0.6,0.6])
cmap = plt.cm.PuOr_r #BrBG #PiYG #seismic
cmap.set_bad('grey')
ax.imshow(df.values.astype(float), cmap=cmap, interpolation='None', aspect='auto')
wh = np.where(dfp.values.T <= p_value_cutoff)
ax.plot(wh[0], wh[1], '*k')
ax.set_xticks(range(df.shape[1]))
ax.set_yticks(range(df.shape[0]))
ax.set_xticklabels(df.columns.values, rotation=90, fontsize=8)
ax.set_yticklabels(df.index.values, rotation=0, fontsize=8)
ax.set_xlim([-0.5, df.shape[1] - 0.5])
ax.set_ylim([-0.5, df.shape[0] - 0.5])
ax.xaxis.tick_top()
if not label is None:
ax.text(-0.5, 1.5, label, transform=ax.transAxes, fontsize=10, color='k', ha='left', va='top').set_path_effects([path_effects.Stroke(linewidth=0.5, foreground='blue'),path_effects.Normal()])
ax.set_title('Two-tailed p-Value (t-test)')
data = df.values.flatten().astype(float)
data = data[np.where(~np.isnan(data))]
dataMin = np.min(data)
dataMax = np.max(data)
axisColor = fig.add_axes([0.22,0.75,0.08,0.02])
norm = matplotlib.colors.Normalize(vmin=dataMin, vmax=dataMax)
mapp = cm.ScalarMappable(norm=norm, cmap=cmap)
mapp.set_array(data)
fig.colorbar(mapp, cax=axisColor, ticks=[dataMax,dataMin], orientation='horizontal')
axisColor.tick_params(labelsize=4)
axisColor.set_xlabel('Statistic\n*p-Value < %s' % (p_value_cutoff), fontsize=5)
axisColor.set_yticklabels([np.round(dataMax,2), np.round(dataMin,2)])
self.saveFigure(fig, self.saveDir, self.dataName + '_ttest_%s' % (label.replace('\n', '_')), extension=extension, dpi=dpi, **kwargs)
return fig
[docs] @tryExcept
def makeCellMarkersPiePlot(self, type1, type2, df_marker_cell_type = 'all', nameToAppend = None, listUnexpressedMarkers = True, orthogonalSectorsShift = 0.1, rotationAngle = 0, dpi = 300, extension = 'png', **kwargs):
'''Make summary of markers comparison between two cell types.
Parameters:
type1: str
Name of the first cell type to compare
type2: str
Name of the second cell type to compare
df_marker_cell_type: pandas.DataFrame or str, Default 'all'
Celltypes/Markers matrix. If 'expressed', then only expressed markers will be used.
If 'all' then all markers of the input marker list will be used.
If an instance of a pandas.DataFrame is passed, then its all markers will be used.
nameToAppend: str, Default None
String to append to the figure file name.
listUnexpressedMarkers: boolean, Default True
List (highlight) markers that are not expressed. This option is ignored
unless df_marker_cell_type=='all'
orthogonalSectorsShift: float, Default 0.1
Sectors marked as '+/-' and '-/+' are shifted off-center.
Set this parameter to zero to have round continuous pie chart.
rotationAngle: int or float, Default 0
Angle in degrees that will rotate the whole pie chart counterclockwise.
dpi: int, Default 600
Resolution of the figure image
extension: str, Default 'png'
Format of the figure file
Returns:
Marker lists split into categories.
Usage:
DCS = DigitalCellSorter.DigitalCellSorter()
DCS.makeCellMarkersPiePlot('T cells', 'B cells')
'''
try:
df_marker_expression = pd.read_excel(os.path.join(self.saveDir, self.dataName + '_annotation.xlsx'),
sheet_name='Marker cell type weight matrix', index_col=0, header=0).T
except Exception as exception:
if self.verbose >= 1:
print(exception)
print('Marker expression data unavailable')
df_marker_expression = None
listUnexpressedMarkers = False
if type(df_marker_cell_type) is str:
if df_marker_cell_type == 'expressed':
listUnexpressedMarkers = False
if not df_marker_expression is None:
df_marker_cell_type = df_marker_expression
else:
if self.verbose >= 1:
print("Try using option 'all'")
return
additional_name = 'expressed'
elif df_marker_cell_type == 'all':
df_marker_cell_type = self.readMarkerFile()
additional_name = 'all'
if nameToAppend is None:
nameToAppend = '.'.join(os.path.basename(self.geneListFileName).split('.')[:-1])
else:
additional_name = 'custom'
if nameToAppend is None:
nameToAppend = ''
df_marker_cell_type = df_marker_cell_type.fillna(0.)
def getSet(df, celltype):
try:
pos = set(df.index[(df.loc[:, celltype] > 0.)].values)
neg = set(df.index[(df.loc[:, celltype] < 0.)].values)
except Exception as exception:
if self.verbose >= 1:
print(exception)
print('Cell type %s not found' % (celltype))
print('Available celltypes are: %s' % (df.columns.values.tolist()))
return
return pos, neg
def getEightAll(t1p, t2p, t1n, t2n):
p1 = t1p.intersection(t2n)
p3 = t1n.intersection(t2n)
p5 = t1n.intersection(t2p)
p7 = t1p.intersection(t2p)
p0 = t1p - p1 - p7
p2 = t2n - p1 - p3
p4 = t1n - p3 - p5
p6 = t2p - p5 - p7
all = t1p.union(t2p).union(t1n).union(t2n)
return [p0, p1, p2, p3, p4, p5, p6, p7], all
try:
t1p, t1n = getSet(df_marker_cell_type, type1)
t2p, t2n = getSet(df_marker_cell_type, type2)
sets, all = getEightAll(t1p, t2p, t1n, t2n)
except:
return
if listUnexpressedMarkers:
try:
t1pe, t1ne = getSet(df_marker_expression, type1)
t2pe, t2ne = getSet(df_marker_expression, type2)
setsE, allE = getEightAll(t1pe, t2pe, t1ne, t2ne)
except Exception as exception:
if self.verbose >= 1:
print(exception)
listUnexpressedMarkers = False
labels = '+/*', '+/-', '*/-', '-/-', '-/*', '-/+', '*/+', '+/+'
colors = ['limegreen', 'thistle', 'lightcoral', 'red', 'lightcoral', 'thistle', 'limegreen', 'green']
titles = ['Positive in %s:' % (type1),
'Positive in %s, Negative in %s:' % (type1, type2),
'Negative in %s:' % (type2),
'Negative in both:',
'Negative in %s:' % (type1),
'Negative in %s, Positive in %s:' % (type1, type2),
'Positive in %s:' % (type2),
'Positive in both:']
sizes = [len(item) for item in sets]
labels = [(label if size > 0 else '') for label, size in zip(labels, sizes)]
explode = (0.0, orthogonalSectorsShift, 0.0, 0.0, 0.0, orthogonalSectorsShift, 0.0, 0.0)
def findAll(a, b):
start = 0
while True:
start = a.find(b, start)
if start == -1:
return
yield start
start += len(b)
return
if listUnexpressedMarkers:
str_sets = [str(sorted([i for i in list(setsE[j])])).replace("'", "").replace(']','').replace('[','').replace(' ','') for j in range(8)]
else:
str_sets = [str(sorted([i for i in list(sets[j])])).replace("'", "").replace(']','').replace('[','').replace(' ','') for j in range(8)]
for i, item in enumerate(str_sets):
all_temp = item.split(',')
new_item = all_temp[0]
temp_item = all_temp[0]
limit = 75
for gene in all_temp[1:]:
if len(temp_item) > limit:
temp_item = '\n' + gene
new_item += '\n' + gene
else:
new_item += ', ' + gene
temp_item += ', ' + gene
if listUnexpressedMarkers:
str_sets[i] = titles[i] + ' (%s):' % (len(all_temp)) + '\n' + new_item
else:
str_sets[i] = titles[i] + '\n' + new_item
if listUnexpressedMarkers:
str_setsU = [str(sorted([i for i in list(sets[j].difference(setsE[j]))])).replace("'", "").replace(']','').replace('[','').replace(' ','') for j in range(8)]
for i, item in enumerate(str_setsU):
all_temp = item.split(',')
new_item = all_temp[0]
temp_item = all_temp[0]
limit = 75
for gene in all_temp[1:]:
if len(temp_item) > limit:
temp_item = '\n' + gene
new_item += '\n' + gene
else:
new_item += ', ' + gene
temp_item += ', ' + gene
if len(new_item) > 1:
str_sets[i] += '\n' + 'Not expressed (%s):' % (len(all_temp)) + '\n' + new_item
fig = plt.figure(figsize=(8,4))
ax = fig.add_axes([0.25,0.25,0.5,0.5])
currentWedge = 0
def autopctFunc(value):
nonlocal currentWedge
n = int(np.round((float(value) / 100. * float(np.sum(sizes))), 0))
if listUnexpressedMarkers:
u = len(sets[currentWedge].difference(setsE[currentWedge]))
else:
u = 0
if n > 0:
if u > 0:
format = "{:d}\n({:d})".format(n,u)
else:
format = "{:d}".format(n)
else:
format = ""
currentWedge += 1
return format
wedges, texts, autotexts = ax.pie(sizes, explode=explode, labels=labels, colors=colors, labeldistance=1.05,
textprops={'size':6, 'weight':'semibold', 'color':'b'},
autopct=autopctFunc, wedgeprops={'linewidth': 0.5, 'edgecolor':'aqua', 'width': 0.7},
shadow=False, startangle=-180 + rotationAngle, frame=False, rotatelabels=False)
plt.setp(autotexts, size=6, weight="semibold", color='k')
bbox_props = dict(boxstyle="square,pad=0.3", fc="w", ec="k", lw=0.5)
kw = dict(arrowprops=dict(arrowstyle="-"), bbox=bbox_props, zorder=0, va="center")
for i, p in enumerate(wedges):
if len(sets[i]) == 0:
continue
ang = (p.theta2 - p.theta1) / 2. + p.theta1
y = np.sin(np.deg2rad(ang))
x = np.cos(np.deg2rad(ang))
horizontalalignment = {-1: "right", 1: "left"}[int(np.sign(x))]
connectionstyle = "angle,angleA=0,angleB={}".format(ang)
kw["arrowprops"].update({"connectionstyle": connectionstyle})
ax.annotate(str_sets[i], xy=(x, y), xytext=(1.6 * np.sign(x), 1.8 * y), fontsize=3.5, ha=horizontalalignment, **kw)
fig.suptitle('%s & %s' % (type1, type2), fontsize=11, color='b')
ax.axis('equal')
ax.text(0., 0., '%s\n(%s)' % (len(all), len(all.difference(allE))) if listUnexpressedMarkers else '%s' % (len(all)),
color='k', fontsize=8, ha='center', va='center').set_path_effects([path_effects.Stroke(linewidth=1, foreground='blue'),path_effects.Normal()])
saveName = 'Markers_of_%s_vs_%s_(%s)_%s' % (type1.replace('/',''), type2.replace('/',''), nameToAppend, additional_name)
self.saveFigure(fig, self.saveDir, self.dataName + saveName, extension=extension, dpi=dpi, **kwargs)
return dict(zip(labels, list(sets))), fig
@tryExcept
def makeHopfieldPCplot(self, colormap = cm.hot_r, plotTrLines = False, clusterid = 1, trID = 0, axisOff = False, fontscale = 1., trPath = None, dpi = 300, extension = 'png', **kwargs):
'''Make radar plot of the attractors in their principal components coordinates
Parameters:
colormap: matplotlib.colormap or str, Default cm.hot_r
Colormap or its string name
plotTrLines: boolean, Default False
Whether to plot trajectories
clusterid: int, Default 1
Identifier of the cluster to plot trajectories of
trID: int, Default 0
Identifier of the trajectories to plot
axisOff: boolean, Default False
Whether to hide the axes lines
trPath: str, Default None
Path to trajectories files
dpi: int, Default 300
Resolution of the figure
extension: str, Default 'png'
Format extension of the figure
Returns:
None
Usage:
DCS = DigitalCellSorter.DigitalCellSorter()
DCS.makeHopfieldPCplot()
'''
if axisOff:
ax.axis('off')
if trPath is None:
trPath = os.path.join(self.saveDir, 'HopfieldTrajectories')
if not os.path.exists(trPath):
if self.verbose >= 1:
print('Data not found', flush=True)
return
fig = plt.figure(figsize=(8,8))
ax = plt.subplot(111, polar=True, theta_direction=-1, theta_offset=0.5*np.pi)
attrs, attrs_names = read(os.path.join(trPath, 'attrs'))
N = attrs.shape[1]
df = pd.DataFrame(data=attrs[:N], index=attrs_names,
columns=['PC%s\n%s%%'%(i+1, np.int(100.*attrs[N][i])) for i in range(N)])
wherePC = attrs[N] > 0.001
df = df[df.columns[wherePC]]
N = df.shape[1]
vmaxAt = df.max(axis=0).max()
vminAt = df.min(axis=0).min()
ax.set_ylim(vminAt, vmaxAt)
angles = [n / float(N) * 2 * np.pi for n in range(N)] + [0.]
ax.set_xticklabels([])
ax.set_xticks(angles)
for i, celltype in enumerate(df.index):
values=df.loc[celltype].values.flatten().tolist()
values.append(values[0])
color = cm.jet(i/len(attrs_names))
ax.plot(angles, values, color=color, linewidth=1.75, linestyle='solid', label=celltype)
#ax.fill(angles, values, alpha=0.2, color=color, zorder=1)
ax.fill_between(angles, 0, values, alpha=0.2, facecolor=color)
temp_texts = ax.text(angles[np.argmax(values)], values[np.argmax(values)], celltype, color=color, fontsize=12.*fontscale, ha='center', va='center')
temp_texts.set_path_effects([path_effects.Stroke(linewidth=1., foreground='k'), path_effects.Normal()])
if plotTrLines:
trajectories = read(os.path.join(trPath, 'trajectories%s')%(trID))
initial, final, typesNames, clusterNames = read(os.path.join(trPath, 'additional'))
thisTr = trajectories[:, clusterid, wherePC].T
vmax = max(thisTr.max(axis=0).max(axis=0), vmaxAt)
vmin = min(thisTr.min(axis=0).min(axis=0), vminAt)
ax.set_ylim(vmin, vmax)
inId = initial[clusterid]
outId = final[clusterid]
if self.verbose >= 3:
print('ClusterID: %s, Initial state: %s (%s), Final state: %s (%s)'%(clusterNames[clusterid], typesNames[inId], inId, typesNames[outId], outId))
suffix = clusterNames[clusterid]
fig.suptitle('Cluster %s: %s -> %s' % (clusterNames[clusterid], typesNames[inId] if inId!=-1 else 'Unknown', typesNames[outId] if outId!=-1 else 'Unknown'), fontsize=11, color='b')
values = thisTr.T[0].tolist() + [thisTr.T[0][0]]
timeLimit = thisTr.shape[1]
for t in range(timeLimit):
values = thisTr.T[t].tolist() + [thisTr.T[t][0]]
if t==0:
ax.plot(angles, values, 'o-', ms=4.0, color='b', alpha=1.0, clip_on=False)
ax.plot(angles, values, color='b', linewidth=0.5, alpha=0.04, linestyle='solid')
ax.fill(angles, values, alpha=0.005, color='b')
if t == timeLimit - 1:
ax.plot(angles, values, 'o-', ms=4.0, color='r', alpha=1.0, clip_on=False)
ax.set_rlabel_position(0)
else:
vmax = vmaxAt
vmin = vminAt
suffix = 'attractors'
for i, pc in enumerate(df.columns):
temp_texts = ax.text(angles[i], 1.15 * (vmax - vmin) + vmin, pc, color='k', fontsize=14*fontscale, ha='center', va='center')
temp_texts.set_path_effects([path_effects.Stroke(linewidth=0.5*fontscale, foreground='w'), path_effects.Normal()])
ax.set_axisbelow(True)
#ax.plot(angles, [0]*len(angles), color='k', linewidth=1., linestyle='-', label=celltype)
fig.canvas.draw()
ylabels = ax.get_yticklabels()
ax.set_yticklabels([])
for label in ylabels:
ax.text(label._x, label._y, label._text, zorder=np.inf)
self.saveFigure(fig, self.saveDir, self.dataName + '_polar_%s'%(suffix), extension=extension, dpi=dpi, **kwargs)
return fig
@tryExcept
def HopfieldLandscapePlot(self, legend = False, labels = False, PCx = 0, PCy = 1, colorbar = True, fontsize = 10, plotMesh = True, plotAttractors = True, adjustText = True, axisOff = True, colorbarva = 0.75, colorbarha = 0.85, trPath = None, colormap = matplotlib.colors.LinearSegmentedColormap.from_list('cmap', [(1, 1, 1), (0, 1, 1), (0, 0, 1), (1, 0, 0)], N=1000), dpi = 300, extension = 'png', **kwargs):
'''Make heatmap plot of the attractors in their two principal components coordinates
Parameters:
legend: boolean, Default False
Whether to add legend containing cell types names
labels: boolean, Default False
Whether to add labels
PCx: int, Default 0
Principal component for x-coordinate of the plot
PCy: int, Default 1
Principal component for y-coordinate of the plot
colorbar: boolean, Default False
Whether to add colorbar
fontsize: int, Default 10
Text labels font size
plotMesh: boolean, Default False
Whether to plot landscape heatmap
plotAttractors: boolean, Default False
Whether to plot attractor stars
adjustText: boolean, Default False
Whether to minimize text labels overlap
axisOff: boolean, Default False
Whether to hide the axes lines
colorbarva: float, Default 0.75
Vertical position of the bottom of the colorbar
colorbarha: float, Default 0.85
Horizontal position of the colorbar
trPath: str, Default None
Path to trajectories files
colormap: matplotlib.colormap or str, Default matplotlib.colors.LinearSegmentedColormap.from_list('cmap', [(1, 1, 1), (0, 1, 1), (0, 0, 1), (1, 0, 0)], N=1000)
Colormap or its string name
dpi: int, Default 300
Resolution of the figure
extension: str, Default 'png'
Format extension of the figure
Returns:
None
Usage:
DCS = DigitalCellSorter.DigitalCellSorter()
DCS.makeHopfieldLandscapePlot()
'''
np.random.seed(0)
colormap.set_bad('white')
def add_colorbar(fig, labels, cmap = matplotlib.colors.LinearSegmentedColormap.from_list('GR', [(0, 1, 0), (1, 0, 0)], N=100), fontsize = 10):
mapp = cm.ScalarMappable(norm=matplotlib.colors.Normalize(vmin=np.min(labels), vmax=np.max(labels)), cmap=cmap)
sp = np.linspace(np.max(labels), np.min(labels), num=6, endpoint=True)
mapp.set_array(sp)
axisColor = fig.add_axes([colorbarha, colorbarva, 0.01, 0.2])
fig.colorbar(mapp, cax=axisColor, ticks=sp)
axisColor.tick_params(labelsize=fontsize)
axisColor.set_yticklabels(sp.astype(int))
return None
fig = plt.figure(figsize=(8,8))
ax = fig.add_axes([0.05,0.05,0.9,0.9])
if axisOff:
ax.axis('off')
if trPath is None:
trPath = os.path.join(self.saveDir, 'HopfieldTrajectories')
if not os.path.exists(trPath):
if self.verbose >= 1:
print('Data not found', flush=True)
return
attrs_xpca, attrs_names = read(os.path.join(trPath, 'attrs'))
attrs_xpca = attrs_xpca[:attrs_xpca.shape[1]]
mesh_xpca = read(os.path.join(trPath, 'mesh'))
mesh_energy = mesh_xpca[range(len(mesh_xpca)), -1]
mesh_xpca = mesh_xpca[range(len(mesh_xpca)), :-1]
data = np.vstack([attrs_xpca, mesh_xpca])
coords = data.T[[PCx, PCy], :]
attrs2D, mesh2D = coords[:, :attrs_xpca.shape[1]].T, coords[:, attrs_xpca.shape[1]:].T
if plotMesh:
vmin, vmax = min(mesh_energy), 0.
vals = mesh_energy.copy()
vals[np.where(vals > (vmax - 0.001))[0]] = vmax - 0.001
vals[np.where(vals < (vmin + 0.001))[0]] = vmin + 0.001
xmin, xmax = mesh2D.T[0].min(), mesh2D.T[0].max()
ymin, ymax = mesh2D.T[1].min(), mesh2D.T[1].max()
dx = (xmax - xmin) * 0.05
dy = (ymax - ymin) * 0.05
xmin -= dx
xmax += dx
ymin -= dy
ymax += dy
ngrid = 100
grid = np.zeros((ngrid + 1, ngrid + 1))
grid[:] = vmin
i = (ngrid * (mesh2D.T[0] - xmin) / (xmax - xmin)).astype(int)
j = (ngrid * (mesh2D.T[1] - ymin) / (ymax - ymin)).astype(int)
se = pd.Series(index=zip(i,j), data=mesh_energy).groupby(axis=0, level=0).agg(np.min)
se.index = pd.MultiIndex.from_tuples(se.index)
grid[(se.index.get_level_values(0).values, se.index.get_level_values(1).values)] = se.values
maskedArray = np.ma.array(grid.T, mask=np.isnan(grid.T,))
im = ax.imshow(maskedArray[::-1], vmin=vmin, vmax=vmax, cmap=colormap, alpha=0.8,
extent=[xmin, xmax, ymin, ymax], interpolation='quadric', zorder=-10**8, clip_on=False)
data = scipy.ndimage.gaussian_filter(grid.T, 1.5)
xgrid = np.linspace(xmin, xmax, num=(ngrid+1))
ygrid = np.linspace(ymin, ymax, num=(ngrid+1))
tempColormap = colormap
#tempColormap = matplotlib.colors.LinearSegmentedColormap.from_list('cmap', [(0.75, 0.75, 0.75), (0, 1, 1), (0, 0, 1), (1, 0, 0)], N=1000)
ax.contour(xgrid, ygrid, data, levels=10, cmap=tempColormap, linewidths=1.0, zorder=-10**8+1)
ax.contour(xgrid, ygrid, data, levels=10, colors='k', linestyles='solid', linewidths=0.25, zorder=-10**8+2)
if plotAttractors:
texts = []
ax.plot(attrs2D.T[0], attrs2D.T[1], '*', ms=14, color='k', alpha=1.0, zorder=-10**7, clip_on=False)
for attr in range(attrs2D.T[0].shape[0]):
temp_texts = ax.text(attrs2D.T[0][attr], attrs2D.T[1][attr], attrs_names[attr], fontsize=fontsize, fontweight=550, ha='left',va='center', zorder=10 ** 10, clip_on=False)
temp_texts.set_path_effects([path_effects.Stroke(linewidth=2.5, foreground='white'), path_effects.Normal()])
texts.append(temp_texts)
if adjustText:
adjust_text(texts, arrowprops=dict(arrowstyle='-', color='k', lw=0.3, alpha=0.5), force_text=(0.05, 0.05))
if plotMesh and colorbar:
add_colorbar(fig, [vmax, vmin], cmap=colormap, fontsize=fontsize)
self.saveFigure(fig, self.saveDir, self.dataName + '_energy_landscape_PC%s_vs_PC%s'%(PCy, PCx), extension=extension, dpi=dpi, **kwargs)
return fig
# Plotly-powered figures
[docs] @tryExcept
def makeSankeyDiagram(self, df, colormapForIndex = None, colormapForColumns = None, linksColor = 'rgba(100,100,100,0.6)', title = '', attemptSavingHTML = False, quality = 4, width = 400, height = 400, border = 20, nodeLabelsFontSize = 15, nameAppend = '_Sankey_diagram'):
'''Make a Sankey diagram, also known as 'river plot' with two groups of nodes
Parameters:
df: pandas.DataFrame
With counts (overlaps)
colormapForIndex: dictionary, Default None
Colors to use for nodes specified in the DataFrame index
colormapForColumns: dictionary, Default None
Colors to use for nodes specified in the DataFrame columns
linksColor: str, Default 'rgba(100,100,100,0.6)'
Color of the non-overlapping links
title: str, Default ''
Title to print on the diagram
interactive: boolean , Default False
Whether to launch interactive JavaScript-based graph
quality: int, Default 4
Proportional to the resolution of the figure to save
nodeLabelsFontSize: int, Default 15
Font size for node labels
nameAppend: str, Default '_Sankey_diagram'
Name to append to the figure file
Returns:
None
Usage:
DCS = DigitalCellSorter.DigitalCellSorter()
DCS.makeSankeyDiagram(df)
'''
try:
temp_index = pd.MultiIndex.from_arrays([df.index, [colormapForIndex[item] for item in df.index]], names=['label', 'color'])
temp_columns = pd.MultiIndex.from_arrays([df.columns, [colormapForColumns[item] for item in df.columns]], names=['label', 'color'])
df.index = temp_index
df.columns = temp_columns
except Exception as exception:
if self.verbose >= 2:
print(exception)
print('Using default node colors')
colormapForIndex = None
colormapForColumns = None
if (colormapForIndex is None) or (colormapForColumns is None):
nodeColors = ['rgba(150,0,10,0.8)'] * len(df.index) + ['rgba(10,0,150,0.8)'] * len(df.columns)
nodeLabels = df.index.to_list() + df.columns.to_list()
else:
nodeLabels = df.index.get_level_values('label').to_list() + df.columns.get_level_values('label').to_list()
nodeColors = df.index.get_level_values('color').to_list() + df.columns.get_level_values('color').to_list()
sources, targets, values, labels = [], [], [], []
for i, item in enumerate(df.index):
sources.extend([i] * len(df.loc[item]))
targets.extend(list(range(len(df.index), len(df.index) + len(df.loc[item]))))
values.extend([j for j in df.loc[item].values])
if type(item) is tuple:
labels.extend([str(item[0]) + ' -> ' + str(jtem[0]) for jtem in df.loc[item].index])
else:
labels.extend([str(item) + ' -> ' + str(jtem) for jtem in df.loc[item].index])
colorscales = [dict(label=label, colorscale=[[0, linksColor], [1, linksColor]]) for label in labels]
if not nodeColors is None:
for i in range(len(sources)):
if nodeColors[sources[i]] == nodeColors[targets[i]]:
newColor = ','.join(nodeColors[sources[i]].split(',')[:3] + ['0.6)'])
colorscales[i] = dict(label=labels[i], colorscale=[[0, newColor], [1, newColor]])
fig = go.Figure(data=[go.Sankey(valueformat = '', valuesuffix = '', textfont = dict(color = 'rgb(255,0,0)', size = nodeLabelsFontSize, family = 'Arial'),
node = dict(pad = 20, thickness = 40, line = dict(color = 'white', width = 0.0), label = nodeLabels, color = nodeColors,
), # hoverlabel=dict(bordercolor = 'yellow')
link = dict(source = sources, target = targets, value = values, label = labels, colorscales = colorscales, hoverinfo='all'),
)],) #line ={'color':'rgba(255,0,0,0.8)', 'width':0.1}
if not title is None:
fig.update_layout(title_text=title, font_size=10)
fig.update_layout(margin=dict(l=border, r=border, t=border, b=border))
try:
fig.write_image(os.path.join(self.saveDir, self.dataName + nameAppend + '.png'), width=width, height=height, scale=quality)
except Exception as exception:
if self.verbose >= 2:
print('Cannot save static image (likely due to missing orca). Saving to interactive html')
attemptSavingHTML = True
if attemptSavingHTML:
fig.update_layout(margin=dict(l=200, r=200, t=100, b=100))
plot_offline(fig, filename=os.path.join(self.saveDir, self.dataName + nameAppend + '.html'), auto_open=False)
return fig
@tryExcept
def HopfieldLandscapePlot3D(self, PCx = 0, PCy = 1, colorbar = True, fontsize = 12, plotMesh = True, plotAttractors = True, trPath = None, attemptSavingHTML=False, nameAppend = '', quality = 4, **kwargs):
'''Make heatmap plot of the attractors in their two principal components coordinates
Parameters:
legend: boolean, Default False
Whether to add legend containing cell types names
labels: boolean, Default False
Whether to add labels
PCx: int, Default 0
Principal component for x-coordinate of the plot
PCy: int, Default 1
Principal component for y-coordinate of the plot
colorbar: boolean, Default False
Whether to add colorbar
fontsize: int, Default 10
Text labels font size
plotMesh: boolean, Default False
Whether to plot landscape heatmap
plotAttractors: boolean, Default False
Whether to plot attractor stars
adjustText: boolean, Default False
Whether to minimize text labels overlap
axisOff: boolean, Default False
Whether to hide the axes lines
colorbarva: float, Default 0.75
Vertical position of the bottom of the colorbar
colorbarha: float, Default 0.85
Horizontal position of the colorbar
trPath: str, Default None
Path to trajectories files
colormap: matplotlib.colormap or str, Default matplotlib.colors.LinearSegmentedColormap.from_list('cmap', [(1, 1, 1), (0, 1, 1), (0, 0, 1), (1, 0, 0)], N=1000)
Colormap or its string name
dpi: int, Default 300
Resolution of the figure
extension: str, Default 'png'
Format extension of the figure
Returns:
None
Usage:
DCS = DigitalCellSorter.DigitalCellSorter()
DCS.makeHopfieldLandscapePlot()
'''
if trPath is None:
trPath = os.path.join(self.saveDir, 'HopfieldTrajectories')
if not os.path.exists(trPath):
if self.verbose >= 1:
print('Data not found', flush=True)
return
attrs_xpca, attrs_names = read(os.path.join(trPath, 'attrs'))
attrs_xpca = attrs_xpca[:attrs_xpca.shape[1]]
mesh_xpca = read(os.path.join(trPath, 'mesh'))
mesh_energy = mesh_xpca[range(len(mesh_xpca)), -1]
mesh_xpca = mesh_xpca[range(len(mesh_xpca)), :-1]
data = np.vstack([attrs_xpca, mesh_xpca])
coords = data.T[[PCx, PCy], :]
attrs2D, mesh2D = coords[:, :attrs_xpca.shape[1]].T, coords[:, attrs_xpca.shape[1]:].T
vmin, vmax = min(mesh_energy), 0.
vals = mesh_energy.copy()
vals[np.where(vals > (vmax - 0.001))[0]] = vmax - 0.001
vals[np.where(vals < (vmin + 0.001))[0]] = vmin + 0.001
xmin, xmax = mesh2D.T[0].min(), mesh2D.T[0].max()
ymin, ymax = mesh2D.T[1].min(), mesh2D.T[1].max()
dx = (xmax - xmin) * 0.05
dy = (ymax - ymin) * 0.05
xmin -= dx
xmax += dx
ymin -= dy
ymax += dy
ngrid = 100
grid = np.zeros((ngrid + 1, ngrid + 1))
grid[:] = vmin
i = (ngrid * (mesh2D.T[0] - xmin) / (xmax - xmin)).astype(int)
j = (ngrid * (mesh2D.T[1] - ymin) / (ymax - ymin)).astype(int)
se = pd.Series(index=zip(i,j), data=mesh_energy).groupby(axis=0, level=0).agg(np.min)
se.index = pd.MultiIndex.from_tuples(se.index)
grid[(se.index.get_level_values(0).values, se.index.get_level_values(1).values)] = se.values
df = se.unstack(fill_value=vmin)
fig = go.Figure()
if plotMesh:
fig.add_trace(go.Surface(x=np.linspace(xmin, xmax, df.shape[0]),
y=np.linspace(ymin, ymax, df.shape[1]),
z=gaussian_filter(df.values.T, sigma=0.75), opacity=1., colorscale="blackbody_r",
showscale=colorbar,
hoverinfo='none',
contours= {'x': {'highlight': False},
'y': {'highlight': False},
'z': {'highlight': False}},))
fig.update_traces(contours_z=dict(show=True, width=3., highlightwidth=3., usecolormap=False, highlightcolor="limegreen", project=dict(x=True,y=True,z=True), highlight=True, color='grey', size=(vmax-vmin)/10.))
annotations = []
if plotAttractors:
for i, point in enumerate(zip(attrs2D.T[0], attrs2D.T[1])):
fig.add_trace(go.Scatter3d(x=[point[0], point[0]], y=[point[1], point[1]], z=[vmin, 0.5*vmin], mode='lines', hoverinfo='none', line=dict(width=2, color='blue'), showlegend=False))
annotations.append(dict(showarrow=False, x=point[0], y=point[1], z=0.4*vmin, text=attrs_names[i], xanchor="center", xshift=10, opacity=1, font=dict(color='black', size=fontsize)))
fig.add_trace(go.Scatter3d(x=attrs2D.T[0], y=attrs2D.T[1], z=0. * attrs2D.T[0] + 0.5*vmin, mode='markers',
hovertext=attrs_names,
hoverinfo='text',
marker=dict(size=5, color='blue'),
projection=dict(z=dict(show=True)),
showlegend=False))
fig.update_layout(title='Hopfield Attractors', autosize=False, width=700, height=700, margin=dict(l=75, r=75, b=75, t=90))
fig.update_layout(scene = {'xaxis': {'title_text': 'PC1', 'nticks': 10, 'spikesides': False, 'showspikes': False, 'showbackground': False, 'showline': False, 'showticklabels': False, 'showaxeslabels': False}, 'yaxis': {'title_text': 'PC2', 'nticks': 10, 'spikesides': False, 'showspikes': False, 'showbackground': False, 'showline': False, 'showticklabels': False, 'showaxeslabels': False}, 'zaxis': {'title_text': 'Energy', 'range': (vmin, 0.), 'nticks': 10, 'showspikes': False, 'showbackground': False, 'showline': False, 'showticklabels': False, 'showaxeslabels': False}, 'aspectratio': {'x': 1, 'y': 1, 'z': 0.33}, 'annotations': annotations})
fig.update_layout(scene_camera=dict(up=dict(x=0, y=0, z=2), center=dict(x=0, y=0, z=0), eye=dict(x=0, y=-0.25, z=1.25)))
fileName = self.dataName + '_energy_landscape_PC%s_vs_PC%s'%(PCy, PCx) + nameAppend
try:
fig.write_image(os.path.join(self.saveDir, fileName + '.png'), width=700, height=700, scale=quality)
except Exception as exception:
if self.verbose >= 2:
print('Cannot save static image (likely due to missing orca). Saving to interactive html')
attemptSavingHTML = True
if attemptSavingHTML:
plot_offline(fig, filename=os.path.join(self.saveDir, fileName + '.html'), auto_open=False)
return fig