-
Notifications
You must be signed in to change notification settings - Fork 0
/
plot_functions.py
197 lines (180 loc) · 7.16 KB
/
plot_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
import pandas as pd
import numpy as np
import scipy as sp
from matplotlib import pyplot as plt
import matplotlib
from matplotlib.patches import Rectangle
from matplotlib.patches import Patch
from matplotlib.gridspec import GridSpec
from matplotlib.lines import Line2D
from adjustText import adjust_text
import os
def plot_volcano(pvals, logfc, labels, marker_labels=None, marker_legend_title=None, pval_bounds=None, ymax_vol=None, ymax_qq=None, alp=0.1, logfc_thr=0.5, logfc_xlabel=None, nlab='sig', save_path=None, fig_size=None, dpi=None):
"""
Generates a volcano and a Q-Q plot for given p-values, logfold changes and labels.
Parameters
----------
pvals : numpy.ndarray
non-adjusted p-values
logfc : numpy.ndarray
log2(fold change)
labels : numpy.ndarray
array of str, gene names
marker_labels : numpy.ndarray
array of str, group names encoded by marker types, number of groups must be less than 7
marker_legend_title: str
title on the legend displaying marker label groups
pval_bounds : numpy.ndarray
bound to be displayed around p-values, dimensions: pvals.shape[0] x 2
ymax_vol : float
vertical limit of volcano plot
ymax_qq : float
vertical limit of QQ plot
alp : float (0 < alp < 1)
false-discovery threshold
logfc_thr : float > 0
threshold of logfold change
logfc_xlabel : str
label displayed on the volcano plot's x axis
nlab: str or int
if 'sig' the only labels corresponding to significant pvals are plotted
if int then all labels associated with the nlab smallest pvalues are plotted
save_path: str
path where figure will be saved
fig_size: tuple of two int
figure size in inches
dpi: int
dpi of figure
Returns
-------
None
"""
# size of markers in scatter plot
s = 10
# width of markers
lw = 1
# significant
col_sig = [1, 0, 0]
# non-significant
col_nsig = [0, 0, 1]
# marker color on legend
col_legend = [0] * 3
# list of markers (at most 6 groups are possible in mutsig)
marker_choices = ['s', '+', 'x', 'o', '^', '*']
# figure size
if fig_size is None:
fsiz = (12, 5)
else:
fsiz = fig_size
fig = plt.figure(figsize=fsiz)
gs = fig.add_gridspec(1, 2)
gs = GridSpec(1, 2, wspace=0.15)
ind_sorted = np.argsort(pvals)
pvals = pvals[ind_sorted].copy()
nvals = pvals.shape[0]
qvals = sp.stats.false_discovery_control(pvals)
logq = -np.log10(qvals)
labels = labels[ind_sorted].copy()
logfc = logfc[ind_sorted].copy()
if marker_labels is not None:
marker_labels = marker_labels[ind_sorted].copy()
marker_label_choices = np.unique(marker_labels)
if marker_label_choices.shape[0] > len(marker_choices):
raise ValueError('Too many groups in \"marker_labels\"! Please extend \"marker_choices\"!')
ind_sig = qvals < alp
ind_lfc = np.abs(logfc) > logfc_thr
ind_kept = np.logical_and(ind_sig, ind_lfc)
col = np.array([col_nsig] * nvals)
col[ind_kept] = col_sig
# volcano
ax = fig.add_subplot(gs[:, 0])
if ymax_vol is None:
ind_capped = [False] * pvals.shape[0]
else:
ind_capped = logq > ymax_vol
logq[ind_capped] = ymax_vol
if marker_labels is None:
ax.scatter(logfc, logq, s=s, c=col, edgecolors=None, alpha=1, zorder=11)
else:
for i, ml in enumerate(marker_label_choices):
ind_marker = marker_labels == ml
ax.scatter(logfc[ind_marker], logq[ind_marker], s=s, c=col[ind_marker], edgecolors=None, alpha=1, zorder=11, marker=marker_choices[i], linewidth=lw)
if pval_bounds is not None:
pval_bounds = pval_bounds[ind_sorted, :].copy()
qval_bounds = np.c_[sp.stats.false_discovery_control(pval_bounds[:, 0]), sp.stats.false_discovery_control(pval_bounds[:, 1])]
logq_bounds = -np.log10(qval_bounds).copy()
for i in range(logfc.shape[0]):
if not ind_capped[i]:
ax.plot(np.array([logfc[i]] * 2), logq_bounds[i, :], color=col[i], alpha=0.15, zorder=10)
xlim = ax.get_xlim()
if isinstance(ymax_vol, float) or isinstance(ymax_vol, int):
ylim = [0, ymax_vol * 1.05]
ax.plot(xlim, [ymax_vol] * 2, linestyle='dashed', color='gray', zorder=0)
else:
ylim = [0, np.max(logq) * 1.05]
ax.plot(xlim, [1, 1], 'gray', zorder=0)
ax.plot([0, 0], ylim, 'gray', zorder=0)
ax.plot([-logfc_thr] * 2, ylim, linestyle='dashed', color='gray', zorder=0)
ax.plot([logfc_thr] * 2, ylim, linestyle='dashed', color='gray', zorder=0)
if logfc_xlabel is None:
ax.set_xlabel(r'$\log_{2}(\mathrm{fold change})$')
else:
ax.set_xlabel(logfc_xlabel)
ax.set_ylabel(r'$-\log_{10}(\mathrm{FDR})$')
ax.set_xlim(xlim)
ax.set_ylim([0, ylim[1]])
if nlab is not None:
if isinstance(nlab, str):
if nlab=='sig':
X, Y, L = logfc[ind_kept], logq[ind_kept], labels[ind_kept]
elif isinstance(nlab, int):
X, Y, L = logfc[:nlab], logq[:nlab], labels[:nlab]
txt = [plt.text(x, y, lab, zorder=12) for x, y, lab in zip(X, Y, L)]
adjust_text(txt, arrowprops = { 'color' : 'k', "arrowstyle" : "-" })
# QQ
ax = fig.add_subplot(gs[:, 1])
x = -np.log10(np.arange(1, nvals + 1) / (nvals + 1))
y = -np.log10(pvals)
if ymax_qq is None:
ind_capped = [False] * pvals.shape[0]
else:
ind_capped = y > ymax_qq
y_plot = y.copy()
y_plot[ind_capped] = ymax_qq
if marker_labels is None:
ax.scatter(x, y_plot, marker='o', c=col, s=s, edgecolors=None, alpha=1, zorder=11)
else:
for i, ml in enumerate(marker_label_choices):
ind_marker = marker_labels == ml
ax.scatter(x[ind_marker], y_plot[ind_marker], s=s, c=col[ind_marker], edgecolors=None, alpha=1, zorder=11, marker=marker_choices[i], linewidth=lw, label=ml)
if pval_bounds is not None:
for i in range(pval_bounds.shape[0]):
if not ind_capped[i]:
ax.plot(np.array([x[i]] * 2), -np.log10(pval_bounds[i, :]), color=col[i], alpha=0.15, zorder=10)
xlim = ax.get_xlim()
if isinstance(ymax_qq, float) or isinstance(ymax_qq, int):
ylim = [0, ymax_qq * 1.05]
ax.plot([0, xlim[1]], [ymax_qq] * 2, linestyle='--', color='gray', zorder=0)
else:
ylim = [0, np.max(y_plot) * 1.05]
# add legend
if marker_labels is not None:
ax.legend()
leg = ax.get_legend()
for i in range(marker_label_choices.shape[0]):
leg.legendHandles[i].set_color(col_legend)
if marker_legend_title is not None:
leg.set_title(marker_legend_title)
ax.plot([0, xlim[1]], [0, xlim[1]], 'k--', zorder=1)
ax.set_xlabel(r'$-\log_{10}(p_\mathrm{expected})$')
ax.set_ylabel(r'$-\log_{10}(p_\mathrm{observed})$')
ax.set_xlim([0, xlim[1]])
ax.set_ylim([0, ylim[1]])
# save figure
if save_path is not None:
if dpi is None:
dpi_save=300
else:
dpi_save=dpi
fig.savefig(save_path, dpi=dpi_save)
plt.show()