from pathlib import Path
import operator
import cytoolz
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
from pyscenic.utils import load_motifs
from pyscenic.aucell import aucell
from pyscenic.binarization import binarize
from pyscenic.plotting import plot_binarization, plot_rss
from pyscenic.transform import df2regulons
import bioquest as bq #https://jihulab.com/BioQuest/bioquest
OUTPUT_DIR='output/05.SCENIC'
Path(OUTPUT_DIR).mkdir(parents=True,exist_ok=True)
adata = sc.read_h5ad('output/03.inferCNV/adata.h5')
adata_raw = adata.raw.to_adata()
def display_logos(df: pd.DataFrame,
top_target_genes: int = 3,
base_url: str = "http://motifcollections.aertslab.org/v10/logos/"
,column_name_logo = "MotifLogo"
,column_name_id = "MotifID"
, column_name_target = "TargetGenes"
):
"""
:param df:
:param base_url:
"""
# Make sure the original dataframe is not altered.
df = df.copy()
# Add column with URLs to sequence logo.
def create_url(motif_id):
return '<img src="{}{}.png" style="max-height:124px;"></img>'.format(base_url, motif_id)
df[("Enrichment", column_name_logo)] = list(map(create_url, df.index.get_level_values(column_name_id)))
# Truncate TargetGenes.
def truncate(col_val):
return sorted(col_val, key=op.itemgetter(1))[:top_target_genes]
df[("Enrichment", column_name_target)] = list(map(truncate, df[("Enrichment", column_name_target)]))
MAX_COL_WIDTH = pd.get_option('display.max_colwidth')
pd.set_option('display.max_colwidth', -1)
display(HTML(df.head().to_html(escape=False)))
pd.set_option('display.max_colwidth', MAX_COL_WIDTH)
def fetch_logo(regulon, base_url = "http://motifcollections.aertslab.org/v10/logos/"):
for elem in regulon.context:
if elem.endswith('.png'):
return '<img src="{}{}" style="max-height:124px;"></img>'.format(base_url, elem)
return ""
auc_mtx=pd.read_csv(OUTPUT_DIR+"/aucell.csv", index_col=0)
bin_mtx = pd.read_csv(OUTPUT_DIR+"/bin_mtx.csv", index_col=0)
thresholds = pd.read_csv(OUTPUT_DIR+"/thresholds.csv", index_col=0).threshold
# 删除基因后的(+)
auc_mtx.columns = bq.st.removes(string=auc_mtx.columns, pattern=r'\(\+\)')
bin_mtx.columns = bq.st.removes(string=bin_mtx.columns, pattern=r'\(\+\)')
thresholds.index = bq.st.removes(string=thresholds.index, pattern=r'\(\+\)')
regulon双峰图,以及红线表示二值化的阈值
auc_sum = auc_mtx.apply(sum,axis=0).sort_values(ascending=False)
fig, axes = plt.subplots(1, 5, figsize=(8, 2), dpi=100)
for x,y in enumerate(axes):
plot_binarization(auc_mtx, auc_sum.index[x], thresholds[auc_sum.index[x]], ax=y)
plt.tight_layout()
cell_type_key = "CellTypeS2"
cell_type_color_lut = dict(zip(adata.obs[cell_type_key].dtype.categories, adata.uns[f'{cell_type_key}_colors']))
cell_id2cell_type_lut = adata.obs[cell_type_key].to_dict()
bw_palette = sns.xkcd_palette(["white", "black"])
sns.set()
sns.set(font_scale=1.0)
sns.set_style("ticks", {"xtick.minor.size": 1, "ytick.minor.size": 0.1})
g = sns.clustermap(
data=bin_mtx.T,
col_colors=auc_mtx.index.map(cell_id2cell_type_lut).map(cell_type_color_lut),
cmap=bw_palette, figsize=(20,20)
)
g.ax_heatmap.set_xticklabels([])
g.ax_heatmap.set_xticks([])
g.ax_heatmap.set_xlabel('Cells')
g.ax_heatmap.set_ylabel('Regulons')
g.ax_col_colors.set_yticks([0.5])
g.ax_col_colors.set_yticklabels(['Cell Type'])
g.cax.set_visible(False)
df_regulons = pd.DataFrame(data=[list(map(operator.attrgetter('name'), regulons)),
list(map(len, regulons)),
list(map(fetch_logo, regulons))],
index=['name', 'count', 'logo']).T
MAX_COL_WIDTH = pd.get_option('display.max_colwidth')
pd.set_option('display.max_colwidth', -1)
import IPython.display
IPython.display.display(IPython.display.HTML(df_regulons.iloc[[2,5,7],:].to_html(escape=False)))
pd.set_option('display.max_colwidth', MAX_COL_WIDTH)
基于"X_aucell"聚类
from pyscenic.export import add_scenic_metadata
add_scenic_metadata(adata, auc_mtx, regulons);
sc.tl.umap(adata,init_pos="X_aucell")
sc.pl.umap(adata,color=cell_type_key)
df_scores=bq.tl.select(adata.obs,columns=[cell_type_key],pattern=r'^Regulon\(')
df_results = ((df_scores.groupby(by=cell_type_key).mean() - df_scores[df_scores.columns[1:]].mean())/ df_scores[df_scores.columns[1:]].std()).stack().reset_index().rename(columns={'level_1': 'regulon', 0:'Z'})
df_results['regulon'] = list(map(lambda s: s[8:-1], df_results.regulon))
df_results[(df_results.Z >= 3.0)].sort_values('Z', ascending=False).head()
df_heatmap = pd.pivot_table(data=df_results[df_results.Z >= 3.0].sort_values('Z', ascending=False),index=cell_type_key, columns='regulon', values='Z')
fig, ax1 = plt.subplots(1, 1, figsize=(10, 8))
sns.heatmap(df_heatmap, ax=ax1, annot=True, fmt=".1f", linewidths=.7, cbar=False, square=True, linecolor='gray',cmap="viridis", annot_kws={"size": 8})
ax1.set_ylabel('');
from pyscenic.rss import regulon_specificity_scores
rss = regulon_specificity_scores(auc_mtx, adata.obs[cell_type_key])
横坐标表示排名,纵坐标表示RSS特异性得分。排名前5位的Regulon以红色点表示。RSS越高的调控子可能与该细胞类型特异性相关。
plot_rss(rss, cell_type='B', top_n=5)
sc.set_figure_params(frameon=False, dpi=150, fontsize=8)
sc.pl.umap(adata, color=[cell_type_key, 'PAX5', 'TCF7', 'EOMES', 'TBX21', 'PRRX2', 'MAFB'],ncols=7, use_raw=True)
sc.set_figure_params(frameon=False, dpi=100, fontsize=8)
sc.pl.umap(adata, color=[cell_type_key,'Regulon(ZNF808)','Regulon(ZNF880)'],cmap='viridis')
aucell_adata = sc.AnnData(X=auc_mtx.sort_index(),dtype=np.float32)
aucell_adata.obs = adata.obs
names = list(map(operator.attrgetter('name'), filter(lambda r: r.score > 4.0, regulons)))
sc.pl.stacked_violin(aucell_adata, names, groupby=cell_type_key)
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。