Nature | 5月14日新发表：空间转录组文章复现（三）

天意生信云

发布于 2025-06-08 08:37:32

3600

代码可运行

文章被收录于专栏：天意生信俱乐部天意生信俱乐部

运行总次数：0

代码可运行

今天我们接着复现上周分享的文章《Spatial transcriptomics reveals human cortical layer and area specification》Figure3的内容，Figure3展示了动态变化的皮层层级基因表达。

原文链接：

https://www.nature.com/articles/s41586-025-09010-1#Sec2

数据下载地址：

https://zenodo.org/records/14422018

代码下载地址：

https://github.com/carsen-stringer/vizgen-postprocessing

动态变化的皮层层级基因表达

基于构建的皮层深度（CD）分析框架，作者首先评估了在人类成人皮层中鉴定出的标记基因在胎儿皮层中的层级表达模式，并发现其表达存在显著差异（图 a）。接着，筛选出具有层级依赖性富集表达的基因，并对其在皮层板（CP）中的层级表达进行了量化分析（图b）。

import scanpy as sc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from multiprocessing import Pool

adata = sc.read('merscope_integrated_855.h5ad')
adata.obs_names_make_unique()
adata.X = np.exp(adata.X.toarray()) - 1
sc.pp.normalize_total(adata)

def find_genes(sample, region, area):
  adata1 = adata[(adata.obs['sample']==sample) & (adata.obs.region==region) & (adata.obs.area==area) & (~adata.obs['cortical_depth'].isna()) & (adata.obs.H1_annotation.isin(['EN-ET', 'EN-IT', 'EN-Mig']))].copy() 
  ge = adata1.X
  ge1 = ge.round()
  ge1 = ge1.astype('int')
#dist_all = [np.concatenate([adata1.obs.cortical_depth[i]*np.ones(ge1[:,j][i]) for i in range(len(adata1.obs.cortical_depth)) if ge1[:,j][i]>0]) for j in range(ge1.shape[1])]
  dist_all = [np.concatenate([adata1.obs.cortical_depth.iloc[i]*np.ones(ge1[:,j][i]) for i in range(len(adata1.obs.cortical_depth)) if ge1[:,j][i]>0]) for j in range(ge1.shape[1])]
  dist40 = [dist_all[i] for i in np.array([np.quantile(j,0.75)-np.quantile(j,0.25) for j in dist_all]).argsort()[:40]]
  genes = adata1.var.index[np.array([np.quantile(i,0.75)-np.quantile(i,0.25) for i in dist_all]).argsort()[:40]]
  dict1 = dict(zip(genes, dist40))
  genes1 = [[i]*len(dict1[i]) for i in dict1.keys()]
  genes1 = [x for xs in genes1 for x in xs]
  df1 = pd.DataFrame(genes1)
  df1.columns = ['gene']
#df1['cortical_depth'] = np.hstack(dict1.values())
  df1['cortical_depth'] = np.hstack(list(dict1.values()))
  genes2 = list(df1.groupby('gene').aggregate('median').sort_values(by='cortical_depth').index)
return genes2

def make_violin(sample, region, area, genes):
  adata1 = adata[(adata.obs['sample']==sample) & (adata.obs.region==region) & (adata.obs.area==area) & (~adata.obs['cortical_depth'].isna()) & (adata.obs.H1_annotation.isin(['EN-ET', 'EN-IT', 'EN-Mig']))].copy() 
  adata1 = adata1[:,genes].copy()
  ge = adata1.X
  ge1 = ge.round()
  ge1 = ge1.astype('int')
  dist40 = [np.concatenate([adata1.obs.cortical_depth.iloc[i]*np.ones(ge1[:,j][i]) for i in range(len(adata1.obs.cortical_depth)) if ge1[:,j][i]>0]) for j in range(ge1.shape[1])]
#dist40 = [np.concatenate([adata1.obs.cortical_depth[i]*np.ones(ge1[:,j][i]) for i in range(len(adata1.obs.cortical_depth)) if ge1[:,j][i]>0]) for j in range(ge1.shape[1])]
#dist40 = [dist_all[i] for i in
#dist40 = [dist_all[i] for i in np.array([np.quantile(j,0.75)-np.quantile(j,0.25) for j in dist_all]).argsort()[:40]]  
#genes = adata1.var.index[np.array([np.quantile(i,0.75)-np.quantile(i,0.25) for i in dist_all]).argsort()[:40]]
  dict1 = dict(zip(genes, dist40))
  genes1 = [[i]*len(dict1[i]) for i in dict1.keys()]
  genes1 = [x for xs in genes1 for x in xs]
  df1 = pd.DataFrame(genes1)
  df1.columns = ['gene']
#df1['cortical_depth'] = np.hstack(dict1.values())
  df1['cortical_depth'] = np.hstack(list(dict1.values()))
  layer_types = ['EN-L2-1', 'EN-IT-L3-A', 'EN-IT-L4-1', 'EN-ET-L5-1', 'EN-IT-L6-1']
  l2_3 = (np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[0]].cortical_depth, 0.25)+np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[1]].cortical_depth, 0.75))/2
  l3_4 = (np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[1]].cortical_depth, 0.25)+np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[2]].cortical_depth, 0.75))/2
  l4_5 = (np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[2]].cortical_depth, 0.25)+np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[3]].cortical_depth, 0.75))/2
  l5_6 = (np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[3]].cortical_depth, 0.25)+np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[4]].cortical_depth, 0.75))/2
  l = [l2_3,l3_4,l4_5,l5_6]
  order = genes
  plt.figure(figsize=(25,5));
  plot = sns.violinplot(x='gene', y='cortical_depth', hue='gene', data=df1, order=order, density_norm='width', inner = None, dodge=False, cut=0); plot.legend().remove();
  [plot.axhline(i, linestyle = '--') for i in l];
  plt.xticks(rotation=90, fontsize=9); plt.yticks(fontsize=9); plot.set(xlabel=None); plot.set_ylabel('Cortical Depth', fontsize=20); plt.ylim(0,1); plt.tight_layout();
  plt.savefig(sample + '_' + region + '_' + area + '_gene_violin.png', dpi=200, pad_inches=0)
#plt.savefig(sample + '_' + region + '_' + area + '_gene_violin.png', dpi=200, bbox_to_inches = 'tight', pad_inches=0)
#plt.show()

def main():
  genes = find_genes('FB123', 'F1', 'A-PFC')
  make_violin('FB123', 'F1', 'A-PFC', genes)
  make_violin('FB123', 'O2', 'A-Occi', genes)

if __name__=="__main__":
    main()

我使用的python版本是3.13.0，在执行过程中有一些数据结构的问题需要调整，例如：

df1['cortical_depth'] = np.hstack(dict1.values())
dict1.values()

dict1.values()返回的对象并不是一个序列类型，而是一个dict_values对象，这可能是导致 np.hstack() 报错的原因。np.hstack() 需要接收一个列表或元组作为输入，而dict_values并不直接满足这一要求。修改：

df1['cortical_depth'] = np.hstack(list(dict1.values()))

CBLN2 是一种突触组织因子，其在类人猿中特有的视黄酸反应性增强子中具有变异，在妊娠第22至34周（GW22–GW34）表现出在第2和第3层中强烈的额叶富集表达。此外，该基因在第6层中的低水平表达在 GW15–GW34 各皮层区域中均得以维持（图c,d)。

import numpy as np
import scanpy as sc
import matplotlib.pyplot as plt
from multiprocessing import Pool
from matplotlib_scalebar.scalebar import ScaleBar
from matplotlib.colors import Normalize  # 添加这个导入
from itertools import repeat
import matplotlib

adata = sc.read('merscope_integrated_855.h5ad')
adata.obs_names_make_unique()

def make_plot(sample, region, gene):
    adata1 = adata[(adata.obs['sample'] == sample) & (adata.obs['region'] == region)].copy()
    sc.pp.scale(adata1, zero_center=True, max_value=6)

    fig, ax = plt.subplots(figsize=(4, 4))
    # 设置 colormap 和 normalization
    vmin = adata1[:, gene].X.min()
    vmax = adata1[:, gene].X.max()
    norm = Normalize(vmin=vmin, vmax=vmax)

    # 绘图：注意 scanpy 默认会使用 `plt.gca()`，所以这里需要强制传入 ax
    sc.pl.embedding(
        adata1,
        basis="spatial",
        use_raw=False,
        color=gene,
        show=False,
        s=2,
        color_map=cmap,
        alpha=1,
        colorbar_loc=None,
        ax=ax
    )

    # 添加 colorbar
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])
    fig.colorbar(sm, ax=ax, location='top', orientation='horizontal', label=gene, shrink=0.3)

    # 添加 scalebar
    scalebar = ScaleBar(1, "um", fixed_value=500, location='lower right')
    ax.add_artist(scalebar)

    ax.axis("off")
    ax.set_aspect("equal")
    fig.tight_layout()
    fig.savefig(f"{sample}_{region}_{gene}.png", dpi=500)
    plt.close(fig)

samples = ['FB123-F1', 'FB123-F2', 'FB123-P1', 'FB123-O2', 'FB121-F1', 'UMB1117-F1a', 'UMB5900-BA9', 'FB080-O1c']
genes = ['CBLN2', 'SRM', 'RASGRF2', 'B3GALT2', 'SYT6', 'ETV1', 'PENK', 'CUX2', 'GLRA3', 'CUX1', 'RORB', 'PTK2B', 'FOXP1', 'TOX', 'FOXP2', 'TLE4','CALN1', 'SYBU', 'CPNE8', 'CYP26A1', 'STK32B', 'VSTM2L', 'CRYM', 'GNAL', 'PCDH17', 'FSTL5', 'NEUROG2', 'SLN', 'SOX5', 'TAFA1', 'ARFGEF3', 'OPCML', 'NEFM', 'NFIB', 'PPP3CA','B3GNT2', 'SORCS1', 'TRPM3', 'LPL']
cmap = 'YlGnBu'

def main():
    for sample in samples:
        with Pool(8) as pool:
            pool.starmap(make_plot, zip(repeat(sample.split('-')[0]), repeat(sample.split('-')[1]), genes))

if __name__ == "__main__":
    main()

一组基因在第2和第3层中表现出显著的额叶富集表达，但它们的峰值表达时间各不相同（图 e），这提示额叶第2和第3层的特化过程伴随着时间动态的转录变化。

import scanpy as sc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import compress
from multiprocessing import Pool

adata = sc.read('merscope_integrated_855.h5ad')
adata.obs_names_make_unique()
adata.X = np.exp(adata.X.toarray()) - 1
sc.pp.normalize_total(adata)

gw15 = ['UMB1367', 'UMB1117']
gw20 = ['FB080', 'FB121']
gw22 = ['FB123']
gw34 = ['UMB5900']


adata = adata[~(((adata.obs['sample'] == 'FB080') & (adata.obs.region=='O1b')) | ((adata.obs['sample'] == 'UMB1117') & (adata.obs.region=='O1')) | ((adata.obs['sample'] == 'UMB1367') & (adata.obs.region=='O1') & (adata.obs.area=='C-V2')))].copy()

adata.obs['area'] = [i.split('-')[1] if isinstance(i, str) and '-'in i else i for i in adata.obs['area']]


def mean_exp(adata, section, area, layers):
  layer_dict = dict(zip(layers, [np.nan]*len(layers)))
  sample1 = section.split('_')[0]
  region = section.split('_')[1]
  adata2 = adata[(adata.obs['sample']==sample1) & (adata.obs.region==region) & (adata.obs.area==area)].copy()
for layer in layers:
    adata3 = adata2[adata2.obs.layer==layer].copy()
    if sum(adata2.obs.layer==layer)>0:
      layer_dict[layer] = adata3.X.mean()
    else:
      layer_dict[layer] = np.nan
return layer_dict


def cp_annotation(section, area):
      obs = pd.read_csv(image+'_obs_cp.csv', index_col = 0)
      obs.cp_dist = np.sqrt(obs.cp_dist)
      adata1 = adata[(adata.obs['sample']==sample1) & (adata.obs.region==region)].copy()
      if section.split('_')[0] in gw15:
        cp_layers = ['l4','l5','l6']
        layer_types = ['EN-IT-L4-1', 'EN-ET-L5-1', 'EN-IT-L6-1']
        l4_5 = (np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[0]].cortical_depth, 0.25)+np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[1]].cortical_depth, 0.75))/2
        l5_6 = (np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[1]].cortical_depth, 0.25)+np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[2]].cortical_depth, 0.75))/2
        l = [l4_5,l5_6,0]
      elif section.split('_')[0] in gw20:
        cp_layers = ['l3','l4','l5','l6']
        layer_types = ['EN-IT-L2/3-A1', 'EN-IT-L4-1', 'EN-ET-L5-1', 'EN-IT-L6-1']
        l3_4 = (np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[0]].cortical_depth, 0.25)+np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[1]].cortical_depth, 0.75))/2
        l4_5 = (np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[1]].cortical_depth, 0.25)+np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[2]].cortical_depth, 0.75))/2
        l5_6 = (np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[2]].cortical_depth, 0.25)+np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[3]].cortical_depth, 0.75))/2
        l = [l3_4,l4_5,l5_6,0]
      elif section.split('_')[0] in gw22:
        cp_layers = ['l2','l3','l4','l5','l6']
        layer_types = ['EN-L2-1', 'EN-IT-L3-A', 'EN-IT-L4-1', 'EN-ET-L5-1', 'EN-IT-L6-1']
        l2_3 = (np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[0]].cortical_depth, 0.25)+np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[1]].cortical_depth, 0.75))/2
        l3_4 = (np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[1]].cortical_depth, 0.25)+np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[2]].cortical_depth, 0.75))/2
        l4_5 = (np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[2]].cortical_depth, 0.25)+np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[3]].cortical_depth, 0.75))/2
        l5_6 = (np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[3]].cortical_depth, 0.25)+np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[4]].cortical_depth, 0.75))/2
        l = [l2_3,l3_4,l4_5,l5_6,0]
      elif section.split('_')[0] in gw34:
        cp_layers = ['l2','l3','l4','l5','l6']
        layer_types = ['EN-L2-4', 'EN-IT-L3-late', 'EN-IT-L4-late', 'EN-ET-L5-1', 'EN-IT-L6-late']
        l2_3 = (np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[0]].cortical_depth, 0.25)+np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[1]].cortical_depth, 0.75))/2
        l3_4 = (np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[1]].cortical_depth, 0.25)+np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[2]].cortical_depth, 0.75))/2
        l4_5 = (np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[2]].cortical_depth, 0.25)+np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[3]].cortical_depth, 0.75))/2
        l5_6 = (np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[3]].cortical_depth, 0.25)+np.quantile(adata1.obs[adata1.obs.H3_annotation==layer_types[4]].cortical_depth, 0.75))/2
        l = [l2_3,l3_4,l4_5,l5_6,0]
      layer_ann = [cp_layers[np.where((i-l)>=0)[0][0]] for i in np.array(obs.cortical_depth)]
      layer_list = ['l2','l3','l4','l5','l6']
      [np.save(section+'_'+area+'_'+j+'_index.npy', np.array(list(compress(obs.index, [i==j for i in layer_ann])))) for j in layer_list]


dict1 = {
    f"{sample}_{region}": list(set(areas.dropna()))
    for (sample, region), areas in adata.obs.groupby(['sample', 'region'])['area']
}


adata.obs['layer'][(adata.obs['layer'].isin(['l2', 'l3', 'l4', 'l5', 'l6'])) & (~adata.obs['H1_annotation'].isin(['EN-IT', 'EN-Mig', 'EN-ET']))] = np.nan


order = ['PFC', 'PMC', 'M1', 'S1', 'Par', 'Temp', 'Occi', 'V2', 'V1']


sections_all = [[i]*len(dict1[i]) for i in dict1.keys()]
sections_all = [x for xs in sections_all for x in xs]

def make_heatmap(gene):
print(gene)
  adata1 = adata[:,gene].copy()
  layers = ['l2', 'l3', 'l4', 'l5', 'l6', 'sp', 'iz', 'osvz', 'isvz', 'vz']
#layers = ['mz', 'l2', 'l3', 'l4', 'l5', 'l6', 'sp', 'iz', 'osvz', 'isvz', 'vz']
  sections = list(compress(sections_all, [sum([i.startswith(j) for j in gw15]) for i in sections_all]))
  _, idx = np.unique(sections, return_index = True)
  sections_unique = list(np.array(sections)[np.sort(idx)])
  images = [dict1[i] for i in sections_unique]
  images = [x for xs in images for x in xs]
#[cp_annotation(i,j) for i,j in zip(sections, images)]
  gw15_dict = [mean_exp(adata1,i,j,layers) for i,j in zip(sections, images)]
  gw15_df = pd.DataFrame(gw15_dict).transpose()
  images = [i.split('-')[1] if (len(i.split('-')) > 1) else i for i in images]
  gw15_df.columns = images
  images_ordered = [i for j in order for i in images if i==j]
  _, idx = np.unique(images_ordered, return_index = True)
  images_ordered = list(np.array(images_ordered)[np.sort(idx)])
  gw15_df = gw15_df[images_ordered]
#gw15_df = (gw15_df - gw15_df.min().min()) /(gw15_df.max().max() - gw15_df.min().min())
#sns.heatmap(gw15_df); plt.show()
#dir='/Users/kylecoleman/data/walsh/all/clustering2/fig3/gw20'
  sections = list(compress(sections_all, [sum([i.startswith(j) for j in gw20]) for i in sections_all]))
  _, idx = np.unique(sections, return_index = True)
  sections_unique = list(np.array(sections)[np.sort(idx)])
  images = [dict1[i] for i in sections_unique]
  images = [x for xs in images for x in xs]
#[cp_annotation(i,j) for i,j in zip(sections, images)]
  gw20_dict = [mean_exp(adata1,i,j,layers) for i,j in zip(sections, images)]
  gw20_df = pd.DataFrame(gw20_dict).transpose()
  images = [i.split('-')[1] if (len(i.split('-')) > 1) else i for i in images]
  gw20_df.columns = images
  images_ordered = [i for j in order for i in images if i==j]
  _, idx = np.unique(images_ordered, return_index = True)
  images_ordered = list(np.array(images_ordered)[np.sort(idx)])
  gw20_df = gw20_df[images_ordered]
#gw20_df = (gw20_df - gw20_df.min().min()) /(gw20_df.max().max() - gw20_df.min().min())
#sns.heatmap(gw20_df); plt.show()
#dir='/Users/kylecoleman/data/walsh/all/clustering2/fig3/gw22'
  sections = list(compress(sections_all, [sum([i.startswith(j) for j in gw22]) for i in sections_all]))
  _, idx = np.unique(sections, return_index = True)
  sections_unique = list(np.array(sections)[np.sort(idx)])
  images = [dict1[i] for i in sections_unique]
  images = [x for xs in images for x in xs]
#[cp_annotation(i,j) for i,j in zip(sections, images)]
  gw22_dict = [mean_exp(adata1,i,j,layers) for i,j in zip(sections, images)]
  gw22_df = pd.DataFrame(gw22_dict).transpose()
  images = [i.split('-')[1] if (len(i.split('-')) > 1) else i for i in images]
  gw22_df.columns = images
  images_ordered = [i for j in order for i in images if i==j]
  _, idx = np.unique(images_ordered, return_index = True)
  images_ordered = list(np.array(images_ordered)[np.sort(idx)])
  gw22_df = gw22_df[images_ordered]
#gw22_df = (gw22_df - gw22_df.min().min()) /(gw22_df.max().max() - gw22_df.min().min())
#sns.heatmap(gw22_df); plt.show()
  layers = ['l2', 'l3', 'l4', 'l5', 'l6']
#dir='/Users/kylecoleman/data/walsh/all/clustering2/fig3/gw34'
  sections = list(compress(sections_all, [sum([i.startswith(j) for j in gw34]) for i in sections_all]))
  _, idx = np.unique(sections, return_index = True)
  sections_unique = list(np.array(sections)[np.sort(idx)])
  images = [dict1[i] for i in sections_unique]
  images = [x for xs in images for x in xs]
#[cp_annotation(i,j) for i,j in zip(sections, images)]
  gw34_dict = [mean_exp(adata1,i,j,layers) for i,j in zip(sections, images)]
  gw34_df = pd.DataFrame(gw34_dict).transpose()
  images = [i.split('-')[1] if (len(i.split('-')) > 1) else i for i in images]
  _, idx = np.unique(images_ordered, return_index = True)
  images_ordered = list(np.array(images_ordered)[np.sort(idx)])
  gw34_df.columns = images
  images_ordered = [i for j in order for i in images if i==j]
  gw34_df = gw34_df[images_ordered]
#gw34_df = (gw34_df - gw34_df.min().min()) /(gw34_df.max().max() - gw34_df.min().min())
  gw34_df = pd.concat((gw34_df, pd.DataFrame(np.nan, index = ['sp', 'iz', 'osvz', 'isvz', 'vz'], columns = images)))
#sns.heatmap(gw34_df); plt.show()
  grid_max = max(gw15_df.max().max(), gw20_df.max().max(), gw22_df.max().max(), gw34_df.max().max())
  grin_min = min(gw15_df.min().min(), gw20_df.min().min(), gw22_df.min().min(), gw34_df.min().min())
  gw15_df = (gw15_df - grin_min) /(grid_max - grin_min)
  gw20_df = (gw20_df - grin_min) /(grid_max - grin_min)
  gw22_df = (gw22_df - grin_min) /(grid_max - grin_min)
  gw34_df = (gw34_df - grin_min) /(grid_max - grin_min)
  gw15_df = gw15_df.groupby(level=0, axis=1).agg(np.mean)
  gw20_df = gw20_df.groupby(level=0, axis=1).agg(np.mean)
  gw22_df = gw22_df.groupby(level=0, axis=1).agg(np.mean)
  gw34_df = gw34_df.groupby(level=0, axis=1).agg(np.mean)
  images_ordered = [i for j in order for i in gw15_df.columns if i==j]
  _, idx = np.unique(images_ordered, return_index = True)
  images_ordered = list(np.array(images_ordered)[np.sort(idx)])
  gw15_df = gw15_df[images_ordered]
  images_ordered = [i for j in order for i in gw20_df.columns if i==j]
  _, idx = np.unique(images_ordered, return_index = True)
  images_ordered = list(np.array(images_ordered)[np.sort(idx)])
  gw20_df = gw20_df[images_ordered]
  images_ordered = [i for j in order for i in gw22_df.columns if i==j]
  _, idx = np.unique(images_ordered, return_index = True)
  images_ordered = list(np.array(images_ordered)[np.sort(idx)])
  gw22_df = gw22_df[images_ordered]
  images_ordered = [i for j in order for i in gw34_df.columns if i==j]
  _, idx = np.unique(images_ordered, return_index = True)
  images_ordered = list(np.array(images_ordered)[np.sort(idx)])
  gw34_df = gw34_df[images_ordered]
  grid_max = max(gw15_df.max().max(), gw20_df.max().max(), gw22_df.max().max(), gw34_df.max().max())
  grin_min = min(gw15_df.min().min(), gw20_df.min().min(), gw22_df.min().min(), gw34_df.min().min())
  gw15_df = (gw15_df - grin_min) /(grid_max - grin_min)
  gw20_df = (gw20_df - grin_min) /(grid_max - grin_min)
  gw22_df = (gw22_df - grin_min) /(grid_max - grin_min)
  gw34_df = (gw34_df - grin_min) /(grid_max - grin_min)
  fig, axs = plt.subplots(figsize = (19,10), ncols=5, gridspec_kw=dict(width_ratios=[5,6,5,6,1]));
  sns.heatmap(gw15_df, cbar = False, ax = axs[0], cmap = 'rainbow', vmin=0, vmax=1); axs[0].set_title('GW15', size=25); 
  sns.heatmap(gw20_df, yticklabels = False, cbar = False, ax = axs[1], cmap = 'rainbow', vmin=0, vmax=1); axs[1].set_title('GW20', size=25);
  sns.heatmap(gw22_df, yticklabels = False, cbar = False, ax = axs[2], cmap = 'rainbow', vmin=0, vmax=1); axs[2].set_title('GW22', size=25);
  sns.heatmap(gw34_df, yticklabels = False, cbar = False, ax = axs[3], cmap = 'rainbow', vmin=0, vmax=1); axs[3].set_title('GW34', size=25);
  fig.colorbar(axs[0].collections[0], cax=axs[4]);
  plt.title(gene, fontsize=20);
  axs[0].tick_params(axis='both', which='major', labelsize=15)
  axs[1].tick_params(axis='both', which='major', labelsize=15)
  axs[2].tick_params(axis='both', which='major', labelsize=15)
  axs[3].tick_params(axis='both', which='major', labelsize=15)
  plt.tight_layout();
  plt.savefig(gene + '_10.png', dpi=500);
  plt.clf()

#genes = ['CBLN2', 'CPNE8', 'B3GNT2', 'SRM', 'STK32B', 'VSTM2L', 'PENK']
genes = adata.var.index

def main():
  with Pool(len(genes)) as pool:
    pool.map(make_heatmap,genes)

if __name__=="__main__":
    main()

分析各皮层层级中神经元亚型的区域（areal）特化情况。尽管 H2 层级的兴奋性神经元（EN）细胞类型在前后轴（AP axis）上的分布基本保持均一，但许多 H3 层级的 EN 亚型（25个 EN-ET 亚型中有7个，24个 EN-IT 亚型中有15个）表现出在前部或后部区域的逐渐富集趋势，并形成连续的梯度分布模式（图 g）。

import numpy as np
import scanpy as sc
import matplotlib.pyplot as plt
from multiprocessing import Pool
from matplotlib_scalebar.scalebar import ScaleBar
from itertools import repeat


adata = sc.read('merscope_integrated_855.h5ad')
adata.obs_names_make_unique()

def make_plot(sample, region, types, i):
    adata1 = adata[(adata.obs['sample']==sample) & (adata.obs.region==region)].copy()
    colors = ['red', 'blue']
    color_dict = dict(zip(types, colors))
    color_dict.update(dict(zip(np.setdiff1d(adata.obs.H3_annotation.unique(), types), ['grey']*len(np.setdiff1d(adata.obs.H3_annotation.unique(), types)))))
    plot = sc.pl.embedding(adata1, basis="spatial", color = 'H3_annotation', groups = types, show = False, s=2, palette = color_dict, alpha=1); plt.axis('off');
    plot.set_aspect('equal');
    handles, labels = plt.gca().get_legend_handles_labels();
    order = [labels.index(i) for i in types];    
    plt.legend([handles[idx] for idx in order],[labels[idx] for idx in order], loc = 'center', fontsize=2, ncol = 2, bbox_to_anchor=(1.0,1.0), markerscale=0.25);
    plot.get_figure().gca().set_title('');
    scalebar = ScaleBar(1, "um", fixed_value=500, location = 'lower right');
    plot.add_artist(scalebar);
    plt.tight_layout();
    plt.savefig(sample + '_' + region + '_' + str(i) + '.png', dpi=500); plt.clf()


samples = ['FB123-F1', 'FB123-F2', 'FB123-P1', 'FB123-O2']
types_all = [('EN-IT-L2/3-A2', 'EN-IT-L3-P'), ('EN-IT-L4-A', 'EN-IT-L4-late'), ('EN-IT-L4/5-1', 'EN-IT-L5/6-P'), ('EN-ET-L6-A', 'EN-ET-L6-P'), ('EN-ET-SP-A', 'EN-ET-SP-P1')]



def main():
  with Pool(5) as pool:
    for sample in samples:
      pool.starmap(make_plot, zip(repeat(sample.split('-')[0]), repeat(sample.split('-')[1]), types_all, range(5)))

if __name__=="__main__":
    main()

进一步分析所有显著的 DEGs（调整后的 P 值 < 0.05，log[fold change] > 0.5）后，识别出：有4个基因在五类神经元中均在前部富集，另有8个基因在后部富集。这些基因在整体 EN 群体中也展现出强烈的前部或后部富集特征，提示它们可作为皮层区域身份的标志基因（图 h）。

library(anndata)
library(stringr)
library(Matrix)
library(MASS)
library(reticulate)
library(Seurat)
library(dplyr)
library(splitstackshape)
library(ggplot2)
library(reshape2)
library(cowplot)
library(glue)
# library(Rfast)

# use_python("/usr/local/bin/python3.10")
align_legend <- function(p, hjust = 0.5) {
# extract legend
  g <- cowplot::plot_to_gtable(p)
  grobs <- g$grobs
  legend_index <- which(sapply(grobs, function(x) x$name) == "guide-box")
  legend <- grobs[[legend_index]]

# extract guides table
  guides_index <- which(sapply(legend$grobs, function(x) x$name) == "layout")

# there can be multiple guides within one legend box  
for (gi in guides_index) {
    guides <- legend$grobs[[gi]]
    
    # add extra column for spacing
    # guides$width[5] is the extra spacing from the end of the legend text
    # to the end of the legend title. If we instead distribute it by `hjust:(1-hjust)` on
    # both sides, we get an aligned legend
    spacing <- guides$width[5]
    guides <- gtable::gtable_add_cols(guides, hjust*spacing, 1)
    guides$widths[6] <- (1-hjust)*spacing
    title_index <- guides$layout$name == "title"
    guides$layout$l[title_index] <- 2
    
    # reconstruct guides and write back
    legend$grobs[[gi]] <- guides
  }

# reconstruct legend and write back
  g$grobs[[legend_index]] <- legend
# group_name <- group
# pdf(paste0(paste0(path, group_name, sep = "/"), ".pdf",sep = ""))
  g
}


bubble_draw <- function(count_select, zs_select) {
  count_select$layer <- factor(rownames(count_select), levels = rownames(count_select))
  zs_select$layer <- factor(rownames(zs_select), levels = rownames(count_select))

  nonzero_sub_melt <- melt(count_select, id = c("layer"))
  expr_sub_melt <- melt(zs_select, id = c("layer"))
  color_map <- expr_sub_melt$value
  mid <- mean(color_map)

  col_min <- 0
  col_max <- max(zs_select[,1:(ncol(zs_select)-1)]) + 0.005

  x = ggplot(nonzero_sub_melt, aes(x = layer, y = variable, size = value, color = color_map)) + 
    # geom_point(aes(size = value, fill = layer), alpha = 1, shape = 21) + 
    geom_point(aes(size = value)) + 
    scale_size_continuous(limits = c(0.000001, 100), range = c(1,10), breaks = c(20, 40, 60, 80)) +
    labs( x= "", y = "", size = "Percentage of expressed cells (%)", fill = "", color = "Relative expression")  +
    theme(legend.title.align = 0.5,
          legend.text.align = 0.5,
          legend.key=element_blank(),
          axis.text.x = element_text(colour = "black", size = 10, face = "bold", angle = 90), 
          axis.text.y = element_text(colour = "black", face = "bold", size = 10), 
          legend.text = element_text(size = 8, face ="bold", colour ="black"), 
          legend.title = element_text(size = 8, face = "bold"), 
          panel.background = element_blank(),  #legend.spacing.x = unit(0.5, 'cm'),
          legend.position = "right",
          legend.box.just = "top",
          legend.direction = "vertical",
          legend.box="vertical",
          legend.justification="center"
    ) +  
    # theme_minimal() +
    # theme(legend.title.align = 0)+
    
    # scale_fill_manual(values = color_map, guide = FALSE) + 
    scale_y_discrete(limits = rev(levels(nonzero_sub_melt$variable))) +
    scale_color_gradient(low="yellow", high="blue", space ="Lab", limits = c(col_min, col_max), position = "bottom") +
    # scale_fill_viridis_c(guide = FALSE, limits = c(-0.5,1.5)) + 
    guides(colour = guide_colourbar(direction = "vertical", barheight = unit(4, "cm"), title.vjust = 4))
return(x)
}




# count_tot <- read.csv("result/prop.csv", row.names = 1)
count_A <- read.csv("result/DEG/prop_A.csv", row.names = 1)
count_P <- read.csv("result/DEG/prop_P.csv", row.names = 1)
count_T <- read.csv("result/DEG/prop_T.csv", row.names = 1)
# gene_temp <- c("NR4A2", "FGFBP3", "MET", "NR2F1", "UNC5C")

# zs_select <- read.csv("result/expr.csv", row.names = 1)
zs_A <- read.csv("result/DEG/expr_A.csv", row.names = 1)
zs_P <- read.csv("result/DEG/expr_P.csv", row.names = 1)
zs_T <- read.csv("result/DEG/expr_T.csv", row.names = 1)
zs_select <- rbind(rbind(zs_A, zs_T), zs_P)

ap_num <- 10
t_num <- 5

if (ap_num == 10) {
  height <- 8.5
} elseif (ap_num == 15 & t_num == 10) {
  height <- 14
} elseif (ap_num == 20 & t_num == 10) {
  height <- 18
}

count_top <- count_A[1:ap_num, ]
count_top <- count_top[order(count_top[,1], decreasing = TRUE), ]
count_temp <- count_T[1:t_num, ]
count_temp <- count_temp[order(count_temp[,1], decreasing = TRUE), ]
count_bottom <- count_P[!rownames(count_P) %in% rownames(count_temp), ]
count_bottom <- count_bottom[1:ap_num, ]
count_bottom <- count_bottom[order(count_bottom[,ncol(count_bottom)], decreasing = FALSE), ]

count_select <- rbind(rbind(count_top, count_temp), count_bottom)
count_select <- count_select * 100

zs_select <- zs_select[rownames(count_select), ]

count_select <- t(count_select)
zs_select <- t(zs_select)
zs_select <- as.matrix(zs_select)
zs_min <- matrix(apply(zs_select, 2, min), nrow = nrow(zs_select), ncol = ncol(zs_select), byrow = TRUE)
zs_max <- matrix(apply(zs_select, 2, max), nrow = nrow(zs_select), ncol = ncol(zs_select), byrow = TRUE)
zs_select <- (zs_select - zs_min) / (zs_max - zs_min)
count_select <- data.frame(count_select)
zs_select <- data.frame(zs_select)

x <- bubble_draw(count_select, zs_select)
ggdraw(align_legend(x))
ggsave( glue("DEG_plot/bubble_{ap_num}_{t_num}.pdf"),
       width = 7, height = height, limitsize = TRUE)


gene_lst <- colnames(count_select)
class_lst <- c("EN-Mig", "RG", "IPC", "IN")
for (class in class_lst) {
  zs_tot <- read.csv(glue("result/expr_{class}.csv"), row.names = 1)
  count_tot <- read.csv(glue("result/prop_{class}.csv"), row.names = 1)
  count_tot <- count_tot * 100
  zs_select <- zs_tot[gene_lst, ]
  count_select <- count_tot[gene_lst, ]
  count_select <- t(count_select)
  zs_select <- t(zs_select)

  zs_select <- as.matrix(zs_select)
  zs_min <- matrix(apply(zs_select, 2, min), nrow = nrow(zs_select), ncol = ncol(zs_select), byrow = TRUE)
  zs_max <- matrix(apply(zs_select, 2, max), nrow = nrow(zs_select), ncol = ncol(zs_select), byrow = TRUE)
  zs_select <- (zs_select - zs_min) / (zs_max - zs_min)
  count_select <- data.frame(count_select)
  count_select <- data.frame(count_select)
  zs_select <- data.frame(zs_select)
  x <- bubble_draw(count_select, zs_select)
  ggdraw(align_legend(x))
  ggsave(glue("DEG_plot/bubble_{class}_{ap_num}_{t_num}.pdf"),
         width = 7, height = height, limitsize = TRUE)
}

总结

作者通过对人类胎儿皮层中基因的层级表达模式进行分析，识别出了一些具有层级依赖性的标记基因，尤其是在中期妊娠阶段（GW22–GW34）表现出强烈的区域特化和时间动态变化。作者构建了情境感知的标记基因集合，并展示了这些基因在不同皮层区域和神经元亚型中的表达差异。结果表明，这些基因可作为皮层区域身份的标志基因，有助于理解皮层层级和区域的特化过程。

本文参与腾讯云自媒体同步曝光计划，分享自微信公众号。

原始发表：2025-06-02，如有侵权请联系 cloudcommunity@tencent.com 删除

types