import scanpy as sc
import pandas as pd
import gseapy as gp
import numpy as np
import matplotlib.pyplot as plt
import os
# 创建保存结果的目录
os.makedirs('enrichment_results', exist_ok=True)
os.makedirs('enrichment_plots', exist_ok=True)
# 加载单细胞数据
adata = sc.read("test.h5ad")
####基础分析(需要调整)
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
adata = adata[:, adata.var.highly_variable]
sc.pp.regress_out(adata, ['total_counts', 'pct_counts_mt'])
sc.pp.scale(adata, max_value=10)
# 聚类分析
sc.tl.pca(adata, svd_solver='arpack')
sc.pp.neighbors(adata, n_pcs=30, n_neighbors=20)
sc.tl.leiden(adata, resolution=0.5)
sc.tl.umap(adata)
#最好细胞聚类与注释(细胞注释放在celltype列),这里以cluster为例。
sc.tl.rank_genes_groups(adata, groupby='cluster', method='wilcoxon')
# 获取所有cluster ID
clusters = adata.obs['leiden'].cat.categories.tolist()
# 设置富集分析的参数
gene_sets = ['GO_Biological_Process_2023', 'KEGG_2021_Human']
organism = 'human' ####物种参数
# 循环处理每个cluster
for cluster in clusters:
print(f"\nProcessing cluster {cluster}...")
# 1. 获取差异基因
deg_df = pd.DataFrame({
'names': adata.uns['rank_genes_groups']['names'][cluster],
'scores': adata.uns['rank_genes_groups']['scores'][cluster],
'pvals': adata.uns['rank_genes_groups']['pvals'][cluster],
'pvals_adj': adata.uns['rank_genes_groups']['pvals_adj'][cluster],
'logfoldchanges': adata.uns['rank_genes_groups']['logfoldchanges'][cluster]
})
# 筛选显著差异基因 (调整p值<0.05且logFC>0.5)
significant_genes = deg_df[(deg_df['pvals_adj'] < 0.05) & (abs(deg_df['logfoldchanges']) > 0.5)]
if len(significant_genes) < 5:
print(f"Cluster {cluster} has too few significant genes ({len(significant_genes)}), skipping...")
continue
# 取差异最显著的前100个基因
gene_list = significant_genes.sort_values('scores', ascending=False)['names'].head(100).tolist()
# 2. 执行富集分析
try:
# GO和KEGG富集分析
enr_results = gp.enrich(
gene_list=gene_list,
gene_sets=gene_sets,
organism=organism,
outdir=None,
cutoff=0.05
)
# 3. 保存结果
# 保存差异基因
deg_filename = f'enrichment_results/cluster_{cluster}_differential_genes.csv'
significant_genes.to_csv(deg_filename, index=False)
# 保存富集结果
if enr_results is not None:
enr_filename = f'enrichment_results/cluster_{cluster}_enrichment.csv'
enr_results.res2d.to_csv(enr_filename)
# 4. 可视化
# 绘制并保存前10个富集项
plt.figure(figsize=(10, 6))
top_terms = enr_results.res2d.head(10)
top_terms.plot.barh(x='Term', y='Adjusted P-value', legend=False)
plt.title(f'Cluster {cluster} - Top 10 Enriched Terms')
plt.xlabel('-log10(Adjusted P-value)')
plt.tight_layout()
plot_filename = f'enrichment_plots/cluster_{cluster}_enrichment.png'
plt.savefig(plot_filename)
plt.close()
print(f"Cluster {cluster} analysis completed. Results saved.")
else:
print(f"No enrichment results for cluster {cluster}")
except Exception as e:
print(f"Error processing cluster {cluster}: {str(e)}")
# 收集所有cluster的富集结果
all_enrichments = []
for cluster in clusters:
try:
filename = f'enrichment_results/cluster_{cluster}_enrichment.csv'
if os.path.exists(filename):
df = pd.read_csv(filename)
df['Cluster'] = cluster
all_enrichments.append(df)
except:
continue
if all_enrichments:
# 合并所有结果
combined_enr = pd.concat(all_enrichments)
# 保存合并结果
combined_enr.to_csv('enrichment_results/combined_enrichment_results.csv', index=False)
# 找出在多个cluster中富集的通路
common_pathways = combined_enr['Term'].value_counts().head(20)
print("\nPathways enriched in multiple clusters:")
print(common_pathways)
# 可视化共同通路
plt.figure(figsize=(10, 8))
common_pathways.plot.barh()
plt.title('Pathways Enriched in Multiple Clusters')
plt.xlabel('Number of Clusters')
plt.tight_layout()
plt.savefig('enrichment_plots/common_pathways_across_clusters.png')
plt.close()
else:
print("No enrichment results to combine")
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
扫码关注腾讯云开发者
领取腾讯云代金券
Copyright © 2013 - 2025 Tencent Cloud. All Rights Reserved. 腾讯云 版权所有
深圳市腾讯计算机系统有限公司 ICP备案/许可证号:粤B2-20090059 深公网安备号 44030502008569
腾讯云计算(北京)有限责任公司 京ICP证150476号 | 京ICP备11018762号 | 京公网安备号11010802020287
Copyright © 2013 - 2025 Tencent Cloud.
All Rights Reserved. 腾讯云 版权所有