Loading [MathJax]/jax/output/CommonHTML/config.js
首页
学习
活动
专区
圈层
工具
发布
社区首页 >专栏 >脚本更新----visium数据的细胞类型距离分析

脚本更新----visium数据的细胞类型距离分析

原创
作者头像
追风少年i
发布于 2025-01-13 09:46:32
发布于 2025-01-13 09:46:32
2740
举报

作者,追风少年i

今天我们继续距离分析,这个分析在空间转录组是比较关键的一环。

之前分享的距离分析文章,主要针对区域和细胞类型的关系,

脚本更新----空间转录组的Radial Distance分析

脚本更新---实现visium数据的空间距离分析2

大家可以复习一下2024年度单细胞空间课程系列的第23课,空间轨迹。

那如果衡量两种细胞类型的空间距离分析呢?尤其是多样本进行比较的情况下

我们来实现一下

首先第一步获取距离矩阵

代码语言:javascript
AI代码解释
复制
library(Seurat)
source("distance_comparison_function.R")

spatial_SO <- readRDS(file = 'spatial.rds')

spot_diameter_fullres = spatial_SO@images$image@scale.factors$fiducial###383.6347

pos <- GetTissueCoordinates(spatial_SO, scale = NULL) # returns positions in pixels
pos$imagerow <- pos$imagerow*(65/spot_diameter_fullres) # converts pixels to microns
pos$imagecol <- pos$imagecol*(65/spot_diameter_fullres) # converts pixels to microns

dist_mat <- matrix(NA, nrow = nrow(pos), ncol = nrow(pos))

# Nested Loop through rows and columns of position (pos) df
for(n in 1:nrow(pos)){
    for(j in 1:nrow(pos)){ 
      if(n == j){
        dist_mat[n,j] <- 0 
      } else {
        point1 <- as.list(pos[n,])
        point2 <- as.list(pos[j,])
        dist <- sqrt((point1$imagerow - point2$imagerow)^2 + (point1$imagecol - point2$imagecol)^2)
        dist_mat[n,j] <- dist
        dist_mat[j,n] <- dist
      }
    }
  }
colnames(dist_mat) <- rownames(pos)
rownames(dist_mat) <- rownames(pos)

计算距离,以三种细胞类型为例

代码语言:javascript
AI代码解释
复制
proportions <- data.frame(
      "pEMT" = c(spatial_SO$pEMT),
      "PTC" = c(spatial_SO$PTC),
      "myCAF" = c(spatial_SO$myCAF)
  )

  # Calculate min. distance of spots with at least 10% pEMT PTC phenotype from spots with at least 10% myCAF phenotype
  pEMT_myCAF_distance <- distance_comparison(distances = distance_matrix,
                                             proportions = proportions,
                                             topic1 = "pEMT",
                                             topic2 = "myCAF",
                                             t1_thresh = 0.1,
                                             t2_thresh = 0.1)

  # Calculate min. distance of spots with at least 10% PTC phenotype from spots with at least 10% myCAF phenotype
  PTC_myCAF_distance <- distance_comparison(distances = distance_matrix,
                                            proportions = proportions,
                                            topic1 = "PTC",
                                            topic2 = "myCAF",
                                            t1_thresh = 0.1,
                                            t2_thresh = 0.1)
  
  combined_distance <- rbind(pEMT_myCAF_distance, PTC_myCAF_distance)
  
  # Violin plot to compare distances 
  distance_violin <- ggplot(combined_distance, aes(x = barcodes, y = min, fill = barcodes)) +
    geom_violin(scale = "width") +
    theme_classic() +
    scale_fill_manual(values = c("darkgrey", "#FFCC99")) +
    scale_x_discrete(name ="Deconvolution", limits = c("PTC vs. myCAF", "pEMT vs. myCAF"))
    
    
  # Save violin plot
  ggsave(file = paste0("outputs/Distance_Plots/", samples[i], "/PTC_pEMT_myCAF_Min_Distance_Violin.png"),
         distance_violin, height = 5, width = 6, dpi = 600)
  
  # For each sample calculate the mean and the median of the minimum distance from myCAF for pEMT PTC and PTC
  mean_median_min_distances <- data.frame(
      "sample" = c(samples[i]),
      "pEMT_myCAF_mean" = c(mean(pEMT_myCAF_distance$min)),
      "PTC_myCAF_mean" = c(mean(PTC_myCAF_distance$min)),
      "pEMT_myCAF_median" = c(median(pEMT_myCAF_distance$min)),
      "PTC_myCAF_median" = c(median(PTC_myCAF_distance$min))
  )

distances_overview = mean_median_min_distances

distances_overview_plotting <- distances_overview %>% 
  pivot_longer(
    cols = starts_with("pEMT_myCAF_mean") | starts_with("pEMT_myCAF_median") | starts_with("PTC_myCAF_mean") | starts_with("PTC_myCAF_median"),
    names_to = c("Distance_Group"),
    values_to = "Distance"
  )

# plots based on median
median_distances_plotting <- distances_overview_plotting %>% subset(Distance_Group == "pEMT_myCAF_median" | Distance_Group == "PTC_myCAF_median")

# make plot 
plot <- ggplot(median_distances_plotting, aes(Distance_Group, Distance)) +
  geom_boxplot(outlier.size = -1, 
               aes(fill = Distance_Group),
               alpha = 0.9, 
               show.legend = FALSE) + 
  geom_point(aes(),
             #position = position_jitter(width = 0.1, height = 0),
             size = 2, 
             alpha = 0.7,
             show.legend = FALSE) +
  geom_line(aes(group = sample), color = "black", linewidth = 0.5, alpha = 0.5, linetype = "dashed") +
  scale_fill_manual(values = c("darkgrey", "#FFCC99")) +
  labs (x = "Tumor", y = "iPVCAF Correlation") + 
  theme_classic() + 
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5), 
    axis.title.x = element_text(face = "bold", size = 14.5), 
    axis.title.y = element_text(face = "bold", size = 12),
    axis.text.x = element_text(face = "bold", size = 10),
    axis.text.y = element_text(face = "bold", size = 16)) +
  scale_x_discrete(name ="Deconvolution", limits = c("PTC_myCAF_median", "pEMT_myCAF_median")) +
  scale_y_continuous(breaks = c(0, 300, 600, 900, 1200, 1500),
                     limits = c(0, 1550)) 
# Not, max and min values need to be restricted for each new gene that is plotted
ggsave("PTC_pEMT_myCAF_Distance_Boxplots.png", 
       width = 2.75, height = 3, 
       plot, dpi = 600, create.dir = TRUE)

# Plot with distance as log2 with a pseudocount to avoid log2(0)
plot <- ggplot(median_distances_plotting, aes(Distance_Group, log2(Distance+1))) +
  geom_boxplot(outlier.size = -1, 
               aes(fill = Distance_Group),
               alpha = 0.9, 
               show.legend = FALSE) + 
  geom_point(aes(),
             #position = position_jitter(width = 0.1, height = 0),
             size = 2, 
             alpha = 0.7,
             show.legend = FALSE) +
  geom_line(aes(group = sample), color = "black", linewidth = 0.5, alpha = 0.5, linetype = "dashed") +
  scale_fill_manual(values = c("darkgrey", "#FFCC99")) +
  labs (x = "Tumor", y = "iPVCAF Correlation") + 
  theme_classic() + 
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5), 
    axis.title.x = element_text(face = "bold", size = 14.5), 
    axis.title.y = element_text(face = "bold", size = 12),
    axis.text.x = element_text(face = "bold", size = 10),
    axis.text.y = element_text(face = "bold", size = 16)) +
  scale_x_discrete(name ="Deconvolution", limits = c("PTC_myCAF_median", "pEMT_myCAF_median")) +
  scale_y_continuous(breaks = c(0, 2, 4, 6, 8, 10),
                     limits = c(0, 11)) 
# Not, max and min values need to be restricted for each new gene that is plotted
ggsave("PTC_pEMT_myCAF_Log2_Distance_Boxplots.png", 
       width = 2.75, height = 3, 
       plot, dpi = 600, create.dir = TRUE)


######### MEAN plots ################
# Repeat of median plots above but using mean instead of median
# mean does a better job of capturing the variability 
mean_distances_plotting <- distances_overview_plotting %>% subset(Distance_Group == "pEMT_myCAF_mean" | Distance_Group == "PTC_myCAF_mean")

plot <- ggplot(mean_distances_plotting, aes(Distance_Group, Distance)) +
  geom_boxplot(outlier.size = -1, 
               aes(fill = Distance_Group),
               alpha = 0.9, 
               show.legend = FALSE) + 
  geom_point(aes(),
             #position = position_jitter(width = 0.1, height = 0),
             size = 2, 
             alpha = 0.7,
             show.legend = FALSE) +
  geom_line(aes(group = sample), color = "black", linewidth = 0.5, alpha = 0.5, linetype = "dashed") +
  scale_fill_manual(values = c("darkgrey", "#FFCC99")) +
  labs (x = "Tumor", y = "iPVCAF Correlation") + 
  theme_classic() + 
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5), 
    axis.title.x = element_text(face = "bold", size = 14.5), 
    axis.title.y = element_text(face = "bold", size = 12),
    axis.text.x = element_text(face = "bold", size = 10),
    axis.text.y = element_text(face = "bold", size = 16)) +
  scale_x_discrete(name ="Deconvolution", limits = c("PTC_myCAF_mean", "pEMT_myCAF_mean")) +
  scale_y_continuous(breaks = c(0, 400, 800, 1200, 1600),
                     limits = c(0, 1700)) 
# Not, max and min values need to be restricted for each new gene that is plotted
ggsave("PTC_pEMT_myCAF_Mean_Distance_Boxplots.png", 
       width = 2.75, height = 3, 
       plot, dpi = 600, create.dir = TRUE)

plot <- ggplot(mean_distances_plotting, aes(Distance_Group, log2(Distance+1))) +
  geom_boxplot(outlier.size = -1, 
               aes(fill = Distance_Group),
               alpha = 0.9, 
               show.legend = FALSE) + 
  geom_point(aes(),
             #position = position_jitter(width = 0.1, height = 0),
             size = 2, 
             alpha = 0.7,
             show.legend = FALSE) +
  geom_line(aes(group = sample), color = "black", linewidth = 0.5, alpha = 0.5, linetype = "dashed") +
  scale_fill_manual(values = c("darkgrey", "#FFCC99")) +
  labs (x = "Tumor", y = "iPVCAF Correlation") + 
  theme_classic() + 
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5), 
    axis.title.x = element_text(face = "bold", size = 14.5), 
    axis.title.y = element_text(face = "bold", size = 12),
    axis.text.x = element_text(face = "bold", size = 10),
    axis.text.y = element_text(face = "bold", size = 16)) +
  scale_x_discrete(name ="Deconvolution", limits = c("PTC_myCAF_mean", "pEMT_myCAF_mean")) +
  scale_y_continuous(breaks = c(0, 2, 4, 6, 8, 10),
                     limits = c(0, 11)) 
# Not, max and min values need to be restricted for each new gene that is plotted
ggsave("PTC_pEMT_myCAF_Log2_Mean_Distance_Boxplots.png", 
       width = 2.75, height = 3, 
       plot, dpi = 600, create.dir = TRUE)



# stats
distance_wide <- pivot_wider(mean_distances_plotting, names_from = Distance_Group, values_from = Distance)
t.test(distance_wide$pEMT_myCAF_mean, distance_wide$PTC_myCAF_mean, paired = TRUE)

需要source的脚本

代码语言:javascript
AI代码解释
复制
distance_comparison <- function(distances, proportions, topic1, topic2, t1_thresh, t2_thresh){
  
  # Filter the desired topics for barcode proportions greater than threshold and only keep barcode and topic column
  filtered_props_1 <- proportions[proportions[,topic1] > t1_thresh, ]
  rownames_holder <- rownames(filtered_props_1)
  filtered_props_1 <- data.frame(filtered_props_1[, c(topic1)])
  rownames(filtered_props_1) <- rownames_holder
  rm(rownames_holder)
  colnames(filtered_props_1) <- c(topic1)
  print(filtered_props_1)
  
  # Filter the desired topics for barcode proportions greater than threshold and only keep barcode and topic column
  filtered_props_2 <- proportions[proportions[,topic2] > t2_thresh, ]
  rownames_holder <- rownames(filtered_props_2)
  filtered_props_2 <- data.frame(filtered_props_2[, c(topic2)])
  rownames(filtered_props_2) <- rownames_holder
  rm(rownames_holder)
  colnames(filtered_props_2) <- c(topic2)
  print(filtered_props_2)
  
  # Create a new matrix that contains the first topic as rows and the second topic as columns
  dist_filtered <- distances
  dist_filtered <- dist_filtered[rownames(dist_filtered) %in% rownames(filtered_props_1), ]
  
  # Filter columns
  dist_filtered <- dist_filtered[, colnames(dist_filtered) %in% rownames(filtered_props_2)]
  
  print(dist_filtered)
  
  # Apply min function to each row (1=rows, 2=columns)
  min_df <- apply(dist_filtered, 1, function(x) min(x)) 
  
  # Convert to dataframe
  min_df <- as.data.frame(min_df)
  colnames(min_df) <- c("min")
  
  comp_str <- paste0(topic1, " vs. ", topic2)
  min_df$barcodes <- comp_str
  print(min_df)
  
  return(min_df)
}

生活很好,有你更好

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

评论
登录后参与评论
暂无评论
推荐阅读
编辑精选文章
换一批
脚本优化--visium的细胞niche与共定位(R版本)
追风少年i
2025/10/29
1260
脚本优化--visium的细胞niche与共定位(R版本)
课前准备---空间转录组数据分析之分子niche
追风少年i
2024/07/17
3410
课前准备---空间转录组数据分析之分子niche
关于10X HD和Xenium数据整合分析以及HD解卷积RCTD的运用
追风少年i
2024/06/10
8500
关于10X HD和Xenium数据整合分析以及HD解卷积RCTD的运用
流程更新----空间细胞聚类及配受体共现分析(针对visium、bin模式的Stereo-seq、以及HD)
追风少年i
2024/11/17
5500
流程更新----空间细胞聚类及配受体共现分析(针对visium、bin模式的Stereo-seq、以及HD)
[GBD数据库挖掘] 13.ggplot2绘制风险因素图
R语言数据分析指南
2023/11/23
7380
[GBD数据库挖掘] 13.ggplot2绘制风险因素图
复现GMM文章(一):图1代码和数据
所有的数据百度网盘链接: https://pan.baidu.com/s/1isKEK1G5I6X90KYqLufmWw
生信学习者
2024/07/17
2130
复现GMM文章(一):图1代码和数据
文献复现-单细胞揭示新辅助治疗后NSCLC的免疫微环境变化
文章在这:Tumor microenvironment remodeling after neoadjuvant immunotherapy in non-small cell lung cancer revealed by single-cell RNA sequencing 方法:来自3名治疗前和12名接受新辅助PD-1阻断联合化疗的非小细胞肺癌(NSCLC)患者的~92,000个单细胞的转录组。根据病理反应将12个治疗后样本分为两组:MPR(n = 4)和非MPR(n = 8)。
生信菜鸟团
2023/09/09
2.2K0
文献复现-单细胞揭示新辅助治疗后NSCLC的免疫微环境变化
你还缺scRNA-seq的workflow吗?
之前曾老师给我看了一位在pipebio工作的生信工程师Roman Hillje的scRNA-seq的workflow,今天整理一下分享给大家。
生信菜鸟团
2024/07/31
7340
你还缺scRNA-seq的workflow吗?
NC图表复现|箱线图叠加多重注释元素
R语言数据分析指南
2024/03/02
2050
NC图表复现|箱线图叠加多重注释元素
可视化—转录组箱线图美化
以前画的箱线图比较基础,最近看到一篇关于单细胞箱线图美化的帖子,正好今天在做bulkRNA的分析,就照个这篇帖子的代码,重写了一份适合bulkRNA的箱线图美化的代码。
sheldor没耳朵
2025/08/14
2042
可视化—转录组箱线图美化
可视化—KEGG\GO分析结果气泡图美化
之前做的图比较丑,这里记录下比较好看的美化代码。没有封装函数了,改起来比较方便,随用随拿。
sheldor没耳朵
2025/05/30
1.1K1
可视化—KEGG\GO分析结果气泡图美化
基于CellMiner数据库的基因表达与药敏分析
CellMiner数据库,主要是通过国家癌症研究所癌症研究中心(NCI)所列出的60种癌细胞为基础而建立的。该数据库最初发表于2009年,后于2012年在Cancer Research杂志上进行了更新,题目为“CellMiner: a web-based suite of genomic and pharmacologic tools to explore transcript and drug patterns in the NCI-60 cell line set”。大家后期在使用该数据库记得应用相关文献。
DoubleHelix
2022/11/24
4.9K0
基于CellMiner数据库的基因表达与药敏分析
多组差异表达分析火山图合并绘制
总共4个分组的差异分析,频率为4的基因就是共同的差异表达基因。我们选择3个来显示:
DoubleHelix
2023/02/27
1.6K0
多组差异表达分析火山图合并绘制
文献组图
追风少年i
2025/01/07
2880
文献组图
Nature图表复现 | 组合绘制个性化热图
R语言数据分析指南
2023/09/20
7191
Nature图表复现 | 组合绘制个性化热图
ggplot2可视化情人节消费支出
R语言数据分析指南
2024/02/22
1830
ggplot2可视化情人节消费支出
R语言ggplot2每周一图活动:第四周~簇状柱形图和堆积柱形图
https://github.com/kaustavSen/tidytuesday/blob/master/2021/week_11.R
用户7010445
2022/05/23
6920
R语言ggplot2每周一图活动:第四周~簇状柱形图和堆积柱形图
nature microbiology图表复现之聚类条形图
「关于jjAnno」更多详细的内容可点击下方链接https://mp.weixin.qq.com/s/CVXsJPPx12saw0WYiReQag
R语言数据分析指南
2022/09/21
7150
nature microbiology图表复现之聚类条形图
酷不酷炫!想不想学!带统计学的PCoA完美解决打样本量多组数据不好区分的问题!!
由于高通量测序的价格降的越来越低,现在很多人的研究已经从早期几个、十几个样品的研究发展到了几十、几百、甚至上千个样品,这种确实在以扩增子测序为基础的研究中越发明显。
DataCharm
2021/02/22
1.9K0
酷不酷炫!想不想学!带统计学的PCoA完美解决打样本量多组数据不好区分的问题!!
手把手带你复现NC图表之Figure 4
探究从肺泡和外膜成纤维细胞亚群(在对照组织中富集)到肌成纤维细胞(在肿瘤组织中富集)的分化过程,对scRNA-seq数据集进行轨迹推断。结果表明外膜和肺泡成纤维细胞可以作为独立的祖细胞,肌成纤维细胞可以从这些祖细胞转分化
生信技能树jimmy
2023/09/26
6190
手把手带你复现NC图表之Figure 4
推荐阅读
相关推荐
脚本优化--visium的细胞niche与共定位(R版本)
更多 >
领券
社区新版编辑器体验调研
诚挚邀请您参与本次调研,分享您的真实使用感受与建议。您的反馈至关重要,感谢您的支持与参与!
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档
首页
学习
活动
专区
圈层
工具
MCP广场
首页
学习
活动
专区
圈层
工具
MCP广场