Scrublet)、多种整合方法任选(Uncorrected、Seurat, scVI,Harmony, Scanorama, BBKNN)、RNA速率、PAGA、Slingshot、等这些关键功能能否跑通,才是这个包的关键,但是这个往往需要解决很多bug才能装成。装成了,真的很省事,就比如跑RNA速率这个事,基本上好的文章经常能看到,但真要自己基于python取单独跑,还是挺麻烦的,现在全部都基于Seurat对象,直接输入,还是非常方便的;我们已经把SCOP做成一个app在桌面上,根据我们的全套教程,打开即用;
# 此脚本运行是有关scop(全能型单细胞测序分析的工具),由“生信钱同学”公众号提供;# scop的使用发文请引用https://github.com/mengxu98/scop# 此脚本需借助“生信钱同学云计算”服务器中的VNC桌面系统中的SCOP软件才能正常运行# 其他地方无法保证全部正常运行# 如何使用VNC,请看https://mp.weixin.qq.com/s/TblLF90KvJ0NG1P_4P6ZAA# 所有图表都将保存在“plots”目录中,按顺序编号运行,保证可产生此代码全部结果,不会报错。# 此脚本名称会带有日期标志,我们会定期维护,请用最新的代码。# ==========================================================================# 此脚本如果你把所有整合方法都跑一遍,Seurat对象容量会不断累积,# 仅仅此套示例数据就将达到60G内存,请严格注意自己的内存使用情况# 怎么“最直观的查看自己内存使用情况”可以看⬇️笔记链接# https://notes.r-py.com/003-%E6%9C%8D%E5%8A%A1%E5%99%A8%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98%E6%B1%87%E6%80%BB/008-%E6%9C%80%E7%9B%B4%E8%A7%82%E7%9A%84%E6%9F%A5%E7%9C%8B%E8%87%AA%E5%B7%B1%E7%9A%84%E5%86%85%E5%AD%98%E7%9A%84%E4%BD%BF%E7%94%A8%E6%96%B9%E6%B3%95.html# 此scop环境是一个功能强大的集成的单细胞分析环境,为保证环境稳定性,不支持大家再装包进去# 只允许运行此环境现有的功能,如需其他功能分析,可自建环境# 如果你是新手,必须从头到位运行,否则极易出错,不要跳过任何计算的步骤# 必须全部运行# 必须强调的是SCOP是基于Seurat v5做,不要用Seurat v4的对象的数据去跑,绝对报错,这也是SCOP与SCP最大的区别# Seurat v4的对象的数据请用我们的SCP教程去做options(scop_env_init = TRUE, reticulate.conda_binary = "/scp-pub000/scop202512/bin/conda", scop_envname = "scop_env")options(future.globals.maxSize = 12000 * 1024 ^ 2)current_user <- Sys.info()[["user"]]user_lib_path <- file.path("/home", current_user, "R/x86_64-conda-linux-gnu-library/4.4")# 必须全部运行.libPaths(c("/scp-pub000/scop202512/envs/scop/lib/R/library",user_lib_path))# 1. 环境设置# --------------------------------------------------------------------------# 此脚本需借助“生信钱同学云计算”服务器中的VNC桌面系统中的SCOP软件才能正常运行library(reticulate)library(scop)##功能强大,但是数据对象会越堆越大,注意自己的内存使用library(Seurat)library(thisutils)library(slingshot)# 此脚本需借助“生信钱同学云计算”服务器中的VNC桌面系统中的SCOP软件才能正常运行# 如何使用VNC,请看https://mp.weixin.qq.com/s/TblLF90KvJ0NG1P_4P6ZAA# scop功能强大,我们保证当前代码全部正常运行,但此工具参数众多,此脚本之外的使用,请自行尝试解决。# 请注意,你的所有结果,会放在右侧的plot文件夹中,可自行查看dir.create("plots", showWarnings = FALSE)# --------------------------------------------------------------------------# 2. 数据加载、预处理与探索# --------------------------------------------------------------------------print("--- 2. 正在加载数据, 运行标准流程并进行初步探索 ---")data(pancreas_sub)pancreas_sub <- standard_scop(srt = pancreas_sub)# 此脚本需借助“生信钱同学云计算”服务器中的VNC桌面系统中的SCOP软件才能正常运行print(pancreas_sub)# --- 初步探索性数据分析与可视化 ---print("--- 2a. 初步探索性数据分析与可视化 ---")# [序号 01] 按细胞类型和亚型分组png("plots/01_CellDimPlot_CellType_SubCellType.png", width = 10, height = 8, units = "in", res = 300)CellDimPlot(srt = pancreas_sub,group.by = c("CellType", "SubCellType"),reduction = "UMAP",theme_use = "theme_blank")dev.off()# [序号 02] 按亚型分组,按细胞周期着色png("plots/02_CellDimPlot_SubCellType_by_Phase.png", width = 10, height = 8, units = "in", res = 300)CellDimPlot(srt = pancreas_sub,group.by = "SubCellType",stat.by = "Phase",reduction = "UMAP",theme_use = "theme_blank")dev.off()# [序号 03] 特征基因表达png("plots/03_FeatureDimPlot_Markers.png", width = 10, height = 8, units = "in", res = 300)FeatureDimPlot(srt = pancreas_sub,features = c("Sox9", "Neurog3", "Fev", "Rbp4"),reduction = "UMAP",theme_use = "theme_blank")dev.off()# [序号 04] 多个特征比较png("plots/04_FeatureDimPlot_Compare_Markers.png", width = 10, height = 8, units = "in", res = 300)FeatureDimPlot(srt = pancreas_sub,features = c("Ins1", "Gcg", "Sst", "Ghrl"),compare_features = TRUE,label = TRUE,label_insitu = TRUE,reduction = "UMAP",theme_use = "theme_blank")dev.off()# [序号 05] 分组热图ht <- GroupHeatmap(srt = pancreas_sub,features = c("Sox9", "Anxa2","Neurog3", "Hes6","Fev", "Neurod1","Rbp4", "Pyy","Ins1", "Gcg", "Sst", "Ghrl"),group.by = c("CellType", "SubCellType"),heatmap_palette = "YlOrRd",cell_annotation = c("Phase", "G2M_score", "Cdh2"),cell_annotation_palette = c("Dark2", "Paired", "Paired"),show_row_names = TRUE, row_names_side = "left",add_dot = TRUE, add_reticle = TRUE)png("plots/05_GroupHeatmap_Markers.png", width = 12, height = 10, units = "in", res = 300)print(ht$plot)dev.off()# --- 标准流程UMAP图 ---print("--- 2b. 保存标准流程UMAP图 ---")# [序号 06] 标准流程UMAPpng("plots/06_StandardUMAP_CellType.png", width = 10, height = 8, units = "in", res = 300)CellDimPlot(srt = pancreas_sub,group.by = c("CellType", "SubCellType"),reduction = "StandardUMAP2D",theme_use = "theme_blank")dev.off()# --------------------------------------------------------------------------# 3. 细胞质控(CellQC)# --------------------------------------------------------------------------print("--- 3. 正在运行细胞质控并保存图表 ---")# 提供了两种非常常用的双细胞计算方法,任选其一;pancreas_sub <- RunCellQC(srt = pancreas_sub, db_method="scDblFinder")#pancreas_sub <- RunCellQC(srt = pancreas_sub, db_method="Scrublet")# [序号 07] 质控UMAPpng("plots/07_CellQC_UMAP.png", width = 10, height = 8, units = "in", res = 300)CellDimPlot(srt = pancreas_sub, group.by = "CellQC", reduction = "UMAP")dev.off()# [序号 08] 质控统计条形图png("plots/08_CellStatPlot_QC_by_CellType.png", width = 10, height = 8, units = "in", res = 300)CellStatPlot(srt = pancreas_sub, stat.by = "CellQC", group.by = "CellType", label = TRUE)dev.off()# [序号 09] 质控Upset图png("plots/09_CellStatPlot_QC_Upset.png", width = 10, height = 8, units = "in", res = 300)CellStatPlot(srt = pancreas_sub,stat.by = c("db_qc", "outlier_qc","umi_qc", "gene_qc","mito_qc", "ribo_qc","ribo_mito_ratio_qc", "species_qc"),plot_type = "upset",stat_level = "Fail")dev.off()# --------------------------------------------------------------------------# 4. 多样本数据整合# 我们提供了多种多样本整合去除批次效应的方法,任选其一就行# 请注意后续的脚本都是基于integration_method = "Seurat"结果做的# 如果你使用了其他方法整合,后续脚本可能需要修改参数,请自行到scop官网学习# scop的使用请引用https://github.com/mengxu98/scop# --------------------------------------------------------------------------print("--- 4. 正在运行多样本数据整合流程 ---")data("panc8_sub")panc8_sub <- standard_scop(srt = panc8_sub)panc8_sub <- integration_scop(srt_merge = panc8_sub, batch = "tech", integration_method = "Seurat")#panc8_sub <- integration_scop(srt_merge = panc8_sub, batch = "tech", integration_method = "Uncorrected")#scVI巨慢无比,非必需不建议大家跑#panc8_sub <- integration_scop(srt_merge = panc8_sub, batch = "tech", integration_method = "scVI")#panc8_sub <- integration_scop(srt_merge = panc8_sub, batch = "tech", integration_method = "Harmony")#panc8_sub <- integration_scop(srt_merge = panc8_sub, batch = "tech", integration_method = "Scanorama")#这个也较慢,请注意#panc8_sub <- integration_scop(srt_merge = panc8_sub, batch = "tech", integration_method = "BBKNN")# ⚠️⚠️⚠️# 请注意后续的脚本都是基于integration_method = "Seurat"结果做的# 如果你使用了其他方法整合,后续脚本可能需要修改参数,请自行到scop官网学习# scop的使用请引用https://github.com/mengxu98/scop# [序号 10] Seurat整合结果png("plots/10_Integration_Seurat_UMAP.png", width = 10, height = 8, units = "in", res = 300)CellDimPlot(srt = panc8_sub,group.by = c("celltype", "tech"),reduction = "SeuratUMAP2D",title = "Seurat",theme_use = "theme_blank")dev.off()# --------------------------------------------------------------------------# 5. 细胞注释与投射# --------------------------------------------------------------------------print("--- 5. 正在进行细胞注释与投射 ---")# --- 细胞投射 ---print("--- 5a. 运行细胞投射 (Cell Projection) ---")genenames <- make.unique(thisutils::capitalize(rownames(panc8_sub[["RNA"]]),force_tolower = TRUE))names(genenames) <- rownames(panc8_sub)panc8_rename <- RenameFeatures(srt = panc8_sub,newnames = genenames,assays = "RNA")srt_query <- RunKNNMap(srt_query = pancreas_sub,srt_ref = panc8_rename,ref_umap = "SeuratUMAP2D")# [序号 11] 投射图png("plots/11_ProjectionPlot.png", width = 10, height = 8, units = "in", res = 300)ProjectionPlot(srt_query = srt_query,srt_ref = panc8_rename,query_group = "SubCellType",ref_group = "celltype")dev.off()# --- 基于Bulk RNA-seq数据的注释 ---print("--- 5b. 使用Bulk RNA-seq数据集进行细胞注释 ---")data("ref_scMCA")pancreas_sub <- RunKNNPredict(srt_query = pancreas_sub,bulk_ref = ref_scMCA,filter_lowfreq = 20)# [序号 12] Bulk注释结果png("plots/12_Annotation_Bulk_UMAP.png", width = 10, height = 8, units = "in", res = 300)CellDimPlot(srt = pancreas_sub,group.by = "KNNPredict_classification",reduction = "UMAP",label = TRUE)dev.off()# --- 基于单细胞数据集的注释 ---print("--- 5c. 使用单细胞数据集进行细胞注释 ---")pancreas_sub <- RunKNNPredict(srt_query = pancreas_sub,srt_ref = panc8_rename,ref_group = "celltype",filter_lowfreq = 20)# [序号 13] 单细胞注释结果png("plots/13_Annotation_SC_UMAP.png", width = 10, height = 8, units = "in", res = 300)CellDimPlot(srt = pancreas_sub,group.by = "KNNPredict_classification",reduction = "UMAP",label = TRUE)dev.off()# [序号 14] 相关性热图ht <- CellCorHeatmap(srt_query = pancreas_sub,srt_ref = panc8_rename,query_group = "SubCellType",ref_group = "celltype",nlabel = 3,label_by = "row",show_row_names = TRUE,show_column_names = TRUE)png("plots/14_CellCorHeatmap.png", width = 10, height = 8, units = "in", res = 300)print(ht$plot)dev.off()# --------------------------------------------------------------------------# 6. 拟时序分析与动态基因分析# --------------------------------------------------------------------------print("--- 6. 正在运行拟时序分析与动态基因分析 ---")# --- PAGA (细胞群关联图) ---print("--- 6a. 运行PAGA分析 ---")pancreas_sub <- RunPAGA(srt = pancreas_sub,group_by = "SubCellType",linear_reduction = "PCA",nonlinear_reduction = "UMAP")# [序号 15] PAGA图png("plots/15_PAGA_Plot.png", width = 10, height = 8, units = "in", res = 300)PAGAPlot(srt = pancreas_sub,reduction = "UMAP",label = TRUE,label_insitu = TRUE,label_repel = TRUE)dev.off()# --- RNA Velocity (RNA速率) ---##计算很耗时,正常现象print("--- 6b. 运行RNA Velocity分析 ---")pancreas_sub <- RunSCVELO(srt = pancreas_sub,group_by = "SubCellType",linear_reduction = "PCA",nonlinear_reduction = "UMAP")# [序号 16] RNA速率向量图png("plots/16_VelocityPlot_Vectors.png", width = 10, height = 8, units = "in", res = 300)VelocityPlot(srt = pancreas_sub,reduction = "UMAP",group_by = "SubCellType")dev.off()# [序号 17] RNA速率流线图png("plots/17_VelocityPlot_Stream.png", width = 10, height = 8, units = "in", res = 300)VelocityPlot(srt = pancreas_sub,reduction = "UMAP",plot_type = "stream")dev.off()# --- Slingshot (轨迹推断) ---print("--- 6c. 运行Slingshot轨迹推断 ---")pancreas_sub <- RunSlingshot(srt = pancreas_sub,group.by = "SubCellType",reduction = "UMAP")# [序号 18] Slingshot谱系图png("plots/18_Slingshot_Lineages.png", width = 10, height = 8, units = "in", res = 300)FeatureDimPlot(pancreas_sub,features = paste0("Lineage", 1:3),reduction = "UMAP",theme_use = "theme_blank")dev.off()# --- 动态特征、差异表达与富集分析 ---print("--- 6d. 分析动态基因、差异表达与功能富集 ---")pancreas_sub <- RunDynamicFeatures(srt = pancreas_sub,lineages = c("Lineage1", "Lineage2"),n_candidates = 200)##计算很耗时,正常现象pancreas_sub <- RunDEtest(srt = pancreas_sub,group_by = "CellType",fc.threshold = 1,only.pos = FALSE)# [序号 19] 差异表达火山图png("plots/19_VolcanoPlot_DE.png", width = 12, height = 8, units = "in", res = 300)VolcanoPlot(srt = pancreas_sub,group_by = "CellType")dev.off()##计算很耗时,正常现象pancreas_sub <- RunEnrichment(srt = pancreas_sub,group_by = "CellType",db = c("TF", "CSPA","GO_BP","GO_CC","GO_MF","KEGG"),species = "Mus_musculus",DE_threshold = "avg_log2FC > log2(1.5) & p_val_adj < 0.05")# [序号 20] 富集分析条形图png("plots/20_Enrichment_Bar.png", width = 10, height = 8, units = "in", res = 300)EnrichmentPlot( srt = pancreas_sub, group_by = "CellType", group_use = c("Ductal", "Endocrine"), plot_type = "bar")dev.off()# [序号 21] 富集分析词云图png("plots/21_Enrichment_Wordcloud.png", width = 10, height = 8, units = "in", res = 300)EnrichmentPlot( srt = pancreas_sub, group_by = "CellType", group_use = c("Ductal", "Endocrine"), plot_type = "wordcloud")dev.off()##计算很耗时,正常现象pancreas_sub <- RunGSEA(srt = pancreas_sub,group_by = "CellType",db = "GO_BP",species = "Mus_musculus",DE_threshold = "p_val_adj < 0.05")# [序号 22] GSEA富集图png("plots/22_GSEAPlot.png", width = 10, height = 8, units = "in", res = 300)GSEAPlot( srt = pancreas_sub, group_by = "CellType", group_use = "Endocrine", id_use = "GO:0007186")dev.off()# --- 可视化动态特征 ---# [序号 23] 动态热图ht <- DynamicHeatmap(srt = pancreas_sub,lineages = c("Lineage1", "Lineage2"),use_fitted = TRUE,n_split = 6,reverse_ht = "Lineage1",species = "Mus_musculus",db = c("TF", "CSPA", "GO_BP"),anno_terms = TRUE,anno_keys = TRUE,anno_features = TRUE,heatmap_palette = "viridis",cell_annotation = "SubCellType",separate_annotation = list("SubCellType", c("Nnat", "Irx1")),separate_annotation_palette = c("Paired", "Set1"),feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),pseudotime_label = 25,pseudotime_label_color = "red",height = 5,width = 12)png("plots/23_DynamicHeatmap.png", width = 18, height = 10, units = "in", res = 300)print(ht$plot)dev.off()# [序号 24] 动态基因变化曲线png("plots/24_DynamicPlot_Lineages.png", width = 12, height = 8, units = "in", res = 300)DynamicPlot(srt = pancreas_sub,lineages = c("Lineage1", "Lineage2"),group.by = "SubCellType",features = c("Plk1", "Hes1", "Neurod2", "Ghrl", "Gcg", "Ins2"),compare_lineages = TRUE,compare_features = FALSE)dev.off()

