欢迎关注R语言数据分析指南
❝本节来分享nature communications上一篇论文中的一张综合图,作者提供了完整的代码及数据。小编在原文代码的基础上进行了修改后可以实现正常出图,结果与原文有所不同,个人观点,仅供参考,更多详细内容请查看论文内介绍。
论文信息
Gut microbial β-glucuronidases and their role in the microbiome-metabolite axis in colorectal cancer
Chen, J., Li, Y., Tang, S. et al. Gut microbial β-glucuronidases and their role in the microbiome-metabolite axis in colorectal cancer. Nat Commun 16, 10660 (2025). https://doi.org/10.1038/s41467-025-65679-y
论文原图
结果图
图形解读
此图整体来看主要由5组图组成,内容非常的多。但是仔细拆分一下可以看出都是由基础简单图组成的,因此只要按部就班的来绘制即可,主要点在于对个部分数据的理解,难度系数一般适合进阶学习。
论文代码
https://github.com/yr2008-UM/CRC_GUS/tree/master/04.DataAnalysis
代码展示
library(tidyverse)
library(ggpubr)
library(ggtree)
library(treeio)
library(coin)
library(patchwork)
group_mean <- function(group.subgroup,data,colInfo="Group"){
# Get unique groups
tmpg=unique(group.subgroup[,colInfo])
tmpn=length(tmpg)
rows=nrow(data)
# Initialize result matrix
result <- matrix(nrow=rows,ncol=tmpn*3)
name <- NULL
# Create column names
for(i in tmpg){ name <- c(name,paste(i,'Median',sep='_'),paste(i,'Mean',sep='_'),paste(i,'SD',sep='_')) }
colnames(result)=name
# Calculate statistics for each group
for(i in tmpg){
# Subset samples for current group
tmpdata=data[,rownames(group.subgroup[which(group.subgroup[,colInfo]==i),,drop=F])]
# colnames
name <- c(paste(i,'Median',sep='_'),paste(i,'Mean',sep='_'),paste(i,'SD',sep='_'))
# Calculate statistics for each feature
for (j in1:rows){
tmp.median=median(as.numeric(tmpdata[j,]))
tmp.mean=mean(as.numeric(tmpdata[j,]))
tmp.SD=sd(as.numeric(tmpdata[j,]))
result[j,name] <- c(tmp.median,tmp.mean,tmp.SD)
}
}
# Format and return results
rownames(result) <- rownames(data)
result <- data.frame(result)
return(result)
}
wilcoxon.FDR.TAX <- function(data,group){
data=data[,rownames(group)]
tmpg=unique(group$Stage)
species.abun <- data
#statistics
result <- group_mean(group,data,'Stage')
#wilcoxon
for (i in c('Healthy')) {
for (j in c('MP','S0',"SI_II","SIII_IV")){
#wilcoxon test
sample1=rownames(group[which(group[,'Stage']==i),,drop=F])
sample2=rownames(group[which(group[,'Stage']==j),,drop=F])
wilcox.01.p <- apply(data,1,function(x) {if(sum(x[c(sample1,sample2)]) == 0){return('NA')}else{test <- wilcox.test(x[sample1],x[sample2],conf.int = T);p <- test$p.value;return(p);}}) %>% as.numeric()
wilcox.01.q <- rep("NA",length(wilcox.01.p))
wilcox.01.q[which(wilcox.01.p < 0.05)] <- p.adjust(wilcox.01.p[which(wilcox.01.p < 0.05)], method = "fdr")
wilname.p <- data.frame(p=wilcox.01.p,q=wilcox.01.q)
colnames(wilname.p) <- paste0(c("wilcox.test.p","wilcox.test.q"),paste0("(",i,' VS ',j,")"))
result <- cbind(result,wilname.p)
}
}
# cutoff for sign species
cutoff <- nrow(group)*0.1
cutFC <- log2(2)
# Screen and format sign species
signSpecies = list()
# formatted the sign species across CRC stages
signTaxMP <- result %>% dplyr::mutate(log2FC=ifelse(MP_Mean > Healthy_Mean,log2(MP_Mean/Healthy_Mean),-log2(Healthy_Mean/MP_Mean))) %>% dplyr::mutate(log2FC=ifelse(!is.finite(log2FC),ifelse(MP_Mean>Healthy_Mean,10,-10),log2FC))
signTax <- signTaxMP %>% dplyr::filter(`wilcox.test.p(Healthy VS MP)` < 0.05)
data <- species.abun[signTax %>% dplyr::arrange(-log2FC) %>% rownames(),]
data[data>0] <- 1
signTaxG0 <- intersect(signTax %>% dplyr::filter(abs(log2FC) >= cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])
signSpecies[['Up']][['MP']] <- intersect(signTax %>% dplyr::filter(log2FC >= cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])
signSpecies[['Down']][['MP']] <- intersect(signTax %>% dplyr::filter(log2FC <= -cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])
signTaxS0 <- result %>% dplyr::mutate(log2FC=ifelse(S0_Mean > Healthy_Mean,log2(S0_Mean/Healthy_Mean),-log2(Healthy_Mean/S0_Mean))) %>% dplyr::mutate(log2FC=ifelse(!is.finite(log2FC),ifelse(S0_Mean>Healthy_Mean,10,-10),log2FC))
signTax <- signTaxS0 %>% dplyr::filter(`wilcox.test.q(Healthy VS S0)` < 0.05)
data <- species.abun[signTax %>% dplyr::arrange(-log2FC) %>% rownames(),]
data[data>0] <- 1
signTaxG1 <- intersect(signTax %>% dplyr::filter(abs(log2FC) >= cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])
signSpecies[['Up']][['S0']] <- intersect(signTax %>% dplyr::filter(log2FC >= cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])
signSpecies[['Down']][['S0']] <- intersect(signTax %>% dplyr::filter(log2FC <= -cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])
signTaxS12 <- result %>% dplyr::mutate(log2FC=ifelse(SI_II_Mean > Healthy_Mean,log2(SI_II_Mean/Healthy_Mean),-log2(Healthy_Mean/SI_II_Mean))) %>% dplyr::mutate(log2FC=ifelse(!is.finite(log2FC),ifelse(SI_II_Mean>Healthy_Mean,10,-10),log2FC))
signTax <- signTaxS12 %>% dplyr::filter(`wilcox.test.q(Healthy VS SI_II)` < 0.05)
data <- species.abun[signTax %>% dplyr::arrange(-log2FC) %>% rownames(),]
data[data>0] <- 1
signTaxG2 <- intersect(signTax %>% dplyr::filter(abs(log2FC) >= cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])
signSpecies[['Up']][['S12']] <- intersect(signTax %>% dplyr::filter(log2FC >= cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])
signSpecies[['Down']][['S12']] <- intersect(signTax %>% dplyr::filter(log2FC <= -cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])
signTaxS34 <- result %>% dplyr::mutate(log2FC=ifelse(SIII_IV_Mean > Healthy_Mean,log2(SIII_IV_Mean/Healthy_Mean),-log2(Healthy_Mean/SIII_IV_Mean))) %>% dplyr::mutate(log2FC=ifelse(!is.finite(log2FC),ifelse(SIII_IV_Mean>Healthy_Mean,10,-10),log2FC))
signTax <- signTaxS34 %>% dplyr::filter(`wilcox.test.q(Healthy VS SIII_IV)` < 0.05)
data <- species.abun[signTax %>% dplyr::arrange(-log2FC) %>% rownames(),]
data[data>0] <- 1
signTaxG3 <- intersect(signTax %>% dplyr::filter(abs(log2FC) >= cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])
signSpecies[['Up']][['S34']] <- intersect(signTax %>% dplyr::filter(log2FC >= cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])
signSpecies[['Down']][['S34']] <- intersect(signTax %>% dplyr::filter(log2FC <= -cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])
allSign <- unique(c(signTaxG0,signTaxG1,signTaxG2,signTaxG3))
signFeatureStageFC <- data.frame(Tax=allSign,MP=signTaxMP[allSign,'log2FC'],S0=signTaxS0[allSign,'log2FC'],S12=signTaxS12[allSign,'log2FC'],S34=signTaxS34[allSign,'log2FC']) %>% tibble::column_to_rownames('Tax')
signFeatureStageMean <- result[allSign,c("Healthy_Mean","MP_Mean","S0_Mean",'SI_II_Mean',"SIII_IV_Mean")]
# the output of the function
return(list(testResult=result,signFC=signFeatureStageFC,signMean=signFeatureStageMean,signSpecies=signSpecies))
}
### SECTION 2: data loading ###
# Define color scheme for loop categories
loop.colors <- c("No Loop"="#FABA39","Mini-Loop 1"="#1AE4B6",
"Loop 1"="#4686FB","Loop 2"="#7A0403",
"Mini-Loop 2"= "#E4460A",
"Mini-Loop 1,2"="#A2FC3C",
"No coverage" ="#30123B")
# Load metadata and abundance matrix
GUSabun_TPM <- read.csv("00.rawdata/GUSabun_TPM.csv",header = T,row.names = 1)
group <- read.csv("00.rawdata/group.csv",header = T,row.names = 1)
GUSsStat <- read.csv("00.rawdata/GUSsStat.csv",header = T,row.names = 1)
# Analysis configuration
analysis.list.info <- list(
compaired = list(
"Stage"=list(c("Healthy" ,"MP"),c("Healthy" ,"S0"),c("Healthy" ,"SI_II"),c("Healthy" ,"SIII_IV"))
),
levels = list(
"Stage"=c("Healthy" ,"MP","S0","SI_II","SIII_IV")
),
colors = list(
"Stage"=c("Healthy" ="#cecccb","MP"="#55B7E6","S0"="#193E8F","SI_II"="#F09739","SIII_IV"="#E53528")
))
### SECTION 3: differential abundance analysis ###
signGUSs <- wilcoxon.FDR.TAX(GUSabun_TPM,group)
绘制树图
# Load and plot phylogenetic tree
beast_tree <- read.newick("00.rawdata/38gmGUS.tre")
tree <- ggtree(beast_tree) +
geom_tiplab(size=3) +
xlim(0,13)
色块填充图
ggtreeOrderGUS <- get_taxa_name(tree)
p2 <- GUSsStat[ggtreeOrderGUS,] %>% dplyr::select(Loop) %>%
tibble::rownames_to_column(var='GUSID') %>%
dplyr::mutate(Lab=gsub(
"Loop ","L",gsub("Mini-Loop ","mL",gsub(
"No Loop","NL",Loop)))) %>%
mutate(x="Loop Category") %>%
ggplot(aes(x,GUSID,fill=Loop,label=Lab)) +
geom_tile() + geom_text() +
scale_fill_manual(values = loop.colors) +
labs(x=NULL,y=NULL) +
ylim(rev(ggtreeOrderGUS)) +
theme_void() +
theme(legend.position = "none")
ggplot2绘制热图
df <- signGUSs$testResult %>%
dplyr::select(
Healthy_Mean, MP_Mean, S0_Mean,
SI_II_Mean, SIII_IV_Mean) %>%
slice(match(ggtreeOrderGUS, rownames(.))) %>%
tibble::rownames_to_column("Feature") %>%
pivot_longer(-Feature,
names_to = "Stage",
values_to = "Value") %>%
group_by(Feature) %>%
mutate(Value_z = as.numeric(scale(Value))) %>%
ungroup()
df$Feature <- factor(df$Feature,levels = rev(unique(df$Feature)))
p3 <- ggplot(df, aes(x = Stage, y = Feature, fill = Value_z)) +
geom_tile(color = "white", size = 0.1) +
scale_fill_gradientn(
colors = c("navy", "white", "firebrick3"),
name = "Z-score") +
theme_void() +
theme(axis.text.x = element_text(size = 10, angle = 45, hjust =1,vjust=1),
legend.title.position = "top",
legend.title = element_text(vjust=0.5,hjust=0.5,color="black"),
legend.position = "top")
散点图
signFeatureStage <- rbind(
signGUSs$signSpecies$Up %>% unlist() %>% as.data.frame() %>%
dplyr::rename(Tax = 1) %>% tibble::rownames_to_column('Group') %>%
dplyr::mutate(Group=gsub("S34.*","S34",Group),Type='Up') %>%
dplyr::mutate(Group=gsub("S12.*","S12",Group)),
signGUSs$signSpecies$Down %>% unlist() %>% as.data.frame() %>%
dplyr::rename(Tax = 1) %>% tibble::rownames_to_column('Group') %>%
dplyr::mutate(Group=gsub("S12.*","S12",Group),Type='Down') %>%
dplyr::mutate(Group=gsub("S34.*","S34",Group))) %>%
dplyr::mutate(Group=gsub("S0.*","S0",gsub("MP.*","MP",Group)))
p4 <- signFeatureStage %>% ggplot(aes(Group,Tax,shape=Type,fill=Type)) +
geom_point(size=2) + scale_shape_manual(values = c("Down"=25,"Up"=24)) +
scale_fill_manual(values = c("Down"="blue","Up"="red")) +
labs(x=NULL,y=NULL) +
ylim(rev(ggtreeOrderGUS)) +
theme_void() +
theme(legend.position = "top",
legend.title = element_blank(),
axis.line = element_line(color="black"),
axis.text.x = element_text(angle = 45,hjust = 1,vjust=1,color="black"))
折线点图
# Calculate and visualize prevalence
calculatePrevalence <- function(inputData,groupCol){
group <- group %>% dplyr::filter(! is.na(get(groupCol)))
mid <- inputData
mid[mid > 0] <- 1
mark <- matrix(nrow = nrow(inputData),ncol = length(unique(group[,groupCol])))
colnames(mark) <- unique(group[,groupCol])
rownames(mark) <- rownames(inputData)
for (i in1:nrow(inputData)) {
for (mid.group in unique(group[,groupCol])) {
mid.group2 <- group %>% filter(get(groupCol) == mid.group) %>% rownames()
mid.mark <- (sum( mid[i,mid.group2]) / length(mid.group2) ) * 100
mark[i,mid.group] <- mid.mark
}
}
return(mark)
}
preResult <- calculatePrevalence(GUSabun_TPM[ggtreeOrderGUS,],"Stage") %>% reshape2::melt(by="row.names")
preResult$Var2 <- factor(preResult$Var2,levels = analysis.list.info$levels$Stage)
p5 <- preResult %>% ggplot(aes(value,Var1,color=Var2)) +
geom_vline(xintercept = c(10),linetype=2,color="grey") +
geom_line(group="Var2") + geom_point() +
facet_wrap("Var2",nrow = 1,ncol = 5) +
theme_classic() + xlab("Prevalence (100%)") +
ylab("") +
scale_color_manual(values = analysis.list.info$colors$Stage) +
theme(legend.position = "none",
axis.text.y = element_blank(),) +
ylim(rev(ggtreeOrderGUS))
拼图
(tree|p2|p3|p4|free(p5,type="label")) +
plot_layout(widths = c(1,0.2,0.4,0.5,1.3))
关注下方公众号下回更新不迷路
❝本节介绍到此结束,最近依然有不少读者想购买小编之前写的R绘图文档,因此在附上一下购买方式及所能提供的服务具体如下:以往的交流群已经满员不在考虑建立新群,因此只会通过公众号后台发送腾讯文档链接
1.内容更新截止2025年/12月/31日结束,后期无任何更新。
2.只分享案例内容,无答疑。
3.内容只包含R语言绘图内容,无任何生信分析全套代码,有过高期望者不要买。
4.有充足时间者可以考虑继续等待,2026年会正式出版R语言绘图相关的书籍
❝若上述几点都可以接受,欢迎点击下方所付费购买此文档,购买后会通过公众号后台发送文档链接,通过在线腾讯文档分析案例清单,内附案例下载链接,案例代码文件夹支持下载到本地查看。
只分享文档查看链接,无交流群,不添加任何联系方式 购买前建议后台沟通了解。
在线目录大纲
❝通过腾讯文档在线编辑,小编可实时进行bug注解,只要保证R软件及R包版本号与案例所示一致,无须担心代码运行报错问题。在线文档内附有案例代码下载链接
html注释文档
从2025年起的除极个别案例外,其余案例图提供下方所示html注释文档非常方便查看


