大数跨境
0
0

跟着NC学习多类型综合图绘制

跟着NC学习多类型综合图绘制 R语言数据分析指南
2025-12-02
6

欢迎关注R语言数据分析指南

本节来分享nature communications上一篇论文中的一张综合图,作者提供了完整的代码及数据。小编在原文代码的基础上进行了修改后可以实现正常出图,结果与原文有所不同,个人观点,仅供参考,更多详细内容请查看论文内介绍。

论文信息

Gut microbial β-glucuronidases and their role in the microbiome-metabolite axis in colorectal cancer

Chen, J., Li, Y., Tang, S. et al. Gut microbial β-glucuronidases and their role in the microbiome-metabolite axis in colorectal cancer. Nat Commun 16, 10660 (2025). https://doi.org/10.1038/s41467-025-65679-y

论文原图

结果图

图形解读

此图整体来看主要由5组图组成,内容非常的多。但是仔细拆分一下可以看出都是由基础简单图组成的,因此只要按部就班的来绘制即可,主要点在于对个部分数据的理解,难度系数一般适合进阶学习。

论文代码

https://github.com/yr2008-UM/CRC_GUS/tree/master/04.DataAnalysis

代码展示

library(tidyverse)
library(ggpubr)     
library(ggtree)      
library(treeio)     
library(coin)  
library(patchwork)
group_mean <- function(group.subgroup,data,colInfo="Group"){
# Get unique groups
  tmpg=unique(group.subgroup[,colInfo])
  tmpn=length(tmpg)
  rows=nrow(data)
# Initialize result matrix
  result <- matrix(nrow=rows,ncol=tmpn*3)
  name <- NULL

# Create column names
for(i in tmpg){ name <- c(name,paste(i,'Median',sep='_'),paste(i,'Mean',sep='_'),paste(i,'SD',sep='_'))    }
  colnames(result)=name

# Calculate statistics for each group
for(i in tmpg){
    # Subset samples for current group
    tmpdata=data[,rownames(group.subgroup[which(group.subgroup[,colInfo]==i),,drop=F])]
    
    # colnames
    name <- c(paste(i,'Median',sep='_'),paste(i,'Mean',sep='_'),paste(i,'SD',sep='_'))
    
    # Calculate statistics for each feature
    for (j in1:rows){
      tmp.median=median(as.numeric(tmpdata[j,]))
      tmp.mean=mean(as.numeric(tmpdata[j,]))
      tmp.SD=sd(as.numeric(tmpdata[j,]))
      result[j,name] <- c(tmp.median,tmp.mean,tmp.SD)
    }
  }

# Format and return results
  rownames(result) <- rownames(data)
  result <- data.frame(result)
return(result)
}

wilcoxon.FDR.TAX <- function(data,group){

  data=data[,rownames(group)]
  tmpg=unique(group$Stage)
  species.abun <- data

#statistics
  result <- group_mean(group,data,'Stage')

#wilcoxon
for (i in c('Healthy')) {
    for (j in c('MP','S0',"SI_II","SIII_IV")){
      #wilcoxon test
      sample1=rownames(group[which(group[,'Stage']==i),,drop=F])
      sample2=rownames(group[which(group[,'Stage']==j),,drop=F])
      wilcox.01.p <- apply(data,1,function(x) {if(sum(x[c(sample1,sample2)]) == 0){return('NA')}else{test <- wilcox.test(x[sample1],x[sample2],conf.int = T);p <- test$p.value;return(p);}}) %>% as.numeric()
      wilcox.01.q <- rep("NA",length(wilcox.01.p))
      wilcox.01.q[which(wilcox.01.p < 0.05)] <- p.adjust(wilcox.01.p[which(wilcox.01.p < 0.05)], method = "fdr")
      wilname.p <- data.frame(p=wilcox.01.p,q=wilcox.01.q)
      colnames(wilname.p) <- paste0(c("wilcox.test.p","wilcox.test.q"),paste0("(",i,' VS ',j,")"))
      result <- cbind(result,wilname.p)
    }
  }
# cutoff for sign species
  cutoff <- nrow(group)*0.1
  cutFC <- log2(2)

# Screen and format sign species
  signSpecies = list()

# formatted the sign species across CRC stages
  signTaxMP <- result %>% dplyr::mutate(log2FC=ifelse(MP_Mean > Healthy_Mean,log2(MP_Mean/Healthy_Mean),-log2(Healthy_Mean/MP_Mean))) %>% dplyr::mutate(log2FC=ifelse(!is.finite(log2FC),ifelse(MP_Mean>Healthy_Mean,10,-10),log2FC))
  signTax <- signTaxMP %>% dplyr::filter(`wilcox.test.p(Healthy VS MP)` < 0.05
  data <- species.abun[signTax %>% dplyr::arrange(-log2FC) %>% rownames(),]
  data[data>0] <- 1
  signTaxG0 <- intersect(signTax %>% dplyr::filter(abs(log2FC) >= cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])
  signSpecies[['Up']][['MP']] <- intersect(signTax %>% dplyr::filter(log2FC >= cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])
  signSpecies[['Down']][['MP']] <- intersect(signTax %>% dplyr::filter(log2FC <= -cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])

  signTaxS0 <- result %>% dplyr::mutate(log2FC=ifelse(S0_Mean > Healthy_Mean,log2(S0_Mean/Healthy_Mean),-log2(Healthy_Mean/S0_Mean))) %>% dplyr::mutate(log2FC=ifelse(!is.finite(log2FC),ifelse(S0_Mean>Healthy_Mean,10,-10),log2FC))
  signTax <- signTaxS0 %>% dplyr::filter(`wilcox.test.q(Healthy VS S0)` < 0.05
  data <- species.abun[signTax %>% dplyr::arrange(-log2FC) %>% rownames(),]
  data[data>0] <- 1
  signTaxG1 <- intersect(signTax %>% dplyr::filter(abs(log2FC) >= cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])
  signSpecies[['Up']][['S0']] <- intersect(signTax %>% dplyr::filter(log2FC >= cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])
  signSpecies[['Down']][['S0']] <- intersect(signTax %>% dplyr::filter(log2FC <= -cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])

  signTaxS12 <- result %>% dplyr::mutate(log2FC=ifelse(SI_II_Mean > Healthy_Mean,log2(SI_II_Mean/Healthy_Mean),-log2(Healthy_Mean/SI_II_Mean))) %>% dplyr::mutate(log2FC=ifelse(!is.finite(log2FC),ifelse(SI_II_Mean>Healthy_Mean,10,-10),log2FC))
  signTax <- signTaxS12 %>% dplyr::filter(`wilcox.test.q(Healthy VS SI_II)` < 0.05
  data <- species.abun[signTax %>% dplyr::arrange(-log2FC) %>% rownames(),]
  data[data>0] <- 1
  signTaxG2 <- intersect(signTax %>% dplyr::filter(abs(log2FC) >= cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])
  signSpecies[['Up']][['S12']] <- intersect(signTax %>% dplyr::filter(log2FC >= cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])
  signSpecies[['Down']][['S12']] <- intersect(signTax %>% dplyr::filter(log2FC <= -cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])

  signTaxS34 <- result %>% dplyr::mutate(log2FC=ifelse(SIII_IV_Mean > Healthy_Mean,log2(SIII_IV_Mean/Healthy_Mean),-log2(Healthy_Mean/SIII_IV_Mean))) %>% dplyr::mutate(log2FC=ifelse(!is.finite(log2FC),ifelse(SIII_IV_Mean>Healthy_Mean,10,-10),log2FC))
  signTax <- signTaxS34 %>% dplyr::filter(`wilcox.test.q(Healthy VS SIII_IV)` < 0.05
  data <- species.abun[signTax %>% dplyr::arrange(-log2FC) %>% rownames(),]
  data[data>0] <- 1
  signTaxG3 <- intersect(signTax %>% dplyr::filter(abs(log2FC) >= cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])
  signSpecies[['Up']][['S34']] <- intersect(signTax %>% dplyr::filter(log2FC >= cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])
  signSpecies[['Down']][['S34']] <- intersect(signTax %>% dplyr::filter(log2FC <= -cutFC) %>% rownames(),rownames(data)[rowSums(data) >= cutoff])

  allSign <- unique(c(signTaxG0,signTaxG1,signTaxG2,signTaxG3))
  signFeatureStageFC <- data.frame(Tax=allSign,MP=signTaxMP[allSign,'log2FC'],S0=signTaxS0[allSign,'log2FC'],S12=signTaxS12[allSign,'log2FC'],S34=signTaxS34[allSign,'log2FC']) %>% tibble::column_to_rownames('Tax')
  signFeatureStageMean <- result[allSign,c("Healthy_Mean","MP_Mean","S0_Mean",'SI_II_Mean',"SIII_IV_Mean")]

# the output of the function
return(list(testResult=result,signFC=signFeatureStageFC,signMean=signFeatureStageMean,signSpecies=signSpecies))
}
### SECTION 2: data loading ###

# Define color scheme for loop categories
loop.colors <- c("No Loop"="#FABA39","Mini-Loop 1"="#1AE4B6",
                 "Loop 1"="#4686FB","Loop 2"="#7A0403",
                 "Mini-Loop 2"=  "#E4460A",
                 "Mini-Loop 1,2"="#A2FC3C",
                 "No coverage" ="#30123B")

# Load metadata and abundance matrix
GUSabun_TPM <- read.csv("00.rawdata/GUSabun_TPM.csv",header = T,row.names = 1)
group <- read.csv("00.rawdata/group.csv",header = T,row.names = 1)
GUSsStat <- read.csv("00.rawdata/GUSsStat.csv",header = T,row.names = 1)

# Analysis configuration
analysis.list.info <- list(
  compaired = list(
    "Stage"=list(c("Healthy" ,"MP"),c("Healthy" ,"S0"),c("Healthy" ,"SI_II"),c("Healthy" ,"SIII_IV"))
  ),
  levels = list(
    "Stage"=c("Healthy" ,"MP","S0","SI_II","SIII_IV")
  ),
  colors = list(
    "Stage"=c("Healthy" ="#cecccb","MP"="#55B7E6","S0"="#193E8F","SI_II"="#F09739","SIII_IV"="#E53528")
  ))

### SECTION 3: differential abundance analysis ###

signGUSs <- wilcoxon.FDR.TAX(GUSabun_TPM,group)

绘制树图

# Load and plot phylogenetic tree
beast_tree <- read.newick("00.rawdata/38gmGUS.tre")

tree <- ggtree(beast_tree) + 
   geom_tiplab(size=3) +
   xlim(0,13)

色块填充图

ggtreeOrderGUS <- get_taxa_name(tree)

p2 <- GUSsStat[ggtreeOrderGUS,] %>% dplyr::select(Loop) %>% 
  tibble::rownames_to_column(var='GUSID') %>% 
  dplyr::mutate(Lab=gsub(
    "Loop ","L",gsub("Mini-Loop ","mL",gsub(
      "No Loop","NL",Loop)))) %>% 
  mutate(x="Loop Category") %>% 
  ggplot(aes(x,GUSID,fill=Loop,label=Lab)) + 
  geom_tile()  + geom_text() + 
  scale_fill_manual(values = loop.colors) + 
  labs(x=NULL,y=NULL) +
  ylim(rev(ggtreeOrderGUS)) + 
  theme_void() +
  theme(legend.position = "none")

ggplot2绘制热图

df <- signGUSs$testResult %>%
  dplyr::select(
    Healthy_Mean, MP_Mean, S0_Mean,
    SI_II_Mean, SIII_IV_Mean) %>%
  slice(match(ggtreeOrderGUS, rownames(.))) %>%
  tibble::rownames_to_column("Feature") %>%
  pivot_longer(-Feature,
               names_to = "Stage",
               values_to = "Value") %>% 
  group_by(Feature) %>%
  mutate(Value_z = as.numeric(scale(Value))) %>%
  ungroup()

df$Feature <- factor(df$Feature,levels = rev(unique(df$Feature)))

p3 <- ggplot(df, aes(x = Stage, y = Feature, fill = Value_z)) +
  geom_tile(color = "white", size = 0.1) +
  scale_fill_gradientn(
    colors = c("navy""white""firebrick3"),
    name = "Z-score") +
  theme_void() +
  theme(axis.text.x = element_text(size = 10, angle = 45, hjust =1,vjust=1),
    legend.title.position = "top",
    legend.title = element_text(vjust=0.5,hjust=0.5,color="black"),
    legend.position = "top")

散点图

signFeatureStage <- rbind(
  signGUSs$signSpecies$Up %>% unlist() %>% as.data.frame() %>% 
    dplyr::rename(Tax = 1) %>% tibble::rownames_to_column('Group') %>% 
    dplyr::mutate(Group=gsub("S34.*","S34",Group),Type='Up') %>% 
    dplyr::mutate(Group=gsub("S12.*","S12",Group)),
  signGUSs$signSpecies$Down %>% unlist() %>% as.data.frame() %>% 
    dplyr::rename(Tax = 1) %>% tibble::rownames_to_column('Group') %>%
    dplyr::mutate(Group=gsub("S12.*","S12",Group),Type='Down')  %>% 
    dplyr::mutate(Group=gsub("S34.*","S34",Group))) %>% 
  dplyr::mutate(Group=gsub("S0.*","S0",gsub("MP.*","MP",Group)))

p4 <- signFeatureStage %>% ggplot(aes(Group,Tax,shape=Type,fill=Type)) + 
  geom_point(size=2) + scale_shape_manual(values = c("Down"=25,"Up"=24)) + 
  scale_fill_manual(values = c("Down"="blue","Up"="red")) +
  labs(x=NULL,y=NULL) +
  ylim(rev(ggtreeOrderGUS)) +
  theme_void() +
  theme(legend.position = "top",
        legend.title = element_blank(),
        axis.line = element_line(color="black"),
        axis.text.x = element_text(angle = 45,hjust = 1,vjust=1,color="black"))

折线点图

# Calculate and visualize prevalence
calculatePrevalence <- function(inputData,groupCol){
  group <- group %>% dplyr::filter(! is.na(get(groupCol)))
  mid <- inputData
  mid[mid > 0] <- 1
  mark <- matrix(nrow = nrow(inputData),ncol = length(unique(group[,groupCol])))
  colnames(mark) <- unique(group[,groupCol])
  rownames(mark) <- rownames(inputData)
for (i in1:nrow(inputData)) {
    for (mid.group in unique(group[,groupCol])) {
      mid.group2 <- group %>% filter(get(groupCol) == mid.group) %>% rownames()
      mid.mark <- (sum( mid[i,mid.group2]) / length(mid.group2) ) * 100
      mark[i,mid.group] <- mid.mark
    }
  }
return(mark)
}

preResult <- calculatePrevalence(GUSabun_TPM[ggtreeOrderGUS,],"Stage") %>% reshape2::melt(by="row.names"
preResult$Var2 <- factor(preResult$Var2,levels = analysis.list.info$levels$Stage)

p5 <- preResult %>% ggplot(aes(value,Var1,color=Var2)) + 
  geom_vline(xintercept = c(10),linetype=2,color="grey") + 
  geom_line(group="Var2") + geom_point() + 
  facet_wrap("Var2",nrow = 1,ncol = 5) + 
  theme_classic() + xlab("Prevalence (100%)") + 
  ylab("") + 
  scale_color_manual(values = analysis.list.info$colors$Stage)  + 
  theme(legend.position = "none",
        axis.text.y = element_blank(),) + 
  ylim(rev(ggtreeOrderGUS))

拼图

(tree|p2|p3|p4|free(p5,type="label")) +
  plot_layout(widths = c(1,0.2,0.4,0.5,1.3))

关注下方公众号下回更新不迷路

本节介绍到此结束,最近依然有不少读者想购买小编之前写的R绘图文档,因此在附上一下购买方式及所能提供的服务具体如下:以往的交流群已经满员不在考虑建立新群,因此只会通过公众号后台发送腾讯文档链接

1.内容更新截止2025年/12月/31日结束,后期无任何更新。
2.只分享案例内容,无答疑。
3.内容只包含R语言绘图内容,无任何生信分析全套代码,有过高期望者不要买。
4.有充足时间者可以考虑继续等待,2026年会正式出版R语言绘图相关的书籍

若上述几点都可以接受,欢迎点击下方所付费购买此文档,购买后会通过公众号后台发送文档链接,通过在线腾讯文档分析案例清单,内附案例下载链接,案例代码文件夹支持下载到本地查看。
只分享文档查看链接,无交流群,不添加任何联系方式 购买前建议后台沟通了解。

R绘图进阶实战|ggplot2数百经典案例

在线目录大纲

通过腾讯文档在线编辑,小编可实时进行bug注解,只要保证R软件及R包版本号与案例所示一致,无须担心代码运行报错问题。在线文档内附有案例代码下载链接

html注释文档

从2025年起的除极个别案例外,其余案例图提供下方所示html注释文档非常方便查看


【声明】内容源于网络
0
0
R语言数据分析指南
R语言重症爱好者,喜欢绘制各种精美的图表,喜欢的小伙伴可以关注我,跟我一起学习
内容 1180
粉丝 0
R语言数据分析指南 R语言重症爱好者,喜欢绘制各种精美的图表,喜欢的小伙伴可以关注我,跟我一起学习
总阅读1.8k
粉丝0
内容1.2k