CAMOIP的数据主要来自TCGA,可用于分析ICI-Treated和TCGA临床队列的表达数据。通过该数据库,可以探索TCGA和ICI-Treated中基因表达的差异。此处,我们采用TCGA Cohort的数据展示KIRC患者中AQP9与相关基因的表达情况。
我们以GSE15111的数据为例复现,从数据下载、数据分组,到差异可视化展示。
####----GSE11151_ccRCC----####rm(list = ls())getwd()#library(GEOquery)#data <- getGEO("GSE46699",destdir = ".",AnnotGPL = F, getGPL = F)exprset <- read.table(file = "GSE11151_series_matrix.txt.gz",sep = "\t",header = T,quote = '""',fill=T,comment.char = "!")rownames(exprset) <- exprset[,1]exprset <- exprset[,-1]####----002_Expression_log2_transform----####ex <- exprsetqx <- as.numeric(quantile(ex, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm=T))LogC <- (qx[5] > 100) ||(qx[6]-qx[1] > 50 && qx[2] > 0) ||(qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)if (LogC) {ex[which(ex <= 0)] <- NaNexprset <- log2(ex)print("log2 transform finished")}else{print("log2 transform not needed")}library(limma)boxplot(exprset,outline=FALSE, notch=T, las=2)exprSet=normalizeBetweenArrays(exprset)boxplot(exprSet,outline=FALSE, notch=T, las=2)exprSet <- as.data.frame(exprSet)####----003_probe_ID_translation----####platformMap <- data.table::fread("platformMap.txt",data.table = F)index <-"GPL570"paste0(platformMap$bioc_package[grep(index,platformMap$gpl)],".db")if(!requireNamespace("hgu133plus2.db")){options(BioC_mirror="https://mirrors.ustc.edu.cn/bioc/")BiocManager::install("hgu133plus2.db",update = F,ask = F)}library("hgu133plus2.db")ids <- toTable(get("hgu133plus2SYMBOL"))ids <- idslength(unique(ids$probe_id))length(unique(ids$symbol))tail(sort(table(ids$symbol)))table(sort(table(ids$symbol)))plot(table(sort(table(ids$symbol))))####----004_probe_ID_to_Expression----####library(dplyr)library(tibble)exprSet <- exprSet %>%rownames_to_column("probe_id")%>%inner_join(ids,by="probe_id") %>%select(-probe_id) %>%select(symbol,everything()) %>%mutate(rowMean =rowMeans(.[,-1])) %>%arrange(desc(rowMean)) %>%distinct(symbol,.keep_all = T) %>%select(-rowMean) %>%column_to_rownames("symbol")save(exprSet,file = "exprSet11151.Rdata")exprSet <- as.data.frame(exprSet)####----Sample_group (all the samples included)----####Normal <- data.table::fread("GSE15111_N.txt",data.table = F)Normal <- as.character(Normal)write(Normal,file = "Normal_sample.txt")Tumor <- data.table::fread("GSE11151_T.txt",data.table = F)Tumor <- as.character(Tumor)write(Tumor,file = "Tumor_sample.txt")library(dplyr)library(ggplot2)DEGs <- select(exprSet,c("GSM281311", "GSM281312", "GSM281314", "GSM281315", "GSM281316","GSM281285", "GSM281286", "GSM281287", "GSM281288", "GSM281289", "GSM281290", "GSM281291", "GSM281292", "GSM281293", "GSM281294", "GSM281295", "GSM281296", "GSM281297", "GSM281298", "GSM281299", "GSM281300", "GSM281301", "GSM281302", "GSM281303", "GSM281304", "GSM281305", "GSM281306", "GSM281307", "GSM281308", "GSM281309", "GSM281310"))group1 <- c(rep("normal",5),rep("cancer",26))group1 <- factor(group1,levels = c("normal","cancer"))design <- model.matrix(~0 + group1)colnames(design) <- levels(group1)designcontrast.matrix <- makeContrasts( "cancer - normal", levels = design)fit1 <- lmFit(DEGs,design)fit1 <- contrasts.fit(fit1, contrast.matrix)fit2 <- eBayes(fit1)allDiff=topTable(fit2,adjust='fdr',coef=1,number=10000)write.table(allDiff,file="alldiff.xls",sep="\t",quote=F)allLimma=allDiffallLimma=allLimma[order(allLimma$logFC),]allLimma=rbind(Gene=colnames(allLimma),allLimma)write.table(allLimma,file="GSE15111_limmaTab.txt",sep="\t",quote=F,col.names=F)
logFoldChange=1adjustP=0.05diffSig <- allDiff[with(allDiff, (abs(logFC)>logFoldChange & adj.P.Val < adjustP )), ]write.table(diffSig,file="diff.xls",sep="\t",quote=F)diffUp <- allDiff[with(allDiff, (logFC>logFoldChange & adj.P.Val < adjustP )), ]write.table(diffUp,file="up.xls",sep="\t",quote=F)diffDown <- allDiff[with(allDiff, (logFC<(-logFoldChange) & adj.P.Val < adjustP )), ]write.table(diffDown,file="down.xls",sep="\t",quote=F)hmExp=DEGs[rownames(diffSig),]diffExp=rbind(id=colnames(hmExp),hmExp)write.table(diffExp,file="diffExp.txt",sep="\t",quote=F,col.names=F)xMax=max(-log10(allDiff$adj.P.Val))yMax=max(abs(allDiff$logFC))library(ggplot2)allDiff$change <- ifelse(allDiff$adj.P.Val < 0.05 & abs(allDiff$logFC) > 1,ifelse(allDiff$logFC > 1,'UP','DOWN'),'NOT')ggplot(data= allDiff, aes(x = -log10(adj.P.Val), y = logFC, color = change)) +geom_point(alpha=0.8, size = 1) +theme_bw(base_size = 15) +theme(plot.title=element_text(hjust=0.5), # 标题居中panel.grid.minor = element_blank(),panel.grid.major = element_blank()) + # 网格线设置为空白geom_hline(yintercept= 0 ,linetype= 2 ) +scale_color_manual(name = "",values = c("red", "green", "black"),limits = c("UP", "DOWN", "NOT")) +xlim(0,xMax) +ylim(-yMax,yMax) +labs(title = 'Volcano', x = '-Log10(adj.P.Val)', y = 'LogFC')
topDEGs = DEGs[rownames(diffSig)[1:15],]annotation_col = data.frame(group = group1)rownames(annotation_col) = colnames(DEGs)library(pheatmap)pheatmap(topDEGs,annotation_col = annotation_col,color = colorRampPalette(c("blue", "gray", "red"))(50),fontsize = 7)
TarDEGs <- as.data.frame(t(DEGs))DF1 <- select(TarDEGs,c("AQP9"))group <- as.data.frame(c(rep("normal",5),rep("cancer",26)))names(group)[1] <- "group"DF2 <- cbind(DF1,group)library(ggplot2)ggplot(data = DF2,aes(x=group,y=AQP9,fill=group))+geom_boxplot()+geom_point()+theme_bw()
# Basic violin plotggplot(data = DF2,aes(x=group,y=AQP9,fill=group)) +geom_violin()
# Basic dot plotggplot(data = DF2,aes(x=group,y=AQP9,fill=group)) +geom_dotplot(binaxis='y', stackdir='center')

