rm(list = ls()) options(stringsAsFactors = F) getwd() path='G:\\silicosis\\geo\\GSE82158_Bulk-seq_bleomycin_IM_AM_macrophage_ontogeny\\GSE82158_macrophage_ontogeny_norm_counts' setwd(path) getwd() getwd() rt=read.table("G:/silicosis/geo/GSE82158_Bulk-seq_bleomycin_IM_AM_macrophage_ontogeny/GSE82158_macrophage_ontogeny_norm_counts.txt.gz", sep = '\t',header = T,comment.char = '!') head(rt)[1:3,1:4] rownames(rt)=rt$Symbol dat=rt[,-1] head(dat)[1:3,1:4] dat[1:4,1:4] #查看dat这个矩阵的1至4行和1至4列,逗号前为行,逗号后为列
boxplot(dat[,1:10],las=2)
mydat=dat[,grep(colnames(dat),pattern = '^C8flx')]
coldata=DataFrame(row.names = colnames(mydat),
treatment=c(rep("IM_D14",4),
rep("IM_D19",5),
rep('TRAM_D0',3),
rep('TRAM_D14',4),
rep('TRAM_D19',5),
rep('Mo-AM_D14',4),
rep('Mo-AM_D19',5),
rep('mono_D14',4),
rep('mono_D19',5) ))
mydata=SummarizedExperiment::SummarizedExperiment(assays = mydat,
colData = coldata)
colData(mydata)
getwd()
#save(mydata,file ="G:/silicosis/geo/GSE82158_Bulk-seq_bleomycin_IM_AM_macrophage_ontogeny/GSE82158_macrophage_ontogeny_norm_counts/proper.rds" )
load("G:/silicosis/geo/GSE82158_Bulk-seq_bleomycin_IM_AM_macrophage_ontogeny/GSE82158_macrophage_ontogeny_norm_counts/proper.rds")
library(SummarizedExperiment)
library(stringr)
pd=pData(mydata)
str_split(colnames(mydata),"_",simplify = T)[,2:3]
paste0(str_split(colnames(mydata),"_",simplify = T)[,2],'_',str_split(colnames(mydata),"_",simplify = T)[,3])
g=paste0(str_split(colnames(mydata),"_",simplify = T)[,2],'_',str_split(colnames(mydata),"_",simplify = T)[,3])
table(g)
g
dat=mydata[,g %in% c('Sfhi_D19','Sflow_D19')]
group_list=g[g %in% c('Sfhi_D19','Sflow_D19')]
group_list
table(group_list)
dat[1:4,1:4]
#dat=assay(dat)
dat[1:4,1:4]
ncol(dat)
getwd()
source('G:/r/r language introduction_2020_12_05/R in action/2020_1205_geo/GEO-master/airway_RNAseq/run_DEG_RNA-seq.R')
# 这个 run_DEG_RNAseq 函数,是我自定义的
# 主要是包装了3个RNA-seq数据分析的R包
# 以及部分可视化函数
#exprSet=exprSet[,colnames(exprSet) %in% c("TRAM_D0","Mo-AM_D14")]
#group_list=group_list[colnames(exprSet) %in% c("TRAM_D0","Mo-AM_D14")]
dat=as.integer(dat)
dat=as.matrix(dat)
table(is.na(dat))
dat[1:3,1:3]
colnames(dat)
run_DEG_RNAseq(dat,group_list,
g1="Sfhi_D19",g2="Sflow_D19" #,pro = "silica"
)
## 挑选一些感兴趣的临床表型。
library(stringr)
if(1==1){
str_split(colnames(dat),'_',simplify = T)[,2:4]
for(eachelement in str_split(colnames(dat),'_',simplify = T)[,2:4]){
print(paste(eachelement,collapse = "_"))
}
paste(str_split(colnames(dat),'_',simplify = T)[,2:4],collapse = '_')
paste(c("Sflow","D14","2"),collapse="_")
g=str_split(colnames(dat),'_',simplify = T)[,2:4]
}
colnames(dat)
mydat=dat[,grep(colnames(dat),pattern = '^C8flx')]
mydat[1:4,1:4]
colnames(mydat)
#改名字列名
grep(colnames(mydat),pattern = 'IM_D14')
colnames(mydat)[grep(colnames(mydat),pattern = 'IM_D14')]='IM_D14'
colnames(mydat)[grep(colnames(mydat),pattern = 'IM_D19')]='IM_D19'
colnames(mydat)[grep(colnames(mydat),pattern = 'mono_D14')]='mono_D14'
colnames(mydat)[grep(colnames(mydat),pattern = 'mono_D19')]='mono_D19'
colnames(mydat)[grep(colnames(mydat),pattern = 'Sfhi_D0')]='TRAM_D0'
colnames(mydat)[grep(colnames(mydat),pattern = 'Sfhi_D14')]='TRAM_D14'
colnames(mydat)[grep(colnames(mydat),pattern = 'Sfhi_D19')]='TRAM_D19'
colnames(mydat)[grep(colnames(mydat),pattern = 'Sflow_D14')]='Mo-AM_D14'
colnames(mydat)[grep(colnames(mydat),pattern = 'Sflow_D19')]='Mo-AM_D19'
mydat[1:3,1:4]
table(colnames(mydat))
library(SummarizedExperiment)
myexpreset=SummarizedExperiment(assays =list(counts=mydat),
colData = DataFrame(row.names = colnames(mydat),
treatment=c(rep("IM_D14",4),
rep("IM_D19",5),
rep('TRAM_D0',3),
rep('TRAM_D14',4),
rep('TRAM_D19',5),
rep('Mo-AM_D14',4),
rep('Mo-AM_D19',5),
rep('mono_D14',4),
rep('mono_D19',5)
)
)
)
getwd()
group_list=colData(myexpreset)
pData(myexpreset)
exprSet=assay(myexpreset)
table(group_list)
colnames(myexpreset)
#save(myexpreset,file = "G:/silicosis/geo/GSE82158_Bulk-seq_bleomycin_IM_AM_macrophage_ontogeny/GSE82158_macrophage_ontogeny_norm_counts/myexprset_raw.rds")
load("G:/silicosis/geo/GSE82158_Bulk-seq_bleomycin_IM_AM_macrophage_ontogeny/GSE82158_macrophage_ontogeny_norm_counts/myexprset_raw.rds")
exprSet[1:4,1:4]
as.data.frame(exprSet[1:4,1:4])
t(as.data.frame(exprSet[1:4,1:4]))
myexpr=t(as.data.frame(exprSet))
head(myexpr)[1:4,2:3]
rownames(myexpr)
myexpr=as.data.frame(myexpr)
myexpr$'type'=ifelse(grepl(rownames(myexpr),pattern = 'IM_D14'),'IM_D14',
ifelse(grepl(rownames(myexpr),pattern = 'IM_D19'),'IM_D19',
ifelse(grepl(rownames(myexpr),pattern = 'Mo.AM_D19'),'Mo.AM_D19',
ifelse(grepl(rownames(myexpr),pattern = 'Mo.AM_D14'),'Mo.AM_D14',
ifelse(grepl(rownames(myexpr),pattern = 'mono_D19'),'mono_D19',
ifelse(grepl(rownames(myexpr),pattern = 'mono_D14'),'mono_D14',
ifelse(grepl(rownames(myexpr),pattern = 'TRAM_D19'),'TRAM_D19',
ifelse(grepl(rownames(myexpr),pattern = 'TRAM_D14'),'TRAM_D14',
'TRAM_D0')))
)))))
library(ggplot2)
ggplot2::ggplot(myexpr,aes(type,ENSMUSG00000002603,fill=type))+geom_boxplot()
ggplot2::ggplot(myexpr,aes(type,ENSMUSG00000014599,fill=type))+geom_boxplot()
exprSet['ENSMUSG00000002603',]
getwd()
openxlsx::write.xlsx(exprSet,'exprset.xlsx',row.names=TRUE)
t(exprSet['ENSMUSG00000002603',])
boxplot(t(exprSet['ENSMUSG00000002603',]))
boxplot(colnames(mydat),exprSet['ENSMUSG00000002603',])
data_for_ggplot=transform.data.frame(exprSet)
ggplot2::ggplot(data=exprSet['ENSMUSG00000002603',],
mapping = aes(x=colnames(mydat)))+boxplot(x=colnames(mydat))
library(ggplot2)
data(diamonds)
head(diamonds)