失眠网 > 单细胞转录组测序分析----多样本去除批次效应

单细胞转录组测序分析----多样本去除批次效应

时间：2020-12-24 11:29:18

上期我们讲了使用Seurat分析单个样本，那么2个或多个样本的分析会有哪些不同呢？单细胞转录组数据中一个重要的问题就在于批次效应，任何一个试验步骤都容易带来批次效应，多个样本的比较分析更甚。Seurat专门开发了一种能够比较多个样本的分析方法，我们一起来看看。

假设有2个样本，分别对2个样本进行数据的预处理及标准化，类似于单个样本的分析：

#样本1matrix <- Read10X_h5("filtered_gene_bc_matrices_h5.h5")filter_10x_object <-CreateSeuratObject(raw.data = matrix,min.cells = 3)filter_10x_object@meta.data$group <- "sample_1"mito.genes <- grep(pattern = "^MT-", x = rownames(x = filter_10x_object@data), value = TRUE)percent.mito <- Matrix::colSums(filter_10x_object@raw.data[mito.genes, ])/Matrix::colSums(filter_10x_object@raw.data)filter_10x_object <- AddMetaData(object = filter_10x_object, metadata = percent.mito,col.name = "percent.mito")filter_10x_object <- FilterCells(object = filter_10x_object, subset.names = c("nGene", "percent.mito"), low.thresholds = c(200, -Inf), high.thresholds = c(2500, 0.05))filter_10x_object <- NormalizeData(object = filter_10x_object, normalization.method = "LogNormalize", scale.factor = 10000)filter_10x_object<- ScaleData(filter_10x_object, vars.to.regress = c("nUMI", "percent.mito"), display.progress = F)filter_10x_object <- FindVariableGenes(filter_10x_object, do.plot = F)g.1 <- head(rownames(filter_10x_object@hvg.info), 1000)#样本2matrix2 <- Read10X_h5("filtered_gene_bc_matrices_h5.h5")filter_10x_object_2<-CreateSeuratObject(raw.data = matrix2,min.cells = 3)filter_10x_object_2@meta.data$group <- "sample_2"mito.genes <- grep(pattern = "^MT-", x = rownames(x = filter_10x_object_2@data), value = TRUE)percent.mito <- Matrix::colSums(filter_10x_object_2@raw.data[mito.genes, ])/Matrix::colSums(filter_10x_object_2@raw.data)filter_10x_object_2 <- AddMetaData(object = filter_10x_object_2, metadata = percent.mito,col.name = "percent.mito")filter_10x_object_2<- FilterCells(object = filter_10x_object_2, subset.names = c("nGene", "percent.mito"), low.thresholds = c(200, -Inf), high.thresholds = c(2500, 0.05))filter_10x_object_2<- NormalizeData(object = filter_10x_object_2, normalization.method = "LogNormalize", scale.factor = 10000)filter_10x_object_2<- ScaleData(filter_10x_object_2, vars.to.regress = c("nUMI", "percent.mito"), display.progress = F)filter_10x_object_2<- FindVariableGenes(filter_10x_object_2, do.plot = F)g.2 <- head(rownames(filter_10x_object_2@hvg.info), 1000)#选取2个样本高可变基因的交集作为后续分析的基因genes.use <- unique(c(g.1, g.2))genes.use <- intersect(genes.use, rownames(filter_10x_object@scale.data))genes.use <- intersect(genes.use, rownames(filter_10x_object_2@scale.data))#执行RunCCA函数，主要目的是鉴定2个数据集共有的变异源，同时也会将2个对象合并成一个对象，#并进行数据的标准化和归一化（scale）combined <- RunCCA(filter_10x_object, filter_10x_object_2, genes.use = genes.use, = 30,add.cell.id1="sample_1",add.cell.id2="sample_2")#如果是2个以上的样本，则需要使用RunMultiCCA函数#类似于PCA分析的PC选择，执行RunCCA后也需要选择合适的CC个数进行后续分析p3 <- MetageneBicorPlot(combined, grouping.var = "group", dims.eval = 1:30, display.progress = FALSE)

8.jpg

执行完上述步骤后进行align the CCA subspaces

combined <- AlignSubspace(combined, reduction.type = "cca", grouping.var = "group", dims.align = 1:20)#执行完此步骤后会返回一个新的降维数据用于后续聚类分析combined <- RunTSNE(combined, reduction.use = "cca.aligned", dims.use = 1:20, do.fast = T)combined <- FindClusters(combined, reduction.type = "cca.aligned", resolution = 0.6, dims.use = 1:20)#接下去就根据自己的分析需求进行多组数据比较分析了，这里就不介绍了注：Seurat的多个样本合并分析，与10x官网的cellranger aggr分析相差很大。

如果觉得《单细胞转录组测序分析----多样本去除批次效应》对你有帮助，请点赞、收藏，并留下你的观点哦！

本内容不代表本网观点和政治立场，如有侵犯你的权益请联系我们处理。

网友评论

网友评论仅供其表达个人看法，并不表明网站立场。