An example of the RNA-ATAC-seq integration based on the gene ativity scores.
Load libraries
library(conos)
## Loading required package: Matrix
## Loading required package: igraph
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(pagoda2)
##
library(parallel)
library(ggplot2)
Load example data and annotations:
load(url("http://pklab.med.harvard.edu/peterk/conos/atac_rna/data.RData"))
The data contains lung count matrices from three RNA datasets (two lung samples from Tabula Muris, one lung sample from Han et al.), and two lung sci-ATAC-seq replicates from Cusanovich et al. For sci-ATAC-seq, the matrix values correspond to gene activity scores, as estimated by the authors using Cicero.
str(data,1)
## List of 5
## $ RNA_SS2:Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## $ RNA_10x:Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## $ RNA_mw :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## $ ATAC_1 :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## $ ATAC_2 :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
Standard Conos processing applies. We first run pre-processing using pagoda2:
p2l <- mclapply(data,basicP2proc,n.odgenes=3e3,min.cells.per.gene=-1,nPcs=30,make.geneknn=F,n.cores=30,mc.cores=1)
## 1716 cells, 23433 genes; normalizing ... using plain model winsorizing ... log scale ... done.
## calculating variance fit ... using gam 920 overdispersed genes ... 920 persisting ... done.
## running PCA using 3000 OD genes .... done
## Estimating embeddings.
## running tSNE using 30 cores:
## 1520 cells, 23433 genes; normalizing ... using plain model winsorizing ... log scale ... done.
## calculating variance fit ... using gam 617 overdispersed genes ... 617 persisting ... done.
## running PCA using 3000 OD genes .... done
## Estimating embeddings.
## running tSNE using 30 cores:
## 2835 cells, 20765 genes; normalizing ... using plain model winsorizing ... log scale ... done.
## calculating variance fit ... using gam 117 overdispersed genes ... 117 persisting ... done.
## running PCA using 3000 OD genes .... done
## Estimating embeddings.
## running tSNE using 30 cores:
## 1486 cells, 20783 genes; normalizing ... using plain model winsorizing ... log scale ... done.
## calculating variance fit ... using gam 923 overdispersed genes ... 923 persisting ... done.
## running PCA using 3000 OD genes .... done
## Estimating embeddings.
## running tSNE using 30 cores:
## 1549 cells, 20783 genes; normalizing ... using plain model winsorizing ... log scale ... done.
## calculating variance fit ... using gam 889 overdispersed genes ... 889 persisting ... done.
## running PCA using 3000 OD genes .... done
## Estimating embeddings.
## running tSNE using 30 cores:
Now create Conos object, build and embed the graph:
l.con <- Conos$new(p2l,n.cores=30)
l.con$buildGraph(k=15,k.self=5,k.self.weigh=0.01,ncomps=30,n.odgenes=5e3,space='PCA')
## found 0 out of 10 cached PCA space pairs ... running 10 additional PCA space pairs done
## inter-sample links using mNN done
## local pairs local pairs done
## building graph ..done
l.con$findCommunities(resolution=1.5)
l.con$embedGraph(alpha=1/2);
## Estimating embeddings.
Plot RNA-seq and ATAC-seq cells separately (on the same embedding), using the annotations provided in the Tabula Muris (for RNA) and sci-ATAC manuscript (for ATAC)
p1 <- l.con$plotGraph(font.size=c(3,5),title='conos clusters',alpha=0.2) #+ annotate("text", x=-Inf, y = Inf, label = "clusters", vjust=1, hjust=0)
p2 <- l.con$plotGraph(groups=rna.annotation,mark.groups=T,alpha=0.2,plot.na=F,title='annotation: RNA',font.size=c(3,5))+xlim(range(l.con$embedding[,1]))+ylim(range(l.con$embedding[,2]));
p2c <- l.con$plotGraph(groups=atac.annotation,mark.groups=T,alpha=0.2,plot.na=F,title='annotation: ATAC',font.size=c(3,5))+xlim(range(l.con$embedding[,1]))+ylim(range(l.con$embedding[,2]));
p3 <- l.con$plotGraph(color.by='sample',mark.groups=F,alpha=0.1,show.legend=T,title='platform',raster=T)+theme(legend.position=c(1,1),legend.justification = c(1,1))+guides(color=guide_legend(ncol=2,override.aes = list(size=3,alpha=0.8)))
pp <- cowplot::plot_grid(plotlist=list(p2,p2c,p1,p3), ncol=2)
pp