An example of the RNA-ATAC-seq integration based on the gene ativity scores.

Load libraries

library(conos)
## Loading required package: Matrix
## Loading required package: igraph
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library(pagoda2)
## 
library(parallel)
library(ggplot2)

Load example data and annotations:

load(url("http://pklab.med.harvard.edu/peterk/conos/atac_rna/data.RData"))

The data contains lung count matrices from three RNA datasets (two lung samples from Tabula Muris, one lung sample from Han et al.), and two lung sci-ATAC-seq replicates from Cusanovich et al. For sci-ATAC-seq, the matrix values correspond to gene activity scores, as estimated by the authors using Cicero.

str(data,1)
## List of 5
##  $ RNA_SS2:Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
##  $ RNA_10x:Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
##  $ RNA_mw :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
##  $ ATAC_1 :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
##  $ ATAC_2 :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots

Standard Conos processing applies. We first run pre-processing using pagoda2:

p2l <- mclapply(data,basicP2proc,n.odgenes=3e3,min.cells.per.gene=-1,nPcs=30,make.geneknn=F,n.cores=30,mc.cores=1)
## 1716 cells, 23433 genes; normalizing ... using plain model winsorizing ... log scale ... done.
## calculating variance fit ... using gam 920 overdispersed genes ... 920 persisting ... done.
## running PCA using 3000 OD genes .... done
## Estimating embeddings.
## running tSNE using 30 cores:
## 1520 cells, 23433 genes; normalizing ... using plain model winsorizing ... log scale ... done.
## calculating variance fit ... using gam 617 overdispersed genes ... 617 persisting ... done.
## running PCA using 3000 OD genes .... done
## Estimating embeddings.
## running tSNE using 30 cores:
## 2835 cells, 20765 genes; normalizing ... using plain model winsorizing ... log scale ... done.
## calculating variance fit ... using gam 117 overdispersed genes ... 117 persisting ... done.
## running PCA using 3000 OD genes .... done
## Estimating embeddings.
## running tSNE using 30 cores:
## 1486 cells, 20783 genes; normalizing ... using plain model winsorizing ... log scale ... done.
## calculating variance fit ... using gam 923 overdispersed genes ... 923 persisting ... done.
## running PCA using 3000 OD genes .... done
## Estimating embeddings.
## running tSNE using 30 cores:
## 1549 cells, 20783 genes; normalizing ... using plain model winsorizing ... log scale ... done.
## calculating variance fit ... using gam 889 overdispersed genes ... 889 persisting ... done.
## running PCA using 3000 OD genes .... done
## Estimating embeddings.
## running tSNE using 30 cores:

Now create Conos object, build and embed the graph:

l.con <- Conos$new(p2l,n.cores=30)
l.con$buildGraph(k=15,k.self=5,k.self.weigh=0.01,ncomps=30,n.odgenes=5e3,space='PCA') 
## found 0 out of 10 cached PCA  space pairs ... running 10 additional PCA  space pairs  done
## inter-sample links using  mNN   done
## local pairs local pairs  done
## building graph ..done
l.con$findCommunities(resolution=1.5)
l.con$embedGraph(alpha=1/2);
## Estimating embeddings.

Plot RNA-seq and ATAC-seq cells separately (on the same embedding), using the annotations provided in the Tabula Muris (for RNA) and sci-ATAC manuscript (for ATAC)

p1 <- l.con$plotGraph(font.size=c(3,5),title='conos clusters',alpha=0.2) #+ annotate("text",  x=-Inf, y = Inf, label = "clusters", vjust=1, hjust=0)
p2 <- l.con$plotGraph(groups=rna.annotation,mark.groups=T,alpha=0.2,plot.na=F,title='annotation: RNA',font.size=c(3,5))+xlim(range(l.con$embedding[,1]))+ylim(range(l.con$embedding[,2]));
p2c <- l.con$plotGraph(groups=atac.annotation,mark.groups=T,alpha=0.2,plot.na=F,title='annotation: ATAC',font.size=c(3,5))+xlim(range(l.con$embedding[,1]))+ylim(range(l.con$embedding[,2]));
p3 <- l.con$plotGraph(color.by='sample',mark.groups=F,alpha=0.1,show.legend=T,title='platform',raster=T)+theme(legend.position=c(1,1),legend.justification = c(1,1))+guides(color=guide_legend(ncol=2,override.aes = list(size=3,alpha=0.8)))

pp <- cowplot::plot_grid(plotlist=list(p2,p2c,p1,p3), ncol=2) 
pp