library(pagoda2)
Loading required package: Matrix
Loading required package: igraph
package ‘igraph’ was built under R version 3.5.3
Attaching package: ‘igraph’

The following objects are masked from ‘package:stats’:

    decompose, spectrum

The following object is masked from ‘package:base’:

    union
library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:igraph’:

    as_data_frame, groups, union

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(conos)
library(parallel)
library(cowplot)
Loading required package: ggplot2

Attaching package: ‘cowplot’

The following object is masked from ‘package:ggplot2’:

    ggsave
library(ggrepel)
library(Matrix)
library(data.table)
data.table 1.11.8  Latest news: r-datatable.com

Attaching package: ‘data.table’

The following objects are masked from ‘package:dplyr’:

    between, first, last
# some extra code for the comparative analysis here
source("/home/pkharchenko/m/pavan/DLI/conp2.r")

Load data

load("normal.RData")

cdl <- lapply(pagoda2:::sn(names(cdl)),function(n) { x <- cdl[[n]]; colnames(x) <- paste(n,colnames(x),sep='_'); x})
cdml <- lapply(pagoda2:::sn(names(cdl)),function(n) { x <- cdml[[n]]; rownames(x) <- paste(n,rownames(x),sep='_'); x})
cell.type.f <- as.factor(setNames(unlist(lapply(cdml,function(x) x$cell_line)),unlist(lapply(cdml,rownames))))

Integrate with Conos

pre-processing

matl.p2 <- lapply(cdl, basicP2proc, n.cores=30, min.cells.per.gene=0, n.odgenes=2e3, get.largevis=T, make.geneknn=FALSE,get.tsne=T)

Integrate

con <- Conos$new(matl.p2,n.cores=30)
con$buildGraph(k=15, k.self=5, space='PCA', ncomps=30, n.odgenes=2000, matching.method='mNN', metric='angular', verbose=TRUE)

Cluster and generate a 2D ebmedding

con$findCommunities(r=1) # 1.5 means slightly higher than default resolution 
con$embedGraph(method='UMAP'); 
con$misc$embeddings <- list(UMAP=con$embedding)
con$misc$embeddings$largeVis <- con$embedGraph()

Take a look:

alpha <- 0.2; size <- 0.8;
p1 <- con$plotGraph(title='clusters',alpha=alpha,size=size);
p2 <- con$plotGraph(color.by='sample',title='samples',mark.groups=F,alpha=alpha,size=size);
p3 <- con$plotGraph(groups=cell.type.f,alpha=alpha,size=size,title='annotation',font.size=c(8,8))
p4 <- con$plotGraph(gene='ENSG00000148773',alpha=0.8,size=size,title='Mki67')
cowplot::plot_grid(plotlist=list(p3,p2,p1,p4),nrow=2)

Look at gene scatter plots

mat <- con$getJointCountMatrix(raw=T)
mat <- mat/rowSums(mat)*1e6

Calculate average magnitudes per cell type per platform

cta <- tapply(rownames(mat),list(cell.type.f[rownames(mat)],sample.f[rownames(mat)]),function(x) colMeans(mat[x,]))
odf <- do.call(rbind,tapply(rownames(mat),list(cell.type.f[rownames(mat)],sample.f[rownames(mat)]),function(x) c(as.character(cell.type.f[x[1]]),as.character(sample.f[x[1]])),simplify = F))
names(cta) <- rownames(odf) <- apply(odf,1,paste,collapse='-')
sample.f <- con$getDatasetPerCell()

For every pair of platforms, draw three scatter plots comparing different techniques:

cell.lines <- unique(odf[,1])
tpairs <- combn(unique(odf[,2]),2)
i <- 2;
par(mfrow=c(1,3), mar = c(2.5,2.5,2.5,0.5), mgp = c(1.5,0.65,0), cex = 0.85)
lapply(cell.lines,function(n) {
  nn <- paste(n,tpairs[,i],sep='-')
  df <- do.call(cbind,cta[nn]);
  colnames(df) <- c('x','y')
  df <- log10(df+1);
  smoothScatter(df[,1],df[,2],xlab=nn[1],ylab=nn[2])
  abline(lm(y~x+0,data=data.frame(df)),col=2,lty=2)
})

Hihghlight outlier genes

i <- 2;
n <- cell.lines[[1]]; 
nn <- paste(n,tpairs[,i],sep='-')
df <- do.call(cbind,cta[nn]);
colnames(df) <- c('x','y')
df <- log10(df+1);
m <- lm(y~x+0,data=data.frame(df))
thr <- 1;
genes.up <- names(residuals(m))[residuals(m)>thr]
genes.down <- names(residuals(m))[residuals(m)< -1*thr]
pdf(file='platforms.pdf',width=9,height=3.2)
par(mfrow=c(1,3), mar = c(2.5,2.5,2.5,0.5), mgp = c(1.5,0.65,0), cex = 0.85)
lapply(cell.lines,function(n) {
  nn <- paste(n,tpairs[,i],sep='-')
  df <- do.call(cbind,cta[nn]);
  colnames(df) <- c('x','y')
  df <- df[rowSums(df)>0,]
  df <- log10(df+1);
  smoothScatter(df[,1],df[,2],xlab=tpairs[1,i],ylab=tpairs[2,i],main=paste(n,'cells'),colramp = function(n) adjustcolor(colorRampPalette(c("white", blues9))(n),alpha=0.7))
  abline(lm(y~x+0,data=data.frame(df)),col=1,lty=3)
  if(n=='H1975') {
    abline(1,coefficients(m)[1],col='red',lty=2)
    abline(-1,coefficients(m)[1],col='darkgreen',lty=2)
  }
  vg <- rownames(df) %in% genes.up
  points(df[vg,1],df[vg,2],col='red',pch=19,cex=0.2)
  vg <- rownames(df) %in% genes.down
  points(df[vg,1],df[vg,2],col='darkgreen',pch=19,cex=0.3)
})
[[1]]
NULL

[[2]]
NULL

[[3]]
NULL
dev.off()
null device 
          1 
LS0tCnRpdGxlOiAiRGV0ZWN0aW9uIGRpZmZlcmVuY2UgZXhhbXBsZSIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmxpYnJhcnkocGFnb2RhMikKbGlicmFyeShkcGx5cikKbGlicmFyeShjb25vcykKbGlicmFyeShwYXJhbGxlbCkKbGlicmFyeShjb3dwbG90KQpsaWJyYXJ5KGdncmVwZWwpCmxpYnJhcnkoTWF0cml4KQpsaWJyYXJ5KGRhdGEudGFibGUpCiMgc29tZSBleHRyYSBjb2RlIGZvciB0aGUgY29tcGFyYXRpdmUgYW5hbHlzaXMgaGVyZQpzb3VyY2UoIi9ob21lL3BraGFyY2hlbmtvL20vcGF2YW4vRExJL2NvbnAyLnIiKQpgYGAKCkxvYWQgZGF0YQoKYGBge3J9CmxvYWQoIm5vcm1hbC5SRGF0YSIpCgpjZGwgPC0gbGFwcGx5KHBhZ29kYTI6OjpzbihuYW1lcyhjZGwpKSxmdW5jdGlvbihuKSB7IHggPC0gY2RsW1tuXV07IGNvbG5hbWVzKHgpIDwtIHBhc3RlKG4sY29sbmFtZXMoeCksc2VwPSdfJyk7IHh9KQpjZG1sIDwtIGxhcHBseShwYWdvZGEyOjo6c24obmFtZXMoY2RsKSksZnVuY3Rpb24obikgeyB4IDwtIGNkbWxbW25dXTsgcm93bmFtZXMoeCkgPC0gcGFzdGUobixyb3duYW1lcyh4KSxzZXA9J18nKTsgeH0pCmBgYAoKYGBge3J9CmNlbGwudHlwZS5mIDwtIGFzLmZhY3RvcihzZXROYW1lcyh1bmxpc3QobGFwcGx5KGNkbWwsZnVuY3Rpb24oeCkgeCRjZWxsX2xpbmUpKSx1bmxpc3QobGFwcGx5KGNkbWwscm93bmFtZXMpKSkpCmBgYAoKCkludGVncmF0ZSB3aXRoIENvbm9zCgpwcmUtcHJvY2Vzc2luZwpgYGB7cn0KbWF0bC5wMiA8LSBsYXBwbHkoY2RsLCBiYXNpY1AycHJvYywgbi5jb3Jlcz0zMCwgbWluLmNlbGxzLnBlci5nZW5lPTAsIG4ub2RnZW5lcz0yZTMsIGdldC5sYXJnZXZpcz1ULCBtYWtlLmdlbmVrbm49RkFMU0UsZ2V0LnRzbmU9VCkKYGBgCgpJbnRlZ3JhdGUKYGBge3J9CmNvbiA8LSBDb25vcyRuZXcobWF0bC5wMixuLmNvcmVzPTMwKQpjb24kYnVpbGRHcmFwaChrPTE1LCBrLnNlbGY9NSwgc3BhY2U9J1BDQScsIG5jb21wcz0zMCwgbi5vZGdlbmVzPTIwMDAsIG1hdGNoaW5nLm1ldGhvZD0nbU5OJywgbWV0cmljPSdhbmd1bGFyJywgdmVyYm9zZT1UUlVFKQpgYGAKCkNsdXN0ZXIgYW5kIGdlbmVyYXRlIGEgMkQgZWJtZWRkaW5nCmBgYHtyfQpjb24kZmluZENvbW11bml0aWVzKHI9MSkgIyAxLjUgbWVhbnMgc2xpZ2h0bHkgaGlnaGVyIHRoYW4gZGVmYXVsdCByZXNvbHV0aW9uIApjb24kZW1iZWRHcmFwaChtZXRob2Q9J1VNQVAnKTsgCmNvbiRtaXNjJGVtYmVkZGluZ3MgPC0gbGlzdChVTUFQPWNvbiRlbWJlZGRpbmcpCmBgYAoKYGBge3J9CmNvbiRtaXNjJGVtYmVkZGluZ3MkbGFyZ2VWaXMgPC0gY29uJGVtYmVkR3JhcGgoKQpgYGAKCgpUYWtlIGEgbG9vazoKYGBge3IgZmlnLndpZHRoPTYsZmlnLmhlaWdodD03fQphbHBoYSA8LSAwLjI7IHNpemUgPC0gMC44OwpwMSA8LSBjb24kcGxvdEdyYXBoKHRpdGxlPSdjbHVzdGVycycsYWxwaGE9YWxwaGEsc2l6ZT1zaXplKTsKcDIgPC0gY29uJHBsb3RHcmFwaChjb2xvci5ieT0nc2FtcGxlJyx0aXRsZT0nc2FtcGxlcycsbWFyay5ncm91cHM9RixhbHBoYT1hbHBoYSxzaXplPXNpemUpOwpwMyA8LSBjb24kcGxvdEdyYXBoKGdyb3Vwcz1jZWxsLnR5cGUuZixhbHBoYT1hbHBoYSxzaXplPXNpemUsdGl0bGU9J2Fubm90YXRpb24nLGZvbnQuc2l6ZT1jKDgsOCkpCnA0IDwtIGNvbiRwbG90R3JhcGgoZ2VuZT0nRU5TRzAwMDAwMTQ4NzczJyxhbHBoYT0wLjgsc2l6ZT1zaXplLHRpdGxlPSdNa2k2NycpCmNvd3Bsb3Q6OnBsb3RfZ3JpZChwbG90bGlzdD1saXN0KHAzLHAyLHAxLHA0KSxucm93PTIpCmBgYAoKCkxvb2sgYXQgZ2VuZSBzY2F0dGVyIHBsb3RzCmBgYHtyfQptYXQgPC0gY29uJGdldEpvaW50Q291bnRNYXRyaXgocmF3PVQpCm1hdCA8LSBtYXQvcm93U3VtcyhtYXQpKjFlNgpgYGAKCkNhbGN1bGF0ZSBhdmVyYWdlIG1hZ25pdHVkZXMgcGVyIGNlbGwgdHlwZSBwZXIgcGxhdGZvcm0KYGBge3J9CmN0YSA8LSB0YXBwbHkocm93bmFtZXMobWF0KSxsaXN0KGNlbGwudHlwZS5mW3Jvd25hbWVzKG1hdCldLHNhbXBsZS5mW3Jvd25hbWVzKG1hdCldKSxmdW5jdGlvbih4KSBjb2xNZWFucyhtYXRbeCxdKSkKb2RmIDwtIGRvLmNhbGwocmJpbmQsdGFwcGx5KHJvd25hbWVzKG1hdCksbGlzdChjZWxsLnR5cGUuZltyb3duYW1lcyhtYXQpXSxzYW1wbGUuZltyb3duYW1lcyhtYXQpXSksZnVuY3Rpb24oeCkgYyhhcy5jaGFyYWN0ZXIoY2VsbC50eXBlLmZbeFsxXV0pLGFzLmNoYXJhY3RlcihzYW1wbGUuZlt4WzFdXSkpLHNpbXBsaWZ5ID0gRikpCm5hbWVzKGN0YSkgPC0gcm93bmFtZXMob2RmKSA8LSBhcHBseShvZGYsMSxwYXN0ZSxjb2xsYXBzZT0nLScpCnNhbXBsZS5mIDwtIGNvbiRnZXREYXRhc2V0UGVyQ2VsbCgpCmBgYAoKCkZvciBldmVyeSBwYWlyIG9mIHBsYXRmb3JtcywgZHJhdyB0aHJlZSBzY2F0dGVyIHBsb3RzIGNvbXBhcmluZyBkaWZmZXJlbnQgdGVjaG5pcXVlczoKYGBge3J9CmNlbGwubGluZXMgPC0gdW5pcXVlKG9kZlssMV0pCnRwYWlycyA8LSBjb21ibih1bmlxdWUob2RmWywyXSksMikKYGBgCgpgYGB7ciBmaWcuaGVpZ2h0PTMsZmlnLndpZHRoPTl9CmkgPC0gMjsKcGFyKG1mcm93PWMoMSwzKSwgbWFyID0gYygyLjUsMi41LDIuNSwwLjUpLCBtZ3AgPSBjKDEuNSwwLjY1LDApLCBjZXggPSAwLjg1KQpsYXBwbHkoY2VsbC5saW5lcyxmdW5jdGlvbihuKSB7CiAgbm4gPC0gcGFzdGUobix0cGFpcnNbLGldLHNlcD0nLScpCiAgZGYgPC0gZG8uY2FsbChjYmluZCxjdGFbbm5dKTsKICBjb2xuYW1lcyhkZikgPC0gYygneCcsJ3knKQogIGRmIDwtIGxvZzEwKGRmKzEpOwogIHNtb290aFNjYXR0ZXIoZGZbLDFdLGRmWywyXSx4bGFiPW5uWzFdLHlsYWI9bm5bMl0pCiAgYWJsaW5lKGxtKHl+eCswLGRhdGE9ZGF0YS5mcmFtZShkZikpLGNvbD0yLGx0eT0yKQp9KQpgYGAKCkhpaGdobGlnaHQgb3V0bGllciBnZW5lcwpgYGB7cn0KaSA8LSAyOwpuIDwtIGNlbGwubGluZXNbWzFdXTsgCm5uIDwtIHBhc3RlKG4sdHBhaXJzWyxpXSxzZXA9Jy0nKQpkZiA8LSBkby5jYWxsKGNiaW5kLGN0YVtubl0pOwpjb2xuYW1lcyhkZikgPC0gYygneCcsJ3knKQpkZiA8LSBsb2cxMChkZisxKTsKbSA8LSBsbSh5fngrMCxkYXRhPWRhdGEuZnJhbWUoZGYpKQp0aHIgPC0gMTsKZ2VuZXMudXAgPC0gbmFtZXMocmVzaWR1YWxzKG0pKVtyZXNpZHVhbHMobSk+dGhyXQpnZW5lcy5kb3duIDwtIG5hbWVzKHJlc2lkdWFscyhtKSlbcmVzaWR1YWxzKG0pPCAtMSp0aHJdCmBgYAoKYGBge3IgZmlnLmhlaWdodD0yLjIsZmlnLndpZHRoPTZ9CnBkZihmaWxlPSdwbGF0Zm9ybXMucGRmJyx3aWR0aD05LGhlaWdodD0zLjIpCnBhcihtZnJvdz1jKDEsMyksIG1hciA9IGMoMi41LDIuNSwyLjUsMC41KSwgbWdwID0gYygxLjUsMC42NSwwKSwgY2V4ID0gMC44NSkKbGFwcGx5KGNlbGwubGluZXMsZnVuY3Rpb24obikgewogIG5uIDwtIHBhc3RlKG4sdHBhaXJzWyxpXSxzZXA9Jy0nKQogIGRmIDwtIGRvLmNhbGwoY2JpbmQsY3RhW25uXSk7CiAgY29sbmFtZXMoZGYpIDwtIGMoJ3gnLCd5JykKICBkZiA8LSBkZltyb3dTdW1zKGRmKT4wLF0KICBkZiA8LSBsb2cxMChkZisxKTsKICBzbW9vdGhTY2F0dGVyKGRmWywxXSxkZlssMl0seGxhYj10cGFpcnNbMSxpXSx5bGFiPXRwYWlyc1syLGldLG1haW49cGFzdGUobiwnY2VsbHMnKSxjb2xyYW1wID0gZnVuY3Rpb24obikgYWRqdXN0Y29sb3IoY29sb3JSYW1wUGFsZXR0ZShjKCJ3aGl0ZSIsIGJsdWVzOSkpKG4pLGFscGhhPTAuNykpCiAgYWJsaW5lKGxtKHl+eCswLGRhdGE9ZGF0YS5mcmFtZShkZikpLGNvbD0xLGx0eT0zKQogIGlmKG49PSdIMTk3NScpIHsKICAgIGFibGluZSgxLGNvZWZmaWNpZW50cyhtKVsxXSxjb2w9J3JlZCcsbHR5PTIpCiAgICBhYmxpbmUoLTEsY29lZmZpY2llbnRzKG0pWzFdLGNvbD0nZGFya2dyZWVuJyxsdHk9MikKICB9CiAgdmcgPC0gcm93bmFtZXMoZGYpICVpbiUgZ2VuZXMudXAKICBwb2ludHMoZGZbdmcsMV0sZGZbdmcsMl0sY29sPSdyZWQnLHBjaD0xOSxjZXg9MC4yKQogIHZnIDwtIHJvd25hbWVzKGRmKSAlaW4lIGdlbmVzLmRvd24KICBwb2ludHMoZGZbdmcsMV0sZGZbdmcsMl0sY29sPSdkYXJrZ3JlZW4nLHBjaD0xOSxjZXg9MC4zKQp9KQpkZXYub2ZmKCkKYGBgCgo=