library(ggplot2)
library(mgcv)
Loading required package: nlme
This is mgcv 1.8-31. For overview type 'help("mgcv-package")'.
require(purrr)
Loading required package: purrr
library(dplyr)

Attaching package: ‘dplyr’

The following object is masked from ‘package:nlme’:

    collapse

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(ggpubr)
Loading required package: magrittr

Attaching package: ‘magrittr’

The following object is masked from ‘package:purrr’:

    set_names
library(reshape2)
library(Matrix)

Load simulation results

rl <- readRDS("pc2.rl.rds")

Make a simplified data frame

adf <- do.call(rbind,unlist(c(rl),recursive=F))
adf$auc[is.nan(adf$auc)] <- 0.5;
adf$auc[is.na(adf$auc)] <- 1;

adf %<>% group_by(n,c) %>% summarize(auc=mean(auc)) %>% as.data.frame  # add trim?

view raw data

ggplot(adf,aes(log10(n),log10(c),fill=auc))+geom_point(aes(col=auc)) + scale_color_gradient2(low = "blue", mid = "white", high = "red",midpoint=0.75) + theme_bw() + xlim(1,5)

2D regression fit

x <- adf; x$n <- log10(x$n); x$c <- log10(x$c)
sg <- gam(auc~s(n,c),data=x)

Predict on a regular grid

nbins <- 200;
pdf <- data.frame(n=seq(0,5,length.out=nbins),c=seq(min(x$c),max(x$c),length.out=nbins))
pdf <- data.frame(t(matrix(unlist(cross2(pdf$n,pdf$c)),nrow=2))); colnames(pdf) <- c('n','c')

pdf$auc <- predict(sg,newdata=pdf)
pdf$auc[pdf$auc>1] <- 1; pdf$auc[pdf$auc<0.5] <- 0.5
p <- ggplot(pdf,aes(n,c,z=auc))+geom_raster(aes(fill=auc),interpolate=T)  +
  scale_x_continuous(limits=c(1,5), expand = c(0, 0)) + 
  scale_y_continuous( expand = c(0, 0), limits=c(min(pdf$c),4.5)) +
  #scale_fill_gradientn(colours=adjustcolor(c("#0000FFFF","#0000FFFF","#0000FFFF","#FFFFFFFF","#FF0000FF","#FF0000FF"),alpha=0.65)) +
  scale_fill_gradientn(colours=adjustcolor(c("black","black",'black','white','darkgreen','darkgreen'),alpha=0.75),name='AUC') + 
  xlab('log10[ number of cells ]') +ylab('log10[ UMIs/cell ]') 
p

pdf(file='ncells.pdf',width=3.8,height=3); print(p); dev.off();
null device 
          1 

Clustering examples

Load sample data

cd <- readRDS(file=paste0('lcd_',1,'.rds'))
cell.sizes <- Matrix::colSums(cd)
gf <- attr(cd,'group')
cell.groups <- tapply(1:ncol(cd),gf,I)
quick.df <- function(n.cells,coverage) {
  # sample cells
  sampled.cells <- as.vector(t(matrix(unlist(lapply(cell.groups,sample,n.cells)),ncol=2)))
  sm <- cd[,sampled.cells,drop=F]
  sub.p <- min(1,coverage/mean(Matrix::colSums(sm)))
  # downsample coverage
  sm <- subsample.matrix(sm,p.sample=sub.p)
  sgf <- gf[colnames(sm)]
  # PCA/lda
  p2 <- Pagoda2$new(sm, n.cores = 1, trim = 0, log.scale =T, min.cells.per.gene = 1,  min.transcripts.per.cell = 0,verbose=F)
  p2$adjustVariance(plot = F, gam.k = 10,verbose=F)
  suppressWarnings(p2$calculatePcaReduction(nPcs = 2, n.odgenes = 1e3, maxit = 1000,verbose=F))
  lr <- lda(p2$reductions$PCA,sgf)
 
  # plot data frame
  df <- data.frame(p2$reductions$PCA); colnames(df) <- c('PC1','PC2'); df$type <- sgf
  df
}
quick.margin.plot <- function(df, alpha=0.5, size=1, trim.outliers=0) {
  # thanks to https://stackoverflow.com/questions/8545035/scatterplot-with-marginal-histograms-in-ggplot2
  require(cowplot)
  require(ggrastr)
  if(trim.outliers>0) {
    pc1t <- rank(abs(df$PC1)) <= nrow(df)*(1-trim.outliers);
    pc2t <- rank(abs(df$PC2)) <= nrow(df)*(1-trim.outliers);
    df <- df[pc1t & pc2t,]
  }
  pal <- scale_color_manual(values=c("blue","red"))
  # Set up scatterplot
  scatterplot <- ggplot(df, aes(x = PC1, y = PC2, color = type)) +
    geom_point_rast(size = size, alpha = alpha, width=1,height=1) +
    guides(color = FALSE) +
    theme_bw() +
    theme(plot.margin = margin()) + 
    scale_x_continuous(labels = NULL,breaks=NULL) +
    scale_y_continuous(labels = NULL,breaks=NULL) +
    pal
  
  
  # Define marginal histogram
  marginal_distribution <- function(x, var, group) {
    ggplot(x, aes_string(x = var, fill = group)) +
      #geom_histogram(bins = 30, alpha = 0.4, position = "identity") +
      geom_density(alpha = 0.4, size = 0.1) +
      guides(fill = FALSE) +
      theme_void() +
      theme(plot.margin = margin()) + scale_fill_manual(values=c("blue","red"))
  }
  
  # Set up marginal histograms
  x_hist <- marginal_distribution(df, "PC1", "type")
  y_hist <- marginal_distribution(df, "PC2", "type") +
    coord_flip()
  
  # Align histograms with scatterplot
  aligned_x_hist <- align_plots(x_hist, scatterplot, align = "v")[[1]]
  aligned_y_hist <- align_plots(y_hist, scatterplot, align = "h")[[1]]
  
  # Arrange plots
  plot_grid(
    aligned_x_hist,
    NULL,
    scatterplot,
    aligned_y_hist,
    ncol = 2,
    nrow = 2,
    rel_heights = c(0.15, 1),
    rel_widths = c(1, 0.15)
  )

}
df1 <- quick.df(round(10^(1.5)),round(10^(4.0)));
p1 <- quick.margin.plot(df1, alpha=0.2, size=1,trim.outliers=0.03)
Loading required package: cowplot

Attaching package: ‘cowplot’

The following object is masked from ‘package:ggpubr’:

    get_legend

The following object is masked from ‘package:ggplot2’:

    ggsave

Loading required package: ggrastr
p1

p2 <- quick.margin.plot(df2, alpha=0.1, size=0.5,trim.outliers=0.03)
p2

p3 <- quick.margin.plot(df3, alpha=0.01, size=0.1,trim.outliers=0.001)
p3

pdf(file='ex1.pdf',width=2,height=2); print(p1); dev.off()
null device 
          1 
pdf(file='ex2.pdf',width=2,height=2); print(p2); dev.off()
null device 
          1 
pdf(file='ex3.pdf',width=2,height=2); print(p3); dev.off()
null device 
          1 
LS0tCnRpdGxlOiAiTnVtYmVyIG9mIGNlbGwgZmlndXJlIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7cn0KbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KG1nY3YpCnJlcXVpcmUocHVycnIpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoZ2dwdWJyKQpsaWJyYXJ5KHJlc2hhcGUyKQpsaWJyYXJ5KE1hdHJpeCkKYGBgCgoKTG9hZCBzaW11bGF0aW9uIHJlc3VsdHMKCmBgYHtyfQpybCA8LSByZWFkUkRTKCJwYzIucmwucmRzIikKYGBgCgpNYWtlIGEgc2ltcGxpZmllZCBkYXRhIGZyYW1lCmBgYHtyfQphZGYgPC0gZG8uY2FsbChyYmluZCx1bmxpc3QoYyhybCkscmVjdXJzaXZlPUYpKQphZGYkYXVjW2lzLm5hbihhZGYkYXVjKV0gPC0gMC41OwphZGYkYXVjW2lzLm5hKGFkZiRhdWMpXSA8LSAxOwoKYWRmICU8PiUgZ3JvdXBfYnkobixjKSAlPiUgc3VtbWFyaXplKGF1Yz1tZWFuKGF1YykpICU+JSBhcy5kYXRhLmZyYW1lICAjIGFkZCB0cmltPwpgYGAKCnZpZXcgcmF3IGRhdGEKYGBge3IgZmlnLmhlaWdodD00LGZpZy53aWR0aD01fQpnZ3Bsb3QoYWRmLGFlcyhsb2cxMChuKSxsb2cxMChjKSxmaWxsPWF1YykpK2dlb21fcG9pbnQoYWVzKGNvbD1hdWMpKSArIHNjYWxlX2NvbG9yX2dyYWRpZW50Mihsb3cgPSAiYmx1ZSIsIG1pZCA9ICJ3aGl0ZSIsIGhpZ2ggPSAicmVkIixtaWRwb2ludD0wLjc1KSArIHRoZW1lX2J3KCkgKyB4bGltKDEsNSkKYGBgCgoKMkQgcmVncmVzc2lvbiBmaXQKYGBge3J9CnggPC0gYWRmOyB4JG4gPC0gbG9nMTAoeCRuKTsgeCRjIDwtIGxvZzEwKHgkYykKc2cgPC0gZ2FtKGF1Y35zKG4sYyksZGF0YT14KQpgYGAKClByZWRpY3Qgb24gYSByZWd1bGFyIGdyaWQKYGBge3J9Cm5iaW5zIDwtIDIwMDsKcGRmIDwtIGRhdGEuZnJhbWUobj1zZXEoMCw1LGxlbmd0aC5vdXQ9bmJpbnMpLGM9c2VxKG1pbih4JGMpLG1heCh4JGMpLGxlbmd0aC5vdXQ9bmJpbnMpKQpwZGYgPC0gZGF0YS5mcmFtZSh0KG1hdHJpeCh1bmxpc3QoY3Jvc3MyKHBkZiRuLHBkZiRjKSksbnJvdz0yKSkpOyBjb2xuYW1lcyhwZGYpIDwtIGMoJ24nLCdjJykKCnBkZiRhdWMgPC0gcHJlZGljdChzZyxuZXdkYXRhPXBkZikKcGRmJGF1Y1twZGYkYXVjPjFdIDwtIDE7IHBkZiRhdWNbcGRmJGF1YzwwLjVdIDwtIDAuNQpgYGAKCmBgYHtyIGZpZy5oZWlnaHQ9MyxmaWcud2lkdGg9My44LCBtZXNzYWdlPUYsIHdhcm5pbmc9Rn0KcCA8LSBnZ3Bsb3QocGRmLGFlcyhuLGMsej1hdWMpKStnZW9tX3Jhc3RlcihhZXMoZmlsbD1hdWMpLGludGVycG9sYXRlPVQpICArCiAgc2NhbGVfeF9jb250aW51b3VzKGxpbWl0cz1jKDEsNSksIGV4cGFuZCA9IGMoMCwgMCkpICsgCiAgc2NhbGVfeV9jb250aW51b3VzKCBleHBhbmQgPSBjKDAsIDApLCBsaW1pdHM9YyhtaW4ocGRmJGMpLDQuNSkpICsKICAjc2NhbGVfZmlsbF9ncmFkaWVudG4oY29sb3Vycz1hZGp1c3Rjb2xvcihjKCIjMDAwMEZGRkYiLCIjMDAwMEZGRkYiLCIjMDAwMEZGRkYiLCIjRkZGRkZGRkYiLCIjRkYwMDAwRkYiLCIjRkYwMDAwRkYiKSxhbHBoYT0wLjY1KSkgKwogIHNjYWxlX2ZpbGxfZ3JhZGllbnRuKGNvbG91cnM9YWRqdXN0Y29sb3IoYygiYmxhY2siLCJibGFjayIsJ2JsYWNrJywnd2hpdGUnLCdkYXJrZ3JlZW4nLCdkYXJrZ3JlZW4nKSxhbHBoYT0wLjc1KSxuYW1lPSdBVUMnKSArIAogIHhsYWIoJ2xvZzEwWyBudW1iZXIgb2YgY2VsbHMgXScpICt5bGFiKCdsb2cxMFsgVU1Jcy9jZWxsIF0nKSAKcApgYGAKCmBgYHtyfQpwZGYoZmlsZT0nbmNlbGxzLnBkZicsd2lkdGg9My44LGhlaWdodD0zKTsgcHJpbnQocCk7IGRldi5vZmYoKTsKYGBgCgoKIyMgQ2x1c3RlcmluZyBleGFtcGxlcwoKCgpMb2FkIHNhbXBsZSBkYXRhCmBgYHtyfQpjZCA8LSByZWFkUkRTKGZpbGU9cGFzdGUwKCdsY2RfJywxLCcucmRzJykpCmNlbGwuc2l6ZXMgPC0gTWF0cml4Ojpjb2xTdW1zKGNkKQpnZiA8LSBhdHRyKGNkLCdncm91cCcpCmNlbGwuZ3JvdXBzIDwtIHRhcHBseSgxOm5jb2woY2QpLGdmLEkpCmBgYAoKCmBgYHtyIGZpZy53aWR0aD0yLCBmaWcuaGVpZ2h0PTJ9CnF1aWNrLmRmIDwtIGZ1bmN0aW9uKG4uY2VsbHMsY292ZXJhZ2UpIHsKICAjIHNhbXBsZSBjZWxscwogIHNhbXBsZWQuY2VsbHMgPC0gYXMudmVjdG9yKHQobWF0cml4KHVubGlzdChsYXBwbHkoY2VsbC5ncm91cHMsc2FtcGxlLG4uY2VsbHMpKSxuY29sPTIpKSkKICBzbSA8LSBjZFssc2FtcGxlZC5jZWxscyxkcm9wPUZdCiAgc3ViLnAgPC0gbWluKDEsY292ZXJhZ2UvbWVhbihNYXRyaXg6OmNvbFN1bXMoc20pKSkKICAjIGRvd25zYW1wbGUgY292ZXJhZ2UKICBzbSA8LSBzdWJzYW1wbGUubWF0cml4KHNtLHAuc2FtcGxlPXN1Yi5wKQogIHNnZiA8LSBnZltjb2xuYW1lcyhzbSldCiAgIyBQQ0EvbGRhCiAgcDIgPC0gUGFnb2RhMiRuZXcoc20sIG4uY29yZXMgPSAxLCB0cmltID0gMCwgbG9nLnNjYWxlID1ULCBtaW4uY2VsbHMucGVyLmdlbmUgPSAxLCAgbWluLnRyYW5zY3JpcHRzLnBlci5jZWxsID0gMCx2ZXJib3NlPUYpCiAgcDIkYWRqdXN0VmFyaWFuY2UocGxvdCA9IEYsIGdhbS5rID0gMTAsdmVyYm9zZT1GKQogIHN1cHByZXNzV2FybmluZ3MocDIkY2FsY3VsYXRlUGNhUmVkdWN0aW9uKG5QY3MgPSAyLCBuLm9kZ2VuZXMgPSAxZTMsIG1heGl0ID0gMTAwMCx2ZXJib3NlPUYpKQogIGxyIDwtIGxkYShwMiRyZWR1Y3Rpb25zJFBDQSxzZ2YpCiAKICAjIHBsb3QgZGF0YSBmcmFtZQogIGRmIDwtIGRhdGEuZnJhbWUocDIkcmVkdWN0aW9ucyRQQ0EpOyBjb2xuYW1lcyhkZikgPC0gYygnUEMxJywnUEMyJyk7IGRmJHR5cGUgPC0gc2dmCiAgZGYKfQpgYGAKCmBgYHtyfQpxdWljay5tYXJnaW4ucGxvdCA8LSBmdW5jdGlvbihkZiwgYWxwaGE9MC41LCBzaXplPTEsIHRyaW0ub3V0bGllcnM9MCkgewogICMgdGhhbmtzIHRvIGh0dHBzOi8vc3RhY2tvdmVyZmxvdy5jb20vcXVlc3Rpb25zLzg1NDUwMzUvc2NhdHRlcnBsb3Qtd2l0aC1tYXJnaW5hbC1oaXN0b2dyYW1zLWluLWdncGxvdDIKICByZXF1aXJlKGNvd3Bsb3QpCiAgcmVxdWlyZShnZ3Jhc3RyKQogIGlmKHRyaW0ub3V0bGllcnM+MCkgewogICAgcGMxdCA8LSByYW5rKGFicyhkZiRQQzEpKSA8PSBucm93KGRmKSooMS10cmltLm91dGxpZXJzKTsKICAgIHBjMnQgPC0gcmFuayhhYnMoZGYkUEMyKSkgPD0gbnJvdyhkZikqKDEtdHJpbS5vdXRsaWVycyk7CiAgICBkZiA8LSBkZltwYzF0ICYgcGMydCxdCiAgfQogIHBhbCA8LSBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzPWMoImJsdWUiLCJyZWQiKSkKICAjIFNldCB1cCBzY2F0dGVycGxvdAogIHNjYXR0ZXJwbG90IDwtIGdncGxvdChkZiwgYWVzKHggPSBQQzEsIHkgPSBQQzIsIGNvbG9yID0gdHlwZSkpICsKICAgIGdlb21fcG9pbnRfcmFzdChzaXplID0gc2l6ZSwgYWxwaGEgPSBhbHBoYSwgd2lkdGg9MSxoZWlnaHQ9MSkgKwogICAgZ3VpZGVzKGNvbG9yID0gRkFMU0UpICsKICAgIHRoZW1lX2J3KCkgKwogICAgdGhlbWUocGxvdC5tYXJnaW4gPSBtYXJnaW4oKSkgKyAKICAgIHNjYWxlX3hfY29udGludW91cyhsYWJlbHMgPSBOVUxMLGJyZWFrcz1OVUxMKSArCiAgICBzY2FsZV95X2NvbnRpbnVvdXMobGFiZWxzID0gTlVMTCxicmVha3M9TlVMTCkgKwogICAgcGFsCiAgCiAgCiAgIyBEZWZpbmUgbWFyZ2luYWwgaGlzdG9ncmFtCiAgbWFyZ2luYWxfZGlzdHJpYnV0aW9uIDwtIGZ1bmN0aW9uKHgsIHZhciwgZ3JvdXApIHsKICAgIGdncGxvdCh4LCBhZXNfc3RyaW5nKHggPSB2YXIsIGZpbGwgPSBncm91cCkpICsKICAgICAgI2dlb21faGlzdG9ncmFtKGJpbnMgPSAzMCwgYWxwaGEgPSAwLjQsIHBvc2l0aW9uID0gImlkZW50aXR5IikgKwogICAgICBnZW9tX2RlbnNpdHkoYWxwaGEgPSAwLjQsIHNpemUgPSAwLjEpICsKICAgICAgZ3VpZGVzKGZpbGwgPSBGQUxTRSkgKwogICAgICB0aGVtZV92b2lkKCkgKwogICAgICB0aGVtZShwbG90Lm1hcmdpbiA9IG1hcmdpbigpKSArIHNjYWxlX2ZpbGxfbWFudWFsKHZhbHVlcz1jKCJibHVlIiwicmVkIikpCiAgfQogIAogICMgU2V0IHVwIG1hcmdpbmFsIGhpc3RvZ3JhbXMKICB4X2hpc3QgPC0gbWFyZ2luYWxfZGlzdHJpYnV0aW9uKGRmLCAiUEMxIiwgInR5cGUiKQogIHlfaGlzdCA8LSBtYXJnaW5hbF9kaXN0cmlidXRpb24oZGYsICJQQzIiLCAidHlwZSIpICsKICAgIGNvb3JkX2ZsaXAoKQogIAogICMgQWxpZ24gaGlzdG9ncmFtcyB3aXRoIHNjYXR0ZXJwbG90CiAgYWxpZ25lZF94X2hpc3QgPC0gYWxpZ25fcGxvdHMoeF9oaXN0LCBzY2F0dGVycGxvdCwgYWxpZ24gPSAidiIpW1sxXV0KICBhbGlnbmVkX3lfaGlzdCA8LSBhbGlnbl9wbG90cyh5X2hpc3QsIHNjYXR0ZXJwbG90LCBhbGlnbiA9ICJoIilbWzFdXQogIAogICMgQXJyYW5nZSBwbG90cwogIHBsb3RfZ3JpZCgKICAgIGFsaWduZWRfeF9oaXN0LAogICAgTlVMTCwKICAgIHNjYXR0ZXJwbG90LAogICAgYWxpZ25lZF95X2hpc3QsCiAgICBuY29sID0gMiwKICAgIG5yb3cgPSAyLAogICAgcmVsX2hlaWdodHMgPSBjKDAuMTUsIDEpLAogICAgcmVsX3dpZHRocyA9IGMoMSwgMC4xNSkKICApCgp9CmBgYAoKYGBge3J9CmRmMSA8LSBxdWljay5kZihyb3VuZCgxMF4oMS41KSkscm91bmQoMTBeKDQuMCkpKTsKZGYyIDwtIHF1aWNrLmRmKHJvdW5kKDEwXigyLjUpKSxyb3VuZCgxMF4oMi4wKSkpOwpkZjMgPC0gcXVpY2suZGYocm91bmQoMTBeKDQuNSkpLHJvdW5kKDEwXigyLjcpKSk7CmBgYAoKYGBge3IgZmlnLndpZHRoPTIsZmlnLmhlaWdodD0yfQpwMSA8LSBxdWljay5tYXJnaW4ucGxvdChkZjEsIGFscGhhPTAuMiwgc2l6ZT0xLHRyaW0ub3V0bGllcnM9MC4wMykKcDEKYGBgCgpgYGB7ciBmaWcud2lkdGg9MixmaWcuaGVpZ2h0PTJ9CnAyIDwtIHF1aWNrLm1hcmdpbi5wbG90KGRmMiwgYWxwaGE9MC4xLCBzaXplPTAuNSx0cmltLm91dGxpZXJzPTAuMDMpCnAyCmBgYAoKYGBge3IgZmlnLndpZHRoPTIsZmlnLmhlaWdodD0yfQpwMyA8LSBxdWljay5tYXJnaW4ucGxvdChkZjMsIGFscGhhPTAuMDEsIHNpemU9MC4xLHRyaW0ub3V0bGllcnM9MC4wMDEpCnAzCmBgYAoKCmBgYHtyfQpwZGYoZmlsZT0nZXgxLnBkZicsd2lkdGg9MixoZWlnaHQ9Mik7IHByaW50KHAxKTsgZGV2Lm9mZigpCnBkZihmaWxlPSdleDIucGRmJyx3aWR0aD0yLGhlaWdodD0yKTsgcHJpbnQocDIpOyBkZXYub2ZmKCkKcGRmKGZpbGU9J2V4My5wZGYnLHdpZHRoPTIsaGVpZ2h0PTIpOyBwcmludChwMyk7IGRldi5vZmYoKQpgYGAKCgoKCgoK