library(ggplot2)
library(mgcv)
Loading required package: nlme
This is mgcv 1.8-31. For overview type 'help("mgcv-package")'.
require(purrr)
Loading required package: purrr
library(dplyr)
Attaching package: ‘dplyr’
The following object is masked from ‘package:nlme’:
collapse
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
library(ggpubr)
Loading required package: magrittr
Attaching package: ‘magrittr’
The following object is masked from ‘package:purrr’:
set_names
library(reshape2)
library(Matrix)
Load simulation results
rl <- readRDS("pc2.rl.rds")
Make a simplified data frame
adf <- do.call(rbind,unlist(c(rl),recursive=F))
adf$auc[is.nan(adf$auc)] <- 0.5;
adf$auc[is.na(adf$auc)] <- 1;
adf %<>% group_by(n,c) %>% summarize(auc=mean(auc)) %>% as.data.frame # add trim?
view raw data
ggplot(adf,aes(log10(n),log10(c),fill=auc))+geom_point(aes(col=auc)) + scale_color_gradient2(low = "blue", mid = "white", high = "red",midpoint=0.75) + theme_bw() + xlim(1,5)
2D regression fit
x <- adf; x$n <- log10(x$n); x$c <- log10(x$c)
sg <- gam(auc~s(n,c),data=x)
Predict on a regular grid
nbins <- 200;
pdf <- data.frame(n=seq(0,5,length.out=nbins),c=seq(min(x$c),max(x$c),length.out=nbins))
pdf <- data.frame(t(matrix(unlist(cross2(pdf$n,pdf$c)),nrow=2))); colnames(pdf) <- c('n','c')
pdf$auc <- predict(sg,newdata=pdf)
pdf$auc[pdf$auc>1] <- 1; pdf$auc[pdf$auc<0.5] <- 0.5
p <- ggplot(pdf,aes(n,c,z=auc))+geom_raster(aes(fill=auc),interpolate=T) +
scale_x_continuous(limits=c(1,5), expand = c(0, 0)) +
scale_y_continuous( expand = c(0, 0), limits=c(min(pdf$c),4.5)) +
#scale_fill_gradientn(colours=adjustcolor(c("#0000FFFF","#0000FFFF","#0000FFFF","#FFFFFFFF","#FF0000FF","#FF0000FF"),alpha=0.65)) +
scale_fill_gradientn(colours=adjustcolor(c("black","black",'black','white','darkgreen','darkgreen'),alpha=0.75),name='AUC') +
xlab('log10[ number of cells ]') +ylab('log10[ UMIs/cell ]')
p
pdf(file='ncells.pdf',width=3.8,height=3); print(p); dev.off();
null device
1
Clustering examples
Load sample data
cd <- readRDS(file=paste0('lcd_',1,'.rds'))
cell.sizes <- Matrix::colSums(cd)
gf <- attr(cd,'group')
cell.groups <- tapply(1:ncol(cd),gf,I)
quick.df <- function(n.cells,coverage) {
# sample cells
sampled.cells <- as.vector(t(matrix(unlist(lapply(cell.groups,sample,n.cells)),ncol=2)))
sm <- cd[,sampled.cells,drop=F]
sub.p <- min(1,coverage/mean(Matrix::colSums(sm)))
# downsample coverage
sm <- subsample.matrix(sm,p.sample=sub.p)
sgf <- gf[colnames(sm)]
# PCA/lda
p2 <- Pagoda2$new(sm, n.cores = 1, trim = 0, log.scale =T, min.cells.per.gene = 1, min.transcripts.per.cell = 0,verbose=F)
p2$adjustVariance(plot = F, gam.k = 10,verbose=F)
suppressWarnings(p2$calculatePcaReduction(nPcs = 2, n.odgenes = 1e3, maxit = 1000,verbose=F))
lr <- lda(p2$reductions$PCA,sgf)
# plot data frame
df <- data.frame(p2$reductions$PCA); colnames(df) <- c('PC1','PC2'); df$type <- sgf
df
}
quick.margin.plot <- function(df, alpha=0.5, size=1, trim.outliers=0) {
# thanks to https://stackoverflow.com/questions/8545035/scatterplot-with-marginal-histograms-in-ggplot2
require(cowplot)
require(ggrastr)
if(trim.outliers>0) {
pc1t <- rank(abs(df$PC1)) <= nrow(df)*(1-trim.outliers);
pc2t <- rank(abs(df$PC2)) <= nrow(df)*(1-trim.outliers);
df <- df[pc1t & pc2t,]
}
pal <- scale_color_manual(values=c("blue","red"))
# Set up scatterplot
scatterplot <- ggplot(df, aes(x = PC1, y = PC2, color = type)) +
geom_point_rast(size = size, alpha = alpha, width=1,height=1) +
guides(color = FALSE) +
theme_bw() +
theme(plot.margin = margin()) +
scale_x_continuous(labels = NULL,breaks=NULL) +
scale_y_continuous(labels = NULL,breaks=NULL) +
pal
# Define marginal histogram
marginal_distribution <- function(x, var, group) {
ggplot(x, aes_string(x = var, fill = group)) +
#geom_histogram(bins = 30, alpha = 0.4, position = "identity") +
geom_density(alpha = 0.4, size = 0.1) +
guides(fill = FALSE) +
theme_void() +
theme(plot.margin = margin()) + scale_fill_manual(values=c("blue","red"))
}
# Set up marginal histograms
x_hist <- marginal_distribution(df, "PC1", "type")
y_hist <- marginal_distribution(df, "PC2", "type") +
coord_flip()
# Align histograms with scatterplot
aligned_x_hist <- align_plots(x_hist, scatterplot, align = "v")[[1]]
aligned_y_hist <- align_plots(y_hist, scatterplot, align = "h")[[1]]
# Arrange plots
plot_grid(
aligned_x_hist,
NULL,
scatterplot,
aligned_y_hist,
ncol = 2,
nrow = 2,
rel_heights = c(0.15, 1),
rel_widths = c(1, 0.15)
)
}
df1 <- quick.df(round(10^(1.5)),round(10^(4.0)));
p1 <- quick.margin.plot(df1, alpha=0.2, size=1,trim.outliers=0.03)
Loading required package: cowplot
Attaching package: ‘cowplot’
The following object is masked from ‘package:ggpubr’:
get_legend
The following object is masked from ‘package:ggplot2’:
ggsave
Loading required package: ggrastr
p1
p2 <- quick.margin.plot(df2, alpha=0.1, size=0.5,trim.outliers=0.03)
p2
p3 <- quick.margin.plot(df3, alpha=0.01, size=0.1,trim.outliers=0.001)
p3
pdf(file='ex1.pdf',width=2,height=2); print(p1); dev.off()
null device
1
pdf(file='ex2.pdf',width=2,height=2); print(p2); dev.off()
null device
1
pdf(file='ex3.pdf',width=2,height=2); print(p3); dev.off()
null device
1
LS0tCnRpdGxlOiAiTnVtYmVyIG9mIGNlbGwgZmlndXJlIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7cn0KbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KG1nY3YpCnJlcXVpcmUocHVycnIpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoZ2dwdWJyKQpsaWJyYXJ5KHJlc2hhcGUyKQpsaWJyYXJ5KE1hdHJpeCkKYGBgCgoKTG9hZCBzaW11bGF0aW9uIHJlc3VsdHMKCmBgYHtyfQpybCA8LSByZWFkUkRTKCJwYzIucmwucmRzIikKYGBgCgpNYWtlIGEgc2ltcGxpZmllZCBkYXRhIGZyYW1lCmBgYHtyfQphZGYgPC0gZG8uY2FsbChyYmluZCx1bmxpc3QoYyhybCkscmVjdXJzaXZlPUYpKQphZGYkYXVjW2lzLm5hbihhZGYkYXVjKV0gPC0gMC41OwphZGYkYXVjW2lzLm5hKGFkZiRhdWMpXSA8LSAxOwoKYWRmICU8PiUgZ3JvdXBfYnkobixjKSAlPiUgc3VtbWFyaXplKGF1Yz1tZWFuKGF1YykpICU+JSBhcy5kYXRhLmZyYW1lICAjIGFkZCB0cmltPwpgYGAKCnZpZXcgcmF3IGRhdGEKYGBge3IgZmlnLmhlaWdodD00LGZpZy53aWR0aD01fQpnZ3Bsb3QoYWRmLGFlcyhsb2cxMChuKSxsb2cxMChjKSxmaWxsPWF1YykpK2dlb21fcG9pbnQoYWVzKGNvbD1hdWMpKSArIHNjYWxlX2NvbG9yX2dyYWRpZW50Mihsb3cgPSAiYmx1ZSIsIG1pZCA9ICJ3aGl0ZSIsIGhpZ2ggPSAicmVkIixtaWRwb2ludD0wLjc1KSArIHRoZW1lX2J3KCkgKyB4bGltKDEsNSkKYGBgCgoKMkQgcmVncmVzc2lvbiBmaXQKYGBge3J9CnggPC0gYWRmOyB4JG4gPC0gbG9nMTAoeCRuKTsgeCRjIDwtIGxvZzEwKHgkYykKc2cgPC0gZ2FtKGF1Y35zKG4sYyksZGF0YT14KQpgYGAKClByZWRpY3Qgb24gYSByZWd1bGFyIGdyaWQKYGBge3J9Cm5iaW5zIDwtIDIwMDsKcGRmIDwtIGRhdGEuZnJhbWUobj1zZXEoMCw1LGxlbmd0aC5vdXQ9bmJpbnMpLGM9c2VxKG1pbih4JGMpLG1heCh4JGMpLGxlbmd0aC5vdXQ9bmJpbnMpKQpwZGYgPC0gZGF0YS5mcmFtZSh0KG1hdHJpeCh1bmxpc3QoY3Jvc3MyKHBkZiRuLHBkZiRjKSksbnJvdz0yKSkpOyBjb2xuYW1lcyhwZGYpIDwtIGMoJ24nLCdjJykKCnBkZiRhdWMgPC0gcHJlZGljdChzZyxuZXdkYXRhPXBkZikKcGRmJGF1Y1twZGYkYXVjPjFdIDwtIDE7IHBkZiRhdWNbcGRmJGF1YzwwLjVdIDwtIDAuNQpgYGAKCmBgYHtyIGZpZy5oZWlnaHQ9MyxmaWcud2lkdGg9My44LCBtZXNzYWdlPUYsIHdhcm5pbmc9Rn0KcCA8LSBnZ3Bsb3QocGRmLGFlcyhuLGMsej1hdWMpKStnZW9tX3Jhc3RlcihhZXMoZmlsbD1hdWMpLGludGVycG9sYXRlPVQpICArCiAgc2NhbGVfeF9jb250aW51b3VzKGxpbWl0cz1jKDEsNSksIGV4cGFuZCA9IGMoMCwgMCkpICsgCiAgc2NhbGVfeV9jb250aW51b3VzKCBleHBhbmQgPSBjKDAsIDApLCBsaW1pdHM9YyhtaW4ocGRmJGMpLDQuNSkpICsKICAjc2NhbGVfZmlsbF9ncmFkaWVudG4oY29sb3Vycz1hZGp1c3Rjb2xvcihjKCIjMDAwMEZGRkYiLCIjMDAwMEZGRkYiLCIjMDAwMEZGRkYiLCIjRkZGRkZGRkYiLCIjRkYwMDAwRkYiLCIjRkYwMDAwRkYiKSxhbHBoYT0wLjY1KSkgKwogIHNjYWxlX2ZpbGxfZ3JhZGllbnRuKGNvbG91cnM9YWRqdXN0Y29sb3IoYygiYmxhY2siLCJibGFjayIsJ2JsYWNrJywnd2hpdGUnLCdkYXJrZ3JlZW4nLCdkYXJrZ3JlZW4nKSxhbHBoYT0wLjc1KSxuYW1lPSdBVUMnKSArIAogIHhsYWIoJ2xvZzEwWyBudW1iZXIgb2YgY2VsbHMgXScpICt5bGFiKCdsb2cxMFsgVU1Jcy9jZWxsIF0nKSAKcApgYGAKCmBgYHtyfQpwZGYoZmlsZT0nbmNlbGxzLnBkZicsd2lkdGg9My44LGhlaWdodD0zKTsgcHJpbnQocCk7IGRldi5vZmYoKTsKYGBgCgoKIyMgQ2x1c3RlcmluZyBleGFtcGxlcwoKCgpMb2FkIHNhbXBsZSBkYXRhCmBgYHtyfQpjZCA8LSByZWFkUkRTKGZpbGU9cGFzdGUwKCdsY2RfJywxLCcucmRzJykpCmNlbGwuc2l6ZXMgPC0gTWF0cml4Ojpjb2xTdW1zKGNkKQpnZiA8LSBhdHRyKGNkLCdncm91cCcpCmNlbGwuZ3JvdXBzIDwtIHRhcHBseSgxOm5jb2woY2QpLGdmLEkpCmBgYAoKCmBgYHtyIGZpZy53aWR0aD0yLCBmaWcuaGVpZ2h0PTJ9CnF1aWNrLmRmIDwtIGZ1bmN0aW9uKG4uY2VsbHMsY292ZXJhZ2UpIHsKICAjIHNhbXBsZSBjZWxscwogIHNhbXBsZWQuY2VsbHMgPC0gYXMudmVjdG9yKHQobWF0cml4KHVubGlzdChsYXBwbHkoY2VsbC5ncm91cHMsc2FtcGxlLG4uY2VsbHMpKSxuY29sPTIpKSkKICBzbSA8LSBjZFssc2FtcGxlZC5jZWxscyxkcm9wPUZdCiAgc3ViLnAgPC0gbWluKDEsY292ZXJhZ2UvbWVhbihNYXRyaXg6OmNvbFN1bXMoc20pKSkKICAjIGRvd25zYW1wbGUgY292ZXJhZ2UKICBzbSA8LSBzdWJzYW1wbGUubWF0cml4KHNtLHAuc2FtcGxlPXN1Yi5wKQogIHNnZiA8LSBnZltjb2xuYW1lcyhzbSldCiAgIyBQQ0EvbGRhCiAgcDIgPC0gUGFnb2RhMiRuZXcoc20sIG4uY29yZXMgPSAxLCB0cmltID0gMCwgbG9nLnNjYWxlID1ULCBtaW4uY2VsbHMucGVyLmdlbmUgPSAxLCAgbWluLnRyYW5zY3JpcHRzLnBlci5jZWxsID0gMCx2ZXJib3NlPUYpCiAgcDIkYWRqdXN0VmFyaWFuY2UocGxvdCA9IEYsIGdhbS5rID0gMTAsdmVyYm9zZT1GKQogIHN1cHByZXNzV2FybmluZ3MocDIkY2FsY3VsYXRlUGNhUmVkdWN0aW9uKG5QY3MgPSAyLCBuLm9kZ2VuZXMgPSAxZTMsIG1heGl0ID0gMTAwMCx2ZXJib3NlPUYpKQogIGxyIDwtIGxkYShwMiRyZWR1Y3Rpb25zJFBDQSxzZ2YpCiAKICAjIHBsb3QgZGF0YSBmcmFtZQogIGRmIDwtIGRhdGEuZnJhbWUocDIkcmVkdWN0aW9ucyRQQ0EpOyBjb2xuYW1lcyhkZikgPC0gYygnUEMxJywnUEMyJyk7IGRmJHR5cGUgPC0gc2dmCiAgZGYKfQpgYGAKCmBgYHtyfQpxdWljay5tYXJnaW4ucGxvdCA8LSBmdW5jdGlvbihkZiwgYWxwaGE9MC41LCBzaXplPTEsIHRyaW0ub3V0bGllcnM9MCkgewogICMgdGhhbmtzIHRvIGh0dHBzOi8vc3RhY2tvdmVyZmxvdy5jb20vcXVlc3Rpb25zLzg1NDUwMzUvc2NhdHRlcnBsb3Qtd2l0aC1tYXJnaW5hbC1oaXN0b2dyYW1zLWluLWdncGxvdDIKICByZXF1aXJlKGNvd3Bsb3QpCiAgcmVxdWlyZShnZ3Jhc3RyKQogIGlmKHRyaW0ub3V0bGllcnM+MCkgewogICAgcGMxdCA8LSByYW5rKGFicyhkZiRQQzEpKSA8PSBucm93KGRmKSooMS10cmltLm91dGxpZXJzKTsKICAgIHBjMnQgPC0gcmFuayhhYnMoZGYkUEMyKSkgPD0gbnJvdyhkZikqKDEtdHJpbS5vdXRsaWVycyk7CiAgICBkZiA8LSBkZltwYzF0ICYgcGMydCxdCiAgfQogIHBhbCA8LSBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzPWMoImJsdWUiLCJyZWQiKSkKICAjIFNldCB1cCBzY2F0dGVycGxvdAogIHNjYXR0ZXJwbG90IDwtIGdncGxvdChkZiwgYWVzKHggPSBQQzEsIHkgPSBQQzIsIGNvbG9yID0gdHlwZSkpICsKICAgIGdlb21fcG9pbnRfcmFzdChzaXplID0gc2l6ZSwgYWxwaGEgPSBhbHBoYSwgd2lkdGg9MSxoZWlnaHQ9MSkgKwogICAgZ3VpZGVzKGNvbG9yID0gRkFMU0UpICsKICAgIHRoZW1lX2J3KCkgKwogICAgdGhlbWUocGxvdC5tYXJnaW4gPSBtYXJnaW4oKSkgKyAKICAgIHNjYWxlX3hfY29udGludW91cyhsYWJlbHMgPSBOVUxMLGJyZWFrcz1OVUxMKSArCiAgICBzY2FsZV95X2NvbnRpbnVvdXMobGFiZWxzID0gTlVMTCxicmVha3M9TlVMTCkgKwogICAgcGFsCiAgCiAgCiAgIyBEZWZpbmUgbWFyZ2luYWwgaGlzdG9ncmFtCiAgbWFyZ2luYWxfZGlzdHJpYnV0aW9uIDwtIGZ1bmN0aW9uKHgsIHZhciwgZ3JvdXApIHsKICAgIGdncGxvdCh4LCBhZXNfc3RyaW5nKHggPSB2YXIsIGZpbGwgPSBncm91cCkpICsKICAgICAgI2dlb21faGlzdG9ncmFtKGJpbnMgPSAzMCwgYWxwaGEgPSAwLjQsIHBvc2l0aW9uID0gImlkZW50aXR5IikgKwogICAgICBnZW9tX2RlbnNpdHkoYWxwaGEgPSAwLjQsIHNpemUgPSAwLjEpICsKICAgICAgZ3VpZGVzKGZpbGwgPSBGQUxTRSkgKwogICAgICB0aGVtZV92b2lkKCkgKwogICAgICB0aGVtZShwbG90Lm1hcmdpbiA9IG1hcmdpbigpKSArIHNjYWxlX2ZpbGxfbWFudWFsKHZhbHVlcz1jKCJibHVlIiwicmVkIikpCiAgfQogIAogICMgU2V0IHVwIG1hcmdpbmFsIGhpc3RvZ3JhbXMKICB4X2hpc3QgPC0gbWFyZ2luYWxfZGlzdHJpYnV0aW9uKGRmLCAiUEMxIiwgInR5cGUiKQogIHlfaGlzdCA8LSBtYXJnaW5hbF9kaXN0cmlidXRpb24oZGYsICJQQzIiLCAidHlwZSIpICsKICAgIGNvb3JkX2ZsaXAoKQogIAogICMgQWxpZ24gaGlzdG9ncmFtcyB3aXRoIHNjYXR0ZXJwbG90CiAgYWxpZ25lZF94X2hpc3QgPC0gYWxpZ25fcGxvdHMoeF9oaXN0LCBzY2F0dGVycGxvdCwgYWxpZ24gPSAidiIpW1sxXV0KICBhbGlnbmVkX3lfaGlzdCA8LSBhbGlnbl9wbG90cyh5X2hpc3QsIHNjYXR0ZXJwbG90LCBhbGlnbiA9ICJoIilbWzFdXQogIAogICMgQXJyYW5nZSBwbG90cwogIHBsb3RfZ3JpZCgKICAgIGFsaWduZWRfeF9oaXN0LAogICAgTlVMTCwKICAgIHNjYXR0ZXJwbG90LAogICAgYWxpZ25lZF95X2hpc3QsCiAgICBuY29sID0gMiwKICAgIG5yb3cgPSAyLAogICAgcmVsX2hlaWdodHMgPSBjKDAuMTUsIDEpLAogICAgcmVsX3dpZHRocyA9IGMoMSwgMC4xNSkKICApCgp9CmBgYAoKYGBge3J9CmRmMSA8LSBxdWljay5kZihyb3VuZCgxMF4oMS41KSkscm91bmQoMTBeKDQuMCkpKTsKZGYyIDwtIHF1aWNrLmRmKHJvdW5kKDEwXigyLjUpKSxyb3VuZCgxMF4oMi4wKSkpOwpkZjMgPC0gcXVpY2suZGYocm91bmQoMTBeKDQuNSkpLHJvdW5kKDEwXigyLjcpKSk7CmBgYAoKYGBge3IgZmlnLndpZHRoPTIsZmlnLmhlaWdodD0yfQpwMSA8LSBxdWljay5tYXJnaW4ucGxvdChkZjEsIGFscGhhPTAuMiwgc2l6ZT0xLHRyaW0ub3V0bGllcnM9MC4wMykKcDEKYGBgCgpgYGB7ciBmaWcud2lkdGg9MixmaWcuaGVpZ2h0PTJ9CnAyIDwtIHF1aWNrLm1hcmdpbi5wbG90KGRmMiwgYWxwaGE9MC4xLCBzaXplPTAuNSx0cmltLm91dGxpZXJzPTAuMDMpCnAyCmBgYAoKYGBge3IgZmlnLndpZHRoPTIsZmlnLmhlaWdodD0yfQpwMyA8LSBxdWljay5tYXJnaW4ucGxvdChkZjMsIGFscGhhPTAuMDEsIHNpemU9MC4xLHRyaW0ub3V0bGllcnM9MC4wMDEpCnAzCmBgYAoKCmBgYHtyfQpwZGYoZmlsZT0nZXgxLnBkZicsd2lkdGg9MixoZWlnaHQ9Mik7IHByaW50KHAxKTsgZGV2Lm9mZigpCnBkZihmaWxlPSdleDIucGRmJyx3aWR0aD0yLGhlaWdodD0yKTsgcHJpbnQocDIpOyBkZXYub2ZmKCkKcGRmKGZpbGU9J2V4My5wZGYnLHdpZHRoPTIsaGVpZ2h0PTIpOyBwcmludChwMyk7IGRldi5vZmYoKQpgYGAKCgoKCgoK