library(pagoda2)
library(Matrix)
library(ggplot2)
library(cowplot)
library(dplyr)
require(parallel)
library(ggrastr)
p2 <- readRDS("p2.rds")
library(keras)
K <- keras::backend()
#use_session_with_seed(0)
library(tensorflow)
tf$random$set_seed(42)

Autoencoder

# molecular data - use variance-normalized log scale counts
odgenes <- rownames(p2$misc[["varinfo"]])[(order(p2$misc[["varinfo"]]$lp, decreasing = FALSE)[1:3e3])]
x <- p2$counts[,odgenes]
x@x <- x@x * rep(p2$misc[["varinfo"]][colnames(x), "gsf"], diff(x@p))
set.seed(1)
x_test <- x[sample(1:nrow(x),1e3),]
x_train <- x[sample(1:nrow(x),1e3),]
input_size <- ncol(x_train)
model = keras_model_sequential() 
model %>%
  layer_dense(units = 200, input_shape = c(input_size)) %>%  layer_activation_leaky_relu() %>% layer_dropout(rate=0.1) %>%
  layer_dense(units = 2,   name = "bottleneck") %>% layer_activation_leaky_relu() %>%
  layer_dense(units = 15) %>% layer_activation_relu() %>%
  layer_dense(units = 300, activation='hard_sigmoid') %>%
  layer_dense(units = input_size, activation='sigmoid')


model %>% compile(loss='mean_squared_logarithmic_error', metrics='mae', optimizer = 'adam')
model %>% fit(x_train, x_train, shuffle=TRUE, epochs=150, batch_size=200, validation_data=list(x_test, x_test) )
encoder <- keras_model(inputs=model$input,outputs=get_layer(model,'bottleneck')$output)
emb2 <- predict(encoder,x)
rownames(emb2) <- rownames(x);
a2 <- sccore::embeddingPlot(emb2,groups=ann,alpha=0.4,size=0.2, font.size=c(3,4),plot.theme=theme_bw())
a2

Poisson autoencoder

odgenes <- rownames(p2$misc[["varinfo"]])[(order(p2$misc[["varinfo"]]$lp, decreasing = FALSE)[1:2e3])]
x <- p2$misc$rawCounts[,odgenes]
#x@x <- x@x * rep(p2$misc[["varinfo"]][colnames(x), "gsf"], diff(x@p))

x <- x/rowSums(x)*5e3

set.seed(0)
x_test <- x[sample(1:nrow(x),1e3),]
x_train <- x[sample(1:nrow(x),1e3),]
input_size <- ncol(x_train)
model = keras_model_sequential() 
model %>%
  layer_dense(units = 20, input_shape = c(input_size)) %>%  layer_activation_leaky_relu() %>% layer_dropout(rate=0.1) %>%
  layer_dense(units = 2,   name = "bottleneck") %>% layer_activation_leaky_relu() %>% 
  layer_dense(units = 20) %>% layer_activation_relu() %>%
  layer_dense(units = input_size, activation='sigmoid')


model %>% compile(loss='poisson', metrics='mae', optimizer = 'adam')
model %>% fit(x_train, x_train, shuffle=TRUE, epochs=150, batch_size=200, validation_data=list(x_test, x_test) )

encoder <- keras_model(inputs=model$input,outputs=get_layer(model,'bottleneck')$output)
emb3 <- predict(encoder,x)
rownames(emb3) <- rownames(x);
a3 <- sccore::embeddingPlot(emb3,groups=ann,alpha=0.4,size=0.2, font.size=c(3,4),plot.theme=theme_bw())
a3

Learning embedding

# embedding
emb <- p2$embeddings$PCA$tSNE
# normalize to be within (0,1) range
emb <- apply(emb,2,function(x) (x-min(x))/diff(range(x)))
# molecular data - use variance-normalized log scale counts
odgenes <- rownames(p2$misc[["varinfo"]])[(order(p2$misc[["varinfo"]]$lp, decreasing = FALSE)[1:2e3])]
x <- p2$counts[,odgenes]
x@x <- x@x * rep(p2$misc[["varinfo"]][colnames(x), "gsf"], diff(x@p))
x <- x[rownames(x) %in% rownames(emb), ]


set.seed(0)
x_test <- x[sample(1:nrow(x),3e3),]
x_train <- x[sample(1:nrow(x),3e3),]
input_size <- ncol(x_test)
model = keras_model_sequential() 
model %>%
  layer_dense(units = 256, input_shape = c(input_size)) %>%  layer_activation_leaky_relu() %>% layer_dropout(rate=0.1) %>%
  layer_dense(units = 512, activation='relu') %>%
  layer_dense(units = 256, activation='relu') %>%
  layer_dense(units = 2,   name = "bottleneck", activation='sigmoid')



model %>% compile(loss='mean_absolute_error', metrics='mae', optimizer = 'adam')

model %>% fit(x_train, emb[rownames(x_train),], shuffle=TRUE, epochs=200, batch_size=200, validation_data=list(x_test, emb[rownames(x_test),]) )

emb4 <- predict(model,x)
rownames(emb4) <- rownames(x);
vc <- setdiff(rownames(emb),rownames(x_test))
p1 <- sccore::embeddingPlot(emb4[vc,],groups=ann,alpha=0.4,size=0.2, font.size=c(3,4),plot.theme=theme_bw())
p2 <- sccore::embeddingPlot(emb[vc,],groups=ann,alpha=0.4,size=0.2, font.size=c(3,4),plot.theme=theme_bw())
plot_grid(plotlist=list(p1,p2),nrow=1)

saveRDS(list(emb2,emb3,emb4,emb),file='auto.emb.rds')
LS0tCnRpdGxlOiAiQXV0b2VuY29kZXIgcGxheSIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmxpYnJhcnkocGFnb2RhMikKbGlicmFyeShNYXRyaXgpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeShjb3dwbG90KQpsaWJyYXJ5KGRwbHlyKQpyZXF1aXJlKHBhcmFsbGVsKQpsaWJyYXJ5KGdncmFzdHIpCmBgYAoKCmBgYHtyfQpwMiA8LSByZWFkUkRTKCJwMi5yZHMiKQpgYGAKCmBgYHtyfQpsaWJyYXJ5KGtlcmFzKQpLIDwtIGtlcmFzOjpiYWNrZW5kKCkKI3VzZV9zZXNzaW9uX3dpdGhfc2VlZCgwKQpsaWJyYXJ5KHRlbnNvcmZsb3cpCnRmJHJhbmRvbSRzZXRfc2VlZCg0MikKYGBgCiMgQXV0b2VuY29kZXIKYGBge3J9CiMgbW9sZWN1bGFyIGRhdGEgLSB1c2UgdmFyaWFuY2Utbm9ybWFsaXplZCBsb2cgc2NhbGUgY291bnRzCm9kZ2VuZXMgPC0gcm93bmFtZXMocDIkbWlzY1tbInZhcmluZm8iXV0pWyhvcmRlcihwMiRtaXNjW1sidmFyaW5mbyJdXSRscCwgZGVjcmVhc2luZyA9IEZBTFNFKVsxOjNlM10pXQp4IDwtIHAyJGNvdW50c1ssb2RnZW5lc10KeEB4IDwtIHhAeCAqIHJlcChwMiRtaXNjW1sidmFyaW5mbyJdXVtjb2xuYW1lcyh4KSwgImdzZiJdLCBkaWZmKHhAcCkpCnNldC5zZWVkKDEpCnhfdGVzdCA8LSB4W3NhbXBsZSgxOm5yb3coeCksMWUzKSxdCnhfdHJhaW4gPC0geFtzYW1wbGUoMTpucm93KHgpLDFlMyksXQppbnB1dF9zaXplIDwtIG5jb2woeF90cmFpbikKYGBgCgpgYGB7cn0KbW9kZWwgPSBrZXJhc19tb2RlbF9zZXF1ZW50aWFsKCkgCm1vZGVsICU+JQogIGxheWVyX2RlbnNlKHVuaXRzID0gMjAwLCBpbnB1dF9zaGFwZSA9IGMoaW5wdXRfc2l6ZSkpICU+JSAgbGF5ZXJfYWN0aXZhdGlvbl9sZWFreV9yZWx1KCkgJT4lIGxheWVyX2Ryb3BvdXQocmF0ZT0wLjEpICU+JQogIGxheWVyX2RlbnNlKHVuaXRzID0gMiwgICBuYW1lID0gImJvdHRsZW5lY2siKSAlPiUgbGF5ZXJfYWN0aXZhdGlvbl9sZWFreV9yZWx1KCkgJT4lCiAgbGF5ZXJfZGVuc2UodW5pdHMgPSAxNSkgJT4lIGxheWVyX2FjdGl2YXRpb25fcmVsdSgpICU+JQogIGxheWVyX2RlbnNlKHVuaXRzID0gMzAwLCBhY3RpdmF0aW9uPSdoYXJkX3NpZ21vaWQnKSAlPiUKICBsYXllcl9kZW5zZSh1bml0cyA9IGlucHV0X3NpemUsIGFjdGl2YXRpb249J3NpZ21vaWQnKQoKCm1vZGVsICU+JSBjb21waWxlKGxvc3M9J21lYW5fc3F1YXJlZF9sb2dhcml0aG1pY19lcnJvcicsIG1ldHJpY3M9J21hZScsIG9wdGltaXplciA9ICdhZGFtJykKYGBgCgoKYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0KbW9kZWwgJT4lIGZpdCh4X3RyYWluLCB4X3RyYWluLCBzaHVmZmxlPVRSVUUsIGVwb2Nocz0xNTAsIGJhdGNoX3NpemU9MjAwLCB2YWxpZGF0aW9uX2RhdGE9bGlzdCh4X3Rlc3QsIHhfdGVzdCkgKQpgYGAKCmBgYHtyfQplbmNvZGVyIDwtIGtlcmFzX21vZGVsKGlucHV0cz1tb2RlbCRpbnB1dCxvdXRwdXRzPWdldF9sYXllcihtb2RlbCwnYm90dGxlbmVjaycpJG91dHB1dCkKZW1iMiA8LSBwcmVkaWN0KGVuY29kZXIseCkKcm93bmFtZXMoZW1iMikgPC0gcm93bmFtZXMoeCk7CmBgYAoKYGBge3IgZmlnLndpZHRoPTQsIGZpZy5oZWlnaHQ9NH0KYTIgPC0gc2Njb3JlOjplbWJlZGRpbmdQbG90KGVtYjIsZ3JvdXBzPWFubixhbHBoYT0wLjQsc2l6ZT0wLjIsIGZvbnQuc2l6ZT1jKDMsNCkscGxvdC50aGVtZT10aGVtZV9idygpKQphMgpgYGAKCiMgUG9pc3NvbiBhdXRvZW5jb2RlcgpgYGB7cn0Kb2RnZW5lcyA8LSByb3duYW1lcyhwMiRtaXNjW1sidmFyaW5mbyJdXSlbKG9yZGVyKHAyJG1pc2NbWyJ2YXJpbmZvIl1dJGxwLCBkZWNyZWFzaW5nID0gRkFMU0UpWzE6MmUzXSldCnggPC0gcDIkbWlzYyRyYXdDb3VudHNbLG9kZ2VuZXNdCiN4QHggPC0geEB4ICogcmVwKHAyJG1pc2NbWyJ2YXJpbmZvIl1dW2NvbG5hbWVzKHgpLCAiZ3NmIl0sIGRpZmYoeEBwKSkKCnggPC0geC9yb3dTdW1zKHgpKjVlMwoKc2V0LnNlZWQoMCkKeF90ZXN0IDwtIHhbc2FtcGxlKDE6bnJvdyh4KSwxZTMpLF0KeF90cmFpbiA8LSB4W3NhbXBsZSgxOm5yb3coeCksMWUzKSxdCmlucHV0X3NpemUgPC0gbmNvbCh4X3RyYWluKQpgYGAKCmBgYHtyfQptb2RlbCA9IGtlcmFzX21vZGVsX3NlcXVlbnRpYWwoKSAKbW9kZWwgJT4lCiAgbGF5ZXJfZGVuc2UodW5pdHMgPSAyMCwgaW5wdXRfc2hhcGUgPSBjKGlucHV0X3NpemUpKSAlPiUgIGxheWVyX2FjdGl2YXRpb25fbGVha3lfcmVsdSgpICU+JSBsYXllcl9kcm9wb3V0KHJhdGU9MC4xKSAlPiUKICBsYXllcl9kZW5zZSh1bml0cyA9IDIsICAgbmFtZSA9ICJib3R0bGVuZWNrIikgJT4lIGxheWVyX2FjdGl2YXRpb25fbGVha3lfcmVsdSgpICU+JSAKICBsYXllcl9kZW5zZSh1bml0cyA9IDIwKSAlPiUgbGF5ZXJfYWN0aXZhdGlvbl9yZWx1KCkgJT4lCiAgbGF5ZXJfZGVuc2UodW5pdHMgPSBpbnB1dF9zaXplLCBhY3RpdmF0aW9uPSdzaWdtb2lkJykKCgptb2RlbCAlPiUgY29tcGlsZShsb3NzPSdwb2lzc29uJywgbWV0cmljcz0nbWFlJywgb3B0aW1pemVyID0gJ2FkYW0nKQpgYGAKCmBgYHtyfQptb2RlbCAlPiUgZml0KHhfdHJhaW4sIHhfdHJhaW4sIHNodWZmbGU9VFJVRSwgZXBvY2hzPTE1MCwgYmF0Y2hfc2l6ZT0yMDAsIHZhbGlkYXRpb25fZGF0YT1saXN0KHhfdGVzdCwgeF90ZXN0KSApCgplbmNvZGVyIDwtIGtlcmFzX21vZGVsKGlucHV0cz1tb2RlbCRpbnB1dCxvdXRwdXRzPWdldF9sYXllcihtb2RlbCwnYm90dGxlbmVjaycpJG91dHB1dCkKZW1iMyA8LSBwcmVkaWN0KGVuY29kZXIseCkKcm93bmFtZXMoZW1iMykgPC0gcm93bmFtZXMoeCk7CmBgYAoKYGBge3IgZmlnLndpZHRoPTQsIGZpZy5oZWlnaHQ9NH0KYTMgPC0gc2Njb3JlOjplbWJlZGRpbmdQbG90KGVtYjMsZ3JvdXBzPWFubixhbHBoYT0wLjQsc2l6ZT0wLjIsIGZvbnQuc2l6ZT1jKDMsNCkscGxvdC50aGVtZT10aGVtZV9idygpKQphMwpgYGAKCgojIExlYXJuaW5nIGVtYmVkZGluZwoKYGBge3J9CiMgZW1iZWRkaW5nCmVtYiA8LSBwMiRlbWJlZGRpbmdzJFBDQSR0U05FCiMgbm9ybWFsaXplIHRvIGJlIHdpdGhpbiAoMCwxKSByYW5nZQplbWIgPC0gYXBwbHkoZW1iLDIsZnVuY3Rpb24oeCkgKHgtbWluKHgpKS9kaWZmKHJhbmdlKHgpKSkKIyBtb2xlY3VsYXIgZGF0YSAtIHVzZSB2YXJpYW5jZS1ub3JtYWxpemVkIGxvZyBzY2FsZSBjb3VudHMKb2RnZW5lcyA8LSByb3duYW1lcyhwMiRtaXNjW1sidmFyaW5mbyJdXSlbKG9yZGVyKHAyJG1pc2NbWyJ2YXJpbmZvIl1dJGxwLCBkZWNyZWFzaW5nID0gRkFMU0UpWzE6MmUzXSldCnggPC0gcDIkY291bnRzWyxvZGdlbmVzXQp4QHggPC0geEB4ICogcmVwKHAyJG1pc2NbWyJ2YXJpbmZvIl1dW2NvbG5hbWVzKHgpLCAiZ3NmIl0sIGRpZmYoeEBwKSkKeCA8LSB4W3Jvd25hbWVzKHgpICVpbiUgcm93bmFtZXMoZW1iKSwgXQoKCnNldC5zZWVkKDApCnhfdGVzdCA8LSB4W3NhbXBsZSgxOm5yb3coeCksM2UzKSxdCnhfdHJhaW4gPC0geFtzYW1wbGUoMTpucm93KHgpLDNlMyksXQppbnB1dF9zaXplIDwtIG5jb2woeF90ZXN0KQpgYGAKCmBgYHtyfQptb2RlbCA9IGtlcmFzX21vZGVsX3NlcXVlbnRpYWwoKSAKbW9kZWwgJT4lCiAgbGF5ZXJfZGVuc2UodW5pdHMgPSAyNTYsIGlucHV0X3NoYXBlID0gYyhpbnB1dF9zaXplKSkgJT4lICBsYXllcl9hY3RpdmF0aW9uX2xlYWt5X3JlbHUoKSAlPiUgbGF5ZXJfZHJvcG91dChyYXRlPTAuMSkgJT4lCiAgbGF5ZXJfZGVuc2UodW5pdHMgPSA1MTIsIGFjdGl2YXRpb249J3JlbHUnKSAlPiUKICBsYXllcl9kZW5zZSh1bml0cyA9IDI1NiwgYWN0aXZhdGlvbj0ncmVsdScpICU+JQogIGxheWVyX2RlbnNlKHVuaXRzID0gMiwgICBuYW1lID0gImJvdHRsZW5lY2siLCBhY3RpdmF0aW9uPSdzaWdtb2lkJykKCgoKbW9kZWwgJT4lIGNvbXBpbGUobG9zcz0nbWVhbl9hYnNvbHV0ZV9lcnJvcicsIG1ldHJpY3M9J21hZScsIG9wdGltaXplciA9ICdhZGFtJykKCm1vZGVsICU+JSBmaXQoeF90cmFpbiwgZW1iW3Jvd25hbWVzKHhfdHJhaW4pLF0sIHNodWZmbGU9VFJVRSwgZXBvY2hzPTIwMCwgYmF0Y2hfc2l6ZT0yMDAsIHZhbGlkYXRpb25fZGF0YT1saXN0KHhfdGVzdCwgZW1iW3Jvd25hbWVzKHhfdGVzdCksXSkgKQoKZW1iNCA8LSBwcmVkaWN0KG1vZGVsLHgpCnJvd25hbWVzKGVtYjQpIDwtIHJvd25hbWVzKHgpOwpgYGAKCmBgYHtyIGZpZy53aWR0aD04LCBmaWcuaGVpZ2h0PTR9CnZjIDwtIHNldGRpZmYocm93bmFtZXMoZW1iKSxyb3duYW1lcyh4X3Rlc3QpKQpwMSA8LSBzY2NvcmU6OmVtYmVkZGluZ1Bsb3QoZW1iNFt2YyxdLGdyb3Vwcz1hbm4sYWxwaGE9MC40LHNpemU9MC4yLCBmb250LnNpemU9YygzLDQpLHBsb3QudGhlbWU9dGhlbWVfYncoKSkKcDIgPC0gc2Njb3JlOjplbWJlZGRpbmdQbG90KGVtYlt2YyxdLGdyb3Vwcz1hbm4sYWxwaGE9MC40LHNpemU9MC4yLCBmb250LnNpemU9YygzLDQpLHBsb3QudGhlbWU9dGhlbWVfYncoKSkKcGxvdF9ncmlkKHBsb3RsaXN0PWxpc3QocDEscDIpLG5yb3c9MSkKYGBgCgpgYGB7cn0Kc2F2ZVJEUyhsaXN0KGVtYjIsZW1iMyxlbWI0LGVtYiksZmlsZT0nYXV0by5lbWIucmRzJykKYGBgCgo=