# Identify genes to use for building gene expression models

# Restore the objects saved at the end of Part 1, so the tutorial can be
# picked up directly from Part 2 without re-running the earlier steps.
load(file = "~/seurat_files/output_part1.Robj")

# Recompute the set of variable genes, now that the EVL have been removed.
# The result is read back from zf@var.genes below (presumably populated
# because set.var.genes = TRUE -- confirm against mean.var.plot).
zf <- mean.var.plot(
  zf,
  fxn.x = expMean,
  fxn.y = logVarDivMean,
  x.low.cutoff = 1,
  x.high.cutoff = 7,
  y.cutoff = 2,
  do.plot = FALSE,
  set.var.genes = TRUE
)

# Ask batch.gene() for markers across the zf1/zf2/zf3 identities, restricted
# to the current variable genes, then drop those markers from the variable
# gene set (presumably these are batch-associated genes -- confirm).
markers.remove <- batch.gene(
  zf,
  genes.use = zf@var.genes,
  idents.use = c("zf1", "zf2", "zf3")
)
zf@var.genes <- zf@var.genes[!(zf@var.genes %in% markers.remove)]

# Re-run the PCA on the (updated) variable genes, without printing results.
zf <- pca(zf, do.print = FALSE)

# Jackstraw: run a 'random' PCA 1,000 times, scrambling a random 2.5% of the
# data on each replicate. This is used to identify statistically significant
# PCs (in this case, 1:3) and the genes with significant PC scores.
# NOTE(review): the procedure is randomized; results may differ between runs
# unless a seed is set beforehand -- confirm whether that matters here.
zf <- jackStraw(
  zf,
  prop.freq = 0.025,
  num.replicate = 1000
)
jackStrawPlot(zf)

# Project the PCA (without centering and without printing), then collect the
# genes with significant scores on PCs 1-3 using a p-value cutoff of 1e-2.
# use.full = TRUE presumably selects from the projected (full) gene set
# rather than only the variable genes -- confirm against pca.sig.genes.
zf <- project.pca(
  zf,
  do.center = FALSE,
  do.print = FALSE
)
genes.sig <- pca.sig.genes(
  zf,
  pcs.use = c(1, 2, 3),
  pval.cut = 1e-2,
  use.full = TRUE
)

# Build two PCA scatter plots as returned objects (do.return = TRUE): one
# with pca.plot's default components, one with the positional arguments 1
# and 3 (presumably PC1 vs PC3 -- confirm), and draw them side by side.
plot.1 <- pca.plot(zf, do.return = TRUE)
plot.2 <- pca.plot(zf, 1, 3, do.return = TRUE)
multiplotList(
  list(plot.1, plot.2),
  cols = 2
)

# Visualize 10 genes for each of PCs 1-3, laid out in 3 columns.
viz.pca(
  zf,
  pcs.use = 1:3,
  num.genes = 10,
  nCol = 3
)