Setup Seurat Object

# Load the expression matrix from Pollen et al. 2014 (tab-delimited, with a
# header row; the first column supplies the row names)
nbt.data <- read.table(
  "~/seurat_files/HiSeq301-RSEM-linear values.txt",
  sep = "\t",
  header = TRUE,
  row.names = 1
)

# Transform data to log scale: log1p(x) computes log(x + 1) and stays accurate
# for values close to zero
nbt.data <- log1p(nbt.data)

# Peek at the top-left corner of the transformed matrix
corner(nbt.data)
##          Hi_2338_1  Hi_2338_2 Hi_2338_3 Hi_2338_4 Hi_2338_5
## A1BG      2.310553 0.00000000  0.000000 1.0116009         0
## A1BG-AS1  0.000000 0.00000000  1.497388 0.3074847         0
## A1CF      0.000000 0.04879016  0.000000 0.0000000         0
## A2LD1     0.000000 0.00000000  0.000000 0.2546422         0
## A2M       0.000000 0.00000000  0.000000 0.0000000         0

# Check the dimensions of the matrix
dim(nbt.data)
## [1] 23730   301

# Create a new seurat object, storing the matrix in its raw.data slot
nbt <- new("seurat", raw.data = nbt.data)

# Take all genes in > 3 cells, all cells with > 1k genes, use an expression
# threshold of 1.
# Cell type is encoded in the second "_"-delimited field of each cell name; it
# will be stored in nbt@ident and also placed in the "orig.ident" field of
# nbt@data.info.
nbt <- setup(
  nbt,
  project = "NBT",
  min.cells = 3,
  names.field = 2,
  names.delim = "_",
  min.genes = 1000,
  is.expr = 1
)

# Print a summary of the object
nbt
## An object of class seurat in project NBT
##  16842 genes across 301 samples.

Basic exploration of data

# Violin plots of a few canonical marker genes plus the nGene metric
vlnPlot(nbt, c("DPPA4", "GATA1", "BMP3", "nGene"))