This vignette demonstrates some useful features for interacting with the Seurat object. For demonstration purposes, we will be using the 2,700 PBMC object that is created in the first guided tutorial. You can download the pre-computed object here. To simulate the scenario where we have two replicates, we will randomly assign half the cells in each cluster to be from “rep1” and other half from “rep2”.
library(Seurat)
library(dplyr)
pbmc <- readRDS(file = "~/Projects/datasets/pbmc3k_final.rds")
# pretend that cells were originally assigned to one of two replicates (we
# assign randomly here) if your cells do belong to multiple replicates, and
# you want to add this info to the Seurat object create a data frame with
# this information (similar to replicate.info below)
set.seed(seed = 42)
replicate.info <- data.frame(replicate = sample(x = c("rep1", "rep2"), size = length(x = pbmc@cell.names),
replace = TRUE), row.names = pbmc@cell.names)
pbmc <- AddMetaData(object = pbmc, metadata = replicate.info)
# Plot tSNE, coloring cells by cell type (currently stored in object@ident)
TSNEPlot(object = pbmc)
# How do I create a tSNE plot where cells are colored by replicate? First,
# store the current identities in a new column of meta.data called CellType
pbmc <- StashIdent(object = pbmc, save.name = "CellType")
# Next, switch the identity class of all cells to reflect replicate ID
pbmc <- SetAllIdent(object = pbmc, id = "replicate")
TSNEPlot(object = pbmc)
# alternately : TSNEPlot(pbmc, group.by = 'replicate') you can pass the
# pt.shape to label points by both replicate and cell type
# Switch back to cell type labels
pbmc <- SetAllIdent(object = pbmc, id = "CellType")
# How many cells are in each cluster
table(pbmc@ident)
##
## B cells CD4 T cells CD8 T cells CD14+ Monocytes
## 342 1151 308 479
## Dendritic cells FCGR3A+ Monocytes Megakaryocytes NK cells
## 32 157 14 155
# How many cells are in each replicate?
table(pbmc@meta.data$replicate)
##
## rep1 rep2
## 1327 1311
# restore cluster ID
pbmc <- SetAllIdent(object = pbmc, id = "CellType")
# What proportion of cells are in each cluster?
prop.table(x = table(pbmc@ident))
##
## B cells CD4 T cells CD8 T cells CD14+ Monocytes
## 0.129643669 0.436315390 0.116755118 0.181576952
## Dendritic cells FCGR3A+ Monocytes Megakaryocytes NK cells
## 0.012130402 0.059514784 0.005307051 0.058756634
# How does cluster membership vary by replicate?
table(pbmc@ident, pbmc@meta.data$replicate)
##
## rep1 rep2
## B cells 164 178
## CD4 T cells 563 588
## CD8 T cells 176 132
## CD14+ Monocytes 240 239
## Dendritic cells 19 13
## FCGR3A+ Monocytes 80 77
## Megakaryocytes 4 10
## NK cells 81 74
prop.table(x = table(pbmc@ident, pbmc@meta.data$replicate), margin = 2)
##
## rep1 rep2
## B cells 0.123587038 0.135774218
## CD4 T cells 0.424265260 0.448512586
## CD8 T cells 0.132629992 0.100686499
## CD14+ Monocytes 0.180859081 0.182303585
## Dendritic cells 0.014318011 0.009916095
## FCGR3A+ Monocytes 0.060286360 0.058733791
## Megakaryocytes 0.003014318 0.007627765
## NK cells 0.061039940 0.056445461
# What are the cell names of all NK cells?
WhichCells(object = pbmc, ident = "NK cells")
## [1] "AAACCGTGTATGCG" "AAATTCGATTCTCA" "AACCTTACGCGAGA" "AACGCCCTCGTACA"
## [5] "AACGTCGAGTATCG" "AAGATTACCTCAAG" "AAGCAAGAGGTGTT" "AAGTAGGATACAGC"
## [9] "AATACTGAATTGGC" "AATCCTTGGTGAGG" "AATCTCTGCTTTAC" "ACAAATTGTTGCGA"
## [13] "ACAACCGAGGGATG" "ACAATTGATGACTG" "ACACCCTGGTGTTG" "ACAGGTACTGGTGT"
## [17] "ACCTGGCTAAGTAG" "ACGAACACCTTGTT" "ACGATCGAGGACTT" "ACGCAATGGTTCAG"
## [21] "ACGCTGCTGTTCTT" "ACGGAACTCAGATC" "ACGTGATGTGACAC" "ACGTTGGAGCCAAT"
## [25] "ACTGCCACTCCGTC" "ACTGGCCTTCAGTG" "ACTTCAACGTAGGG" "AGAACAGAAATGCC"
## [29] "AGATATACCCGTAA" "AGATTCCTGTTCAG" "AGCCTCTGCCAATG" "AGCGATTGAGATCC"
## [33] "AGGATGCTTTAGGC" "AGGGACGAGTCAAC" "AGTAATACATCACG" "AGTCACGATGAGCT"
## [37] "AGTTTGCTACTGGT" "ATACCACTGCCAAT" "ATACTCTGGTATGC" "ATCCCGTGCAGTCA"
## [41] "ATCTTTCTTGTCCC" "ATGAAGGACTTGCC" "ATGATAACTTCACT" "ATGATATGGTGCTA"
## [45] "ATGGACACGCATCA" "ATGGGTACATCGGT" "ATTAACGATGAGAA" "ATTCCAACTTAGGC"
## [49] "CAAGGTTGTCTGGA" "CAATCTACTGACTG" "CACCACTGGCGAAG" "CACGGGTGGAGGAC"
## [53] "CAGATGACATTCTC" "CAGCAATGGAGGGT" "CAGCGGACCTTTAC" "CAGCTCTGTGTGGT"
## [57] "CAGTTTACACACGT" "CATCAGGACTTCCG" "CATCAGGATAGCCA" "CATGAGACGTTGAC"
## [61] "CATTACACCAACTG" "CATTTCGAGATACC" "CCTCGAACACTTTC" "CGACCACTAAAGTG"
## [65] "CGACCACTGCCAAT" "CGAGGCTGACGCTA" "CGCCGAGAGCTTAG" "CGGCGAACGACAAA"
## [69] "CGGCGAACTACTTC" "CGGGCATGTCTCTA" "CGTACCTGGCATCA" "CGTGTAGACGATAC"
## [73] "CGTGTAGAGTTACG" "CGTGTAGATTCGGA" "CTAAACCTCTGACA" "CTAACGGAACCGAT"
## [77] "CTACGCACTGGTCA" "CTACTCCTATGTCG" "CTAGTTACGAAACA" "CTATACTGCTACGA"
## [81] "CTATACTGTCTCAT" "CTCGACTGGTTGAC" "CTGAGAACGTAAAG" "CTTTAGTGACGGGA"
## [85] "GAACCAACTTCCGC" "GAAGTGCTAAACGA" "GAATGCACCTTCGC" "GAATTAACGTCGTA"
## [89] "GACGGCACACGGGA" "GAGCGCTGAAGATG" "GAGGTACTGACACT" "GAGGTGGATCCTCG"
## [93] "GATAGAGAAGGGTG" "GATCCCTGACCTTT" "GCACACCTGTGCTA" "GCACCACTTCCTTA"
## [97] "GCACTAGAGTCGTA" "GCAGGGCTATCGAC" "GCCGGAACGTTCTT" "GCCTACACAGTTCG"
## [101] "GCGCATCTTGCTCC" "GCGCGATGGTGCAT" "GGAAGGTGGCGAGA" "GGACGCTGTCCTCG"
## [105] "GGAGGCCTCGTTGA" "GGCAAGGAAAAAGC" "GGCATATGCTTATC" "GGCCGAACTCTAGG"
## [109] "GGCTAAACACCTGA" "GGGTTAACGTGCAT" "GGTGGAGAAACGGG" "GGTGGAGACAGATC"
## [113] "GTAGTGTGAGCGGA" "GTCGACCTGAATGA" "GTGATTCTGGTTCA" "GTTAAAACCGAGAG"
## [117] "GTTCAACTGGGACA" "GTTGACGAGCCCTT" "GTTGACGATATCGG" "TAACTCACTCTACT"
## [121] "TAAGAGGACTTGTT" "TAATGCCTCGTCTC" "TACGCAGAGAATCC" "TACGGCCTGGGACA"
## [125] "TACTCTGAATCGAC" "TACTGTTGAGGCGA" "TAGCATCTCAGCTA" "TAGCCCACAGCTAC"
## [129] "TAGTGGTGAAGTGA" "TAGTTAGAACCACA" "TATCGACTACTAGC" "TATGAATGGAGGAC"
## [133] "TATGGGTGCATCAG" "TATTTCCTGGAGGT" "TCAACACTGTTTGG" "TCAGACGACGTTAG"
## [137] "TCCCGAACACAGTC" "TCCTAAACCGCATA" "TCGATTTGCAGCTA" "TCTAACACCAGTTG"
## [141] "TGATAAACTCCGTC" "TGCACAGACGACAT" "TGCCACTGCGATAC" "TGCTGAGAGAGCAG"
## [145] "TGGAACACAAACAG" "TGGTAGACCCTCAC" "TGTAATGACACAAC" "TGTAATGAGGTAAA"
## [149] "TTACTCGATCTACT" "TTAGTCTGCCAACA" "TTCCAAACTCCCAC" "TTCCCACTTGAGGG"
## [153] "TTCTAGTGGAGAGC" "TTCTGATGGAGACG" "TTGTCATGGACGGA"
# How can I extract expression matrix for all NK cells (perhaps, to load
# into another package)
nk.raw.data <- as.matrix(x = pbmc@raw.data[, WhichCells(object = pbmc, ident = "NK cells")])
# Can I create a Seurat object of just the NK cells and B cells?
nk.subset <- SubsetData(object = pbmc, ident.use = c("NK cells", "B cells"))
# Can I create a Seurat object of all cells except the NK cells and B cells?
other.subset <- SubsetData(object = pbmc, ident.remove = c("NK cells", "B cells"))
# note that if you wish to perform additional rounds of clustering after
# SubsetData we recommend re-running FindVariableGenes() and ScaleData()
# How can I calculate the average expression of all cells within a cluster?
cluster.averages <- AverageExpression(object = pbmc)
## [1] "Finished averaging RNA for cluster B cells"
## [1] "Finished averaging RNA for cluster CD4 T cells"
## [1] "Finished averaging RNA for cluster CD8 T cells"
## [1] "Finished averaging RNA for cluster CD14+ Monocytes"
## [1] "Finished averaging RNA for cluster Dendritic cells"
## [1] "Finished averaging RNA for cluster FCGR3A+ Monocytes"
## [1] "Finished averaging RNA for cluster Megakaryocytes"
## [1] "Finished averaging RNA for cluster NK cells"
head(x = cluster.averages[, 1:5])
## B cells CD4 T cells CD8 T cells CD14+ Monocytes
## AL627309.1 0.00000000 0.006198564 0.01807769 0.04864473
## AP006222.2 0.00000000 0.003443558 0.01048354 0.01090743
## RP11-206L10.2 0.02077107 0.004513297 0.00000000 0.00000000
## RP11-206L10.9 0.00000000 0.000000000 0.00000000 0.01052309
## LINC00115 0.03911281 0.018897675 0.02850651 0.03761574
## NOC2L 0.55560804 0.457757658 0.49811901 0.27827017
## Dendritic cells
## AL627309.1 0.00000000
## AP006222.2 0.00000000
## RP11-206L10.2 0.08462847
## RP11-206L10.9 0.00000000
## LINC00115 0.00000000
## NOC2L 0.27913384
# Return this information as a Seurat object (enables downstream plotting
# and analysis)
cluster.averages <- AverageExpression(object = pbmc, return.seurat = TRUE, show.progress = FALSE)
# How can I plot the average expression of NK cells vs. CD8 T cells? Pass
# do.hover = T for an interactive plot to identify gene outliers
CellPlot(object = cluster.averages, cell1 = "NK cells", cell2 = "CD8 T cells")
# How can I calculate expression averages separately for each replicate?
cluster.averages <- AverageExpression(object = pbmc, return.seurat = TRUE, add.ident = "replicate",
show.progress = FALSE)
CellPlot(object = cluster.averages, cell1 = "CD14+ Monocytes_rep1", cell2 = "CD14+ Monocytes_rep2")
# You can also plot heatmaps of these 'in silico' bulk datasets to visualize
# agreement between replicates
DoHeatmap(object = cluster.averages, genes.use = PCTopGenes(object = pbmc, pc.use = 1,
do.balanced = TRUE), group.label.rot = TRUE, group.cex = 0)