In this vignette we will analyse a single-cell co-assay dataset measuring gene expression and DNA accessibility in the same cells. This vignette is similar to the PBMC multiomic vignette, but demonstrates a similar joint analysis in a different species and with data gathered using a different technology.

This dataset was published by Chen, Lake, and Zhang (2019) and uses a technology called SNARE-seq. We will look at a dataset from the adult mouse brain, and the raw data can be downloaded from NCBI GEO here: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE126074

As the fragment files for this dataset are not publicly available we have re-mapped the raw data to the mm10 genome and created a fragment file using Sinto.

The fragment file can be downloaded here: https://www.dropbox.com/s/se3uag0cvld4xqg/fragments.sort.bed.gz

The fragment file index can be downloaded here: https://www.dropbox.com/s/kvb8fiewkjz3kzf/fragments.sort.bed.gz.tbi

Code used to create the fragment file from raw data is available here: https://github.com/timoast/SNARE-seq

Data loading

First we create a Seurat object containing two different assays, one containing the gene expression data and one containing the DNA accessibility data.

To load the count data, we can use the Read10X() function from Seurat by first placing the barcodes.tsv.gz, matrix.mtx.gz, and features.tsv.gz files into a separate folder.

library(Signac)
library(Seurat)
library(ggplot2)
library(EnsDb.Mmusculus.v79)
set.seed(1234)

# load processed data matrices for each assay
rna <- Read10X("../vignette_data/snare-seq/GSE126074_AdBrainCortex_rna/", gene.column = 1)
atac <- Read10X("../vignette_data/snare-seq/GSE126074_AdBrainCortex_atac/", gene.column = 1)
fragments <- "../vignette_data/snare-seq/fragments.sort.bed.gz"

# create a Seurat object and add the assays
snare <- CreateSeuratObject(counts = rna)
snare[['ATAC']] <- CreateChromatinAssay(
  counts = atac,
  sep = c(":", "-"),
  genome = "mm10",
  fragments = fragments
)

# extract gene annotations from EnsDb
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Mmusculus.v79)

# change to UCSC style since the data was mapped to hg19
seqlevelsStyle(annotations) <- 'UCSC'
genome(annotations) <- "mm10"

# add the gene information to the object
Annotation(snare[["ATAC"]]) <- annotations

Quality control

DefaultAssay(snare) <- "ATAC"
snare <- TSSEnrichment(snare)
snare <- NucleosomeSignal(snare)
snare$blacklist_fraction <- FractionCountsInRegion(
  object = snare,
  assay = 'ATAC',
  regions = blacklist_mm10
)
Idents(snare) <- "all"  # group all cells together, rather than by replicate
VlnPlot(
  snare,
  features = c("nCount_RNA", "nCount_ATAC", "TSS.enrichment",
               "nucleosome_signal", "blacklist_fraction"),
  pt.size = 0.1,
  ncol = 5
)

snare <- subset(
  x = snare,
  subset = blacklist_fraction < 0.03 &
    TSS.enrichment < 20 &
    nCount_RNA > 800 &
    nCount_ATAC > 500
)
snare
## An object of class Seurat 
## 277704 features across 8055 samples within 2 assays 
## Active assay: ATAC (244544 features, 0 variable features)
##  1 other assay present: RNA

Gene expression data processing

Process gene expression data using Seurat

DefaultAssay(snare) <- "RNA"

snare <- FindVariableFeatures(snare, nfeatures = 3000)
snare <- NormalizeData(snare)
snare <- ScaleData(snare)
snare <- RunPCA(snare, npcs = 30)
snare <- RunUMAP(snare, dims = 1:30, reduction.name = "umap.rna")
snare <- FindNeighbors(snare, dims = 1:30)
snare <- FindClusters(snare, resolution = 0.5, algorithm = 3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8055
## Number of edges: 324904
## 
## Running smart local moving algorithm...
## Maximum modularity in 10 random starts: 0.8902
## Number of communities: 14
## Elapsed time: 4 seconds
p1 <- DimPlot(snare, label = TRUE) + NoLegend() + ggtitle("RNA UMAP")

DNA accessibility data processing

Process the DNA accessibility data using Signac

DefaultAssay(snare) <- 'ATAC'

snare <- FindTopFeatures(snare, min.cutoff = 10)
snare <- RunTFIDF(snare)
snare <- RunSVD(snare)
snare <- RunUMAP(snare, reduction = 'lsi', dims = 2:30, reduction.name = 'umap.atac')
p2 <- DimPlot(snare, reduction = 'umap.atac', label = TRUE) + NoLegend() + ggtitle("ATAC UMAP")
p1 + p2

Integration with scRNA-seq data

Next we can annotate cell types in the dataset by transferring labels from an existing scRNA-seq dataset for the adult mouse brain, produced by the Allen Institute.

# label transfer from Allen brain
allen <- readRDS("../vignette_data/allen_brain.rds")

# use the RNA assay in the SNARE-seq data for integration with scRNA-seq
DefaultAssay(snare) <- 'RNA'

transfer.anchors <- FindTransferAnchors(
  reference = allen,
  query = snare,
  dims = 1:30,
  reduction = 'cca'
)

predicted.labels <- TransferData(
  anchorset = transfer.anchors,
  refdata = allen$subclass,
  weight.reduction = snare[['pca']],
  dims = 1:30
)

snare <- AddMetaData(object = snare, metadata = predicted.labels)
# label clusters based on predicted ID
new.cluster.ids <- c(
  "L2/3 IT",
  "L4",
  "L6 IT",
  "L5 CT",
  "L4",
  "L5 PT",
  "Pvalb",
  "Sst",
  "Astro",
  "Oligo",
  "Vip/Lamp5",
  "L6 IT.2",
  "L6b",
  "NP"
)
names(x = new.cluster.ids) <- levels(x = snare)
snare <- RenameIdents(object = snare, new.cluster.ids)
snare$celltype <- Idents(snare)
DimPlot(snare, group.by = 'celltype', label = TRUE, reduction = 'umap.rna')

Jointly visualizing gene expression and DNA accessibility

We can visualize both the gene expression and DNA accessibility information at the same time using the CoveragePlot() function. This makes it easy to compare DNA accessibility in a given region for different cell types and overlay gene expression information for different genes.

DefaultAssay(snare) <- "ATAC"
CoveragePlot(snare, region = "chr2-22620000-22660000", features = "Gad2")

Session Info

## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] EnsDb.Mmusculus.v79_2.99.0 ensembldb_2.16.4          
##  [3] AnnotationFilter_1.16.0    GenomicFeatures_1.44.2    
##  [5] AnnotationDbi_1.54.1       Biobase_2.52.0            
##  [7] GenomicRanges_1.44.0       GenomeInfoDb_1.28.4       
##  [9] IRanges_2.26.0             S4Vectors_0.30.0          
## [11] BiocGenerics_0.38.0        ggplot2_3.3.5             
## [13] SeuratObject_4.0.2         Seurat_4.0.4              
## [15] Signac_1.4.0              
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.2                  reticulate_1.22            
##   [3] tidyselect_1.1.1            RSQLite_2.2.8              
##   [5] htmlwidgets_1.5.4           docopt_0.7.1               
##   [7] grid_4.1.0                  BiocParallel_1.26.2        
##   [9] Rtsne_0.15                  munsell_0.5.0              
##  [11] codetools_0.2-18            ragg_1.1.3                 
##  [13] ica_1.0-2                   future_1.22.1              
##  [15] miniUI_0.1.1.1              withr_2.4.2                
##  [17] colorspace_2.0-2            filelock_1.0.2             
##  [19] highr_0.9                   knitr_1.34                 
##  [21] rstudioapi_0.13             ROCR_1.0-11                
##  [23] tensor_1.5                  listenv_0.8.0              
##  [25] labeling_0.4.2              MatrixGenerics_1.4.3       
##  [27] slam_0.1-48                 GenomeInfoDbData_1.2.6     
##  [29] polyclip_1.10-0             bit64_4.0.5                
##  [31] farver_2.1.0                rprojroot_2.0.2            
##  [33] parallelly_1.28.1           vctrs_0.3.8                
##  [35] generics_0.1.0              xfun_0.26                  
##  [37] biovizBase_1.40.0           BiocFileCache_2.0.0        
##  [39] lsa_0.73.2                  ggseqlogo_0.1              
##  [41] R6_2.5.1                    bitops_1.0-7               
##  [43] spatstat.utils_2.2-0        cachem_1.0.6               
##  [45] DelayedArray_0.18.0         assertthat_0.2.1           
##  [47] promises_1.2.0.1            BiocIO_1.2.0               
##  [49] scales_1.1.1                nnet_7.3-16                
##  [51] gtable_0.3.0                globals_0.14.0             
##  [53] goftest_1.2-2               rlang_0.4.11               
##  [55] systemfonts_1.0.2           RcppRoll_0.3.0             
##  [57] splines_4.1.0               rtracklayer_1.52.1         
##  [59] lazyeval_0.2.2              dichromat_2.0-0            
##  [61] checkmate_2.0.0             spatstat.geom_2.2-2        
##  [63] yaml_2.2.1                  reshape2_1.4.4             
##  [65] abind_1.4-5                 backports_1.2.1            
##  [67] httpuv_1.6.3                Hmisc_4.5-0                
##  [69] tools_4.1.0                 ellipsis_0.3.2             
##  [71] spatstat.core_2.3-0         jquerylib_0.1.4            
##  [73] RColorBrewer_1.1-2          ggridges_0.5.3             
##  [75] Rcpp_1.0.7                  plyr_1.8.6                 
##  [77] base64enc_0.1-3             progress_1.2.2             
##  [79] zlibbioc_1.38.0             purrr_0.3.4                
##  [81] RCurl_1.98-1.5              prettyunits_1.1.1          
##  [83] rpart_4.1-15                deldir_0.2-10              
##  [85] pbapply_1.5-0               cowplot_1.1.1              
##  [87] zoo_1.8-9                   SummarizedExperiment_1.22.0
##  [89] ggrepel_0.9.1               cluster_2.1.2              
##  [91] fs_1.5.0                    magrittr_2.0.1             
##  [93] RSpectra_0.16-0             data.table_1.14.0          
##  [95] scattermore_0.7             lmtest_0.9-38              
##  [97] RANN_2.6.1                  SnowballC_0.7.0            
##  [99] ProtGenerics_1.24.0         fitdistrplus_1.1-5         
## [101] matrixStats_0.61.0          hms_1.1.0                  
## [103] patchwork_1.1.1             mime_0.11                  
## [105] evaluate_0.14               xtable_1.8-4               
## [107] XML_3.99-0.8                jpeg_0.1-9                 
## [109] sparsesvd_0.2               gridExtra_2.3              
## [111] compiler_4.1.0              biomaRt_2.48.3             
## [113] tibble_3.1.4                KernSmooth_2.23-20         
## [115] crayon_1.4.1                htmltools_0.5.2            
## [117] mgcv_1.8-36                 later_1.3.0                
## [119] Formula_1.2-4               tidyr_1.1.3                
## [121] DBI_1.1.1                   tweenr_1.0.2               
## [123] dbplyr_2.1.1                MASS_7.3-54                
## [125] rappdirs_0.3.3              Matrix_1.3-4               
## [127] igraph_1.2.6                pkgconfig_2.0.3            
## [129] pkgdown_1.6.1.9001          GenomicAlignments_1.28.0   
## [131] foreign_0.8-81              plotly_4.9.4.1             
## [133] spatstat.sparse_2.0-0       xml2_1.3.2                 
## [135] bslib_0.3.0                 XVector_0.32.0             
## [137] VariantAnnotation_1.38.0    stringr_1.4.0              
## [139] digest_0.6.27               sctransform_0.3.2          
## [141] RcppAnnoy_0.0.19            spatstat.data_2.1-0        
## [143] Biostrings_2.60.2           rmarkdown_2.11             
## [145] leiden_0.3.9                fastmatch_1.1-3            
## [147] htmlTable_2.2.1             uwot_0.1.10                
## [149] restfulr_0.0.13             curl_4.3.2                 
## [151] shiny_1.6.0                 Rsamtools_2.8.0            
## [153] rjson_0.2.20                lifecycle_1.0.0            
## [155] nlme_3.1-152                jsonlite_1.7.2             
## [157] BSgenome_1.60.0             desc_1.3.0                 
## [159] viridisLite_0.4.0           fansi_0.5.0                
## [161] pillar_1.6.2                lattice_0.20-44            
## [163] KEGGREST_1.32.0             fastmap_1.1.0              
## [165] httr_1.4.2                  survival_3.2-11            
## [167] glue_1.4.2                  qlcMatrix_0.9.7            
## [169] png_0.1-7                   bit_4.0.4                  
## [171] ggforce_0.3.3               stringi_1.7.4              
## [173] sass_0.4.0                  blob_1.2.2                 
## [175] textshaping_0.3.5           latticeExtra_0.6-29        
## [177] memoise_2.0.0               dplyr_1.0.7                
## [179] irlba_2.3.3                 future.apply_1.8.1