Quality control on Chromium CRISPR Guide Capture libraries
Kayla Jackson and A. Sina Booeshaghi
2024-05-17
Source:vignettes/vig12_crispr.Rmd
vig12_crispr.Rmd
Introduction
The data used in this vignette are shipped with the cellatlas
repository. The count matrix and metadata are provided in the
cellatlas/examples
folder as an AnnData
object. We will begin by loading the object as a
SingleCellExperiment
object. For convenience, the code below downloads a copy that has already been converted and saved as an RDS file.
library(stringr)
library(Matrix)
library(DropletUtils)
library(SpatialExperiment)
library(SpatialFeatureExperiment)
library(scater)
library(scuttle)
library(Voyager)
library(ggplot2)
# Use the black-and-white ggplot2 theme for all plots that follow.
theme_set(theme_bw())

# Download the example object only when no local copy exists, then load it.
crispr_rds <- "10xcrispr.rds"
if (!file.exists(crispr_rds)) {
  download.file(
    "https://github.com/pachterlab/voyager/raw/documentation-devel/vignettes/10xcrispr.rds",
    destfile = crispr_rds
  )
}
sce <- readRDS(crispr_rds)
# Flag features whose name starts with "mt-" (case-insensitive).
# The feature labels live in the `feature_name` column of rowData(sce); there
# is no `gene_name` column, so `rowData(sce)$gene_name` is NULL and the
# pattern check would silently run on an empty vector.
is_mito <- str_detect(
  rowData(sce)$feature_name,
  regex("^mt-", ignore_case = TRUE)
)
# Number of features flagged by the pattern.
sum(is_mito)
#> [1] 0
# Add per-cell QC metrics to colData(sce), passing the `is_mito` flags as the
# "mito" feature subset.
qc_subsets <- list(mito = is_mito)
sce <- addPerCellQCMetrics(sce, subsets = qc_subsets)
# List the per-cell columns that are now available.
names(colData(sce))
#> [1] "sum" "detected" "subsets_mito_sum"
#> [4] "subsets_mito_detected" "subsets_mito_percent" "total"
# Plot the `sum` and `detected` QC columns, one panel each, then combine the
# two plots with `+`.
p_sum <- plotColData(sce, y = "sum")
p_detected <- plotColData(sce, y = "detected")
p_sum + p_detected
# Binned plot of `detected` against `sum` (100 bins), recoloured with the
# "Blues" palette. Adding the fill scale replaces the one already set by
# plotColData(), which is why ggplot2 prints a "Scale for fill is already
# present" message.
p_binned <- plotColData(sce, x = "sum", y = "detected", bins = 100)
blues_fill <- scale_fill_distiller(palette = "Blues", direction = 1)
p_binned + blues_fill
#> Scale for fill is already present.
#> Adding another scale for fill, which will replace the existing scale.
# Binned plot of `subsets_mito_detected` against `sum`.
# When the y variable takes a single value (here no feature is flagged as
# mitochondrial, so the column is all zero), the 2D binning has a zero-width
# range and `stat_bin2d()` fails with "`origin` must be a number, not `NaN`".
# Only draw the plot when the variable actually varies across cells.
if (length(unique(sce$subsets_mito_detected)) > 1) {
  plotColData(sce, x = "sum", y = "subsets_mito_detected", bins = 100) +
    scale_fill_distiller(palette = "Blues", direction = 1)
} else {
  message("`subsets_mito_detected` is constant across cells; skipping the plot.")
}
#> Scale for fill is already present.
#> Adding another scale for fill, which will replace the existing scale.
#> Warning: Computation failed in `stat_bin2d()`.
#> Caused by error in `bin2d_breaks()`:
#> ! `origin` must be a number, not `NaN`.
# Rank barcodes by total count and extract the knee and inflection thresholds
# that barcodeRanks() stores in the metadata of its result.
bcrank <- barcodeRanks(counts(sce))
knee <- metadata(bcrank)$knee
inflection <- metadata(bcrank)$inflection

# Barcode rank plot on log-log axes. The counts here come from a CRISPR guide
# capture library, so the y axis is labelled as guide counts (not ClickTags).
plot(bcrank$rank, bcrank$total, log = "xy",
     xlab = "Rank", ylab = "Total guide count", cex.lab = 1.2)
# Mark both thresholds; the inflection point is the cutoff used for filtering
# cells further below.
abline(h = knee, col = "dodgerblue", lty = 2)
abline(h = inflection, col = "forestgreen", lty = 2)
legend("bottomleft", legend = c("knee", "inflection"),
       col = c("dodgerblue", "forestgreen"), lty = 2)
#> Warning in xy.coords(x, y, xlabel, ylabel, log): 3 y values <= 0 omitted from
#> logarithmic plot
# Keep barcodes whose total count exceeds the inflection point.
keep_cells <- which(sce$total > inflection)
sce <- sce[, keep_cells]
# Then drop features that have no counts left in the retained barcodes.
has_counts <- rowSums(counts(sce)) > 0
sce <- sce[has_counts, ]
# Print a summary of the filtered object.
sce
#> class: SingleCellExperiment
#> dim: 89 293
#> metadata(0):
#> assays(1): counts
#> rownames(89): Non-Targeting-5 Non-Targeting-7 ... HDAC1-1 HDAC1-2
#> rowData names(1): feature_name
#> colnames(293): AAAGAACAGAAACGAA AAAGAACGTTTGTCGA ... TTTGATCCAGGAGAAA
#> TTTGATCGTGGTAGTG
#> colData names(6): sum detected ... subsets_mito_percent total
#> reducedDimNames(0):
#> mainExpName: NULL
#> altExpNames(0):
# Record the R version, platform and package versions used to build this
# vignette, for reproducibility.
sessionInfo()
#> R version 4.4.0 (2024-04-24)
#> Platform: x86_64-apple-darwin20
#> Running under: macOS Ventura 13.6.6
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
#>
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#>
#> time zone: UTC
#> tzcode source: internal
#>
#> attached base packages:
#> [1] stats4 stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] Voyager_1.6.0 scater_1.32.0
#> [3] ggplot2_3.5.1 scuttle_1.14.0
#> [5] SpatialFeatureExperiment_1.6.1 SpatialExperiment_1.14.0
#> [7] DropletUtils_1.24.0 SingleCellExperiment_1.26.0
#> [9] SummarizedExperiment_1.34.0 Biobase_2.64.0
#> [11] GenomicRanges_1.56.0 GenomeInfoDb_1.40.0
#> [13] IRanges_2.38.0 S4Vectors_0.42.0
#> [15] BiocGenerics_0.50.0 MatrixGenerics_1.16.0
#> [17] matrixStats_1.3.0 Matrix_1.7-0
#> [19] stringr_1.5.1
#>
#> loaded via a namespace (and not attached):
#> [1] RColorBrewer_1.1-3 jsonlite_1.8.8
#> [3] wk_0.9.1 magrittr_2.0.3
#> [5] ggbeeswarm_0.7.2 magick_2.8.3
#> [7] farver_2.1.2 rmarkdown_2.27
#> [9] fs_1.6.4 zlibbioc_1.50.0
#> [11] ragg_1.3.2 vctrs_0.6.5
#> [13] spdep_1.3-3 memoise_2.0.1
#> [15] DelayedMatrixStats_1.26.0 RCurl_1.98-1.14
#> [17] terra_1.7-71 htmltools_0.5.8.1
#> [19] S4Arrays_1.4.0 BiocNeighbors_1.22.0
#> [21] Rhdf5lib_1.26.0 s2_1.1.6
#> [23] SparseArray_1.4.3 rhdf5_2.48.0
#> [25] sass_0.4.9 spData_2.3.0
#> [27] KernSmooth_2.23-24 bslib_0.7.0
#> [29] htmlwidgets_1.6.4 desc_1.4.3
#> [31] cachem_1.1.0 igraph_2.0.3
#> [33] lifecycle_1.0.4 pkgconfig_2.0.3
#> [35] rsvd_1.0.5 R6_2.5.1
#> [37] fastmap_1.2.0 GenomeInfoDbData_1.2.12
#> [39] digest_0.6.35 ggnewscale_0.4.10
#> [41] colorspace_2.1-0 patchwork_1.2.0
#> [43] RSpectra_0.16-1 dqrng_0.4.0
#> [45] irlba_2.3.5.1 textshaping_0.3.7
#> [47] beachmat_2.20.0 labeling_0.4.3
#> [49] fansi_1.0.6 httr_1.4.7
#> [51] abind_1.4-5 compiler_4.4.0
#> [53] proxy_0.4-27 withr_3.0.0
#> [55] tiff_0.1-12 BiocParallel_1.38.0
#> [57] viridis_0.6.5 DBI_1.2.2
#> [59] highr_0.10 HDF5Array_1.32.0
#> [61] R.utils_2.12.3 DelayedArray_0.30.1
#> [63] bluster_1.14.0 rjson_0.2.21
#> [65] classInt_0.4-10 tools_4.4.0
#> [67] units_0.8-5 vipor_0.4.7
#> [69] beeswarm_0.4.0 R.oo_1.26.0
#> [71] glue_1.7.0 EBImage_4.46.0
#> [73] rhdf5filters_1.16.0 grid_4.4.0
#> [75] sf_1.0-16 cluster_2.1.6
#> [77] memuse_4.2-3 generics_0.1.3
#> [79] gtable_0.3.5 R.methodsS3_1.8.2
#> [81] class_7.3-22 data.table_1.15.4
#> [83] BiocSingular_1.20.0 ScaledMatrix_1.12.0
#> [85] sp_2.1-4 utf8_1.2.4
#> [87] XVector_0.44.0 ggrepel_0.9.5
#> [89] pillar_1.9.0 limma_3.60.0
#> [91] dplyr_1.1.4 lattice_0.22-6
#> [93] deldir_2.0-4 tidyselect_1.2.1
#> [95] locfit_1.5-9.9 sfheaders_0.4.4
#> [97] knitr_1.46 gridExtra_2.3
#> [99] edgeR_4.2.0 xfun_0.44
#> [101] statmod_1.5.0 stringi_1.8.4
#> [103] UCSC.utils_1.0.0 fftwtools_0.9-11
#> [105] yaml_2.3.8 boot_1.3-30
#> [107] evaluate_0.23 codetools_0.2-20
#> [109] tibble_3.2.1 cli_3.6.2
#> [111] systemfonts_1.1.0 munsell_0.5.1
#> [113] jquerylib_0.1.4 Rcpp_1.0.12
#> [115] zeallot_0.1.0 png_0.1-8
#> [117] parallel_4.4.0 pkgdown_2.0.9
#> [119] jpeg_0.1-10 sparseMatrixStats_1.16.0
#> [121] bitops_1.0-7 viridisLite_0.4.2
#> [123] scales_1.3.0 e1071_1.7-14
#> [125] purrr_1.0.2 crayon_1.5.2
#> [127] scico_1.5.0 rlang_1.1.3
#> [129] cowplot_1.1.3