These functions compute univariate spatial statistics, both global and local,
on matrices, data frames, and SFE objects. For SFE objects, the statistics
can be computed for numeric columns of colData, colGeometries, and
annotGeometries, and the results are stored within the SFE object.
calculateMoransI and runMoransI are convenience wrappers for
calculateUnivariate and runUnivariate, respectively.
Usage
# S4 method for class 'ANY,SFEMethod'
calculateUnivariate(
x,
type,
listw = NULL,
coords_df = NULL,
BPPARAM = SerialParam(),
zero.policy = NULL,
returnDF = TRUE,
p.adjust.method = "BH",
name = NULL,
...
)
# S4 method for class 'ANY,character'
calculateUnivariate(
x,
type,
listw = NULL,
coords_df = NULL,
BPPARAM = SerialParam(),
zero.policy = NULL,
returnDF = TRUE,
p.adjust.method = "BH",
name = NULL,
...
)
# S4 method for class 'SpatialFeatureExperiment,ANY'
calculateUnivariate(
x,
type,
features = NULL,
colGraphName = 1L,
colGeometryName = 1L,
sample_id = "all",
exprs_values = "logcounts",
BPPARAM = SerialParam(),
zero.policy = NULL,
returnDF = TRUE,
include_self = FALSE,
p.adjust.method = "BH",
swap_rownames = NULL,
name = NULL,
...
)
# S4 method for class 'ANY'
calculateMoransI(
x,
...,
BPPARAM = SerialParam(),
zero.policy = NULL,
name = "moran"
)
# S4 method for class 'SpatialFeatureExperiment'
calculateMoransI(
x,
features = NULL,
colGraphName = 1L,
colGeometryName = 1L,
sample_id = "all",
exprs_values = "logcounts",
BPPARAM = SerialParam(),
zero.policy = NULL,
returnDF = TRUE,
include_self = FALSE,
p.adjust.method = "BH",
swap_rownames = NULL,
name = NULL,
...
)
colDataUnivariate(
x,
type,
features,
colGraphName = 1L,
sample_id = "all",
BPPARAM = SerialParam(),
zero.policy = NULL,
include_self = FALSE,
p.adjust.method = "BH",
name = NULL,
...
)
colDataMoransI(
x,
features,
colGraphName = 1L,
sample_id = "all",
BPPARAM = SerialParam(),
zero.policy = NULL,
include_self = FALSE,
p.adjust.method = "BH",
name = NULL,
...
)
colGeometryUnivariate(
x,
type,
features,
colGeometryName = 1L,
colGraphName = 1L,
sample_id = "all",
BPPARAM = SerialParam(),
zero.policy = NULL,
include_self = FALSE,
p.adjust.method = "BH",
name = NULL,
...
)
colGeometryMoransI(
x,
features,
colGeometryName = 1L,
colGraphName = 1L,
sample_id = "all",
BPPARAM = SerialParam(),
zero.policy = NULL,
include_self = FALSE,
p.adjust.method = "BH",
name = NULL,
...
)
annotGeometryUnivariate(
x,
type,
features,
annotGeometryName = 1L,
annotGraphName = 1L,
sample_id = "all",
BPPARAM = SerialParam(),
zero.policy = NULL,
include_self = FALSE,
p.adjust.method = "BH",
name = NULL,
...
)
annotGeometryMoransI(
x,
features,
annotGeometryName = 1L,
annotGraphName = 1L,
sample_id = "all",
BPPARAM = SerialParam(),
zero.policy = NULL,
include_self = FALSE,
p.adjust.method = "BH",
name = NULL,
...
)
runUnivariate(
x,
type,
features = NULL,
colGraphName = 1L,
colGeometryName = 1L,
sample_id = "all",
exprs_values = "logcounts",
BPPARAM = SerialParam(),
swap_rownames = NULL,
zero.policy = NULL,
include_self = FALSE,
p.adjust.method = "BH",
name = NULL,
overwrite = FALSE,
...
)
runMoransI(
x,
features = NULL,
colGraphName = 1L,
colGeometryName = 1L,
sample_id = "all",
exprs_values = "logcounts",
BPPARAM = SerialParam(),
swap_rownames = NULL,
zero.policy = NULL,
include_self = FALSE,
p.adjust.method = "BH",
name = NULL,
...
)
reducedDimUnivariate(
x,
type,
dimred = 1L,
components = 1L,
colGraphName = 1L,
sample_id = "all",
BPPARAM = SerialParam(),
zero.policy = NULL,
include_self = FALSE,
p.adjust.method = "BH",
name = NULL,
...
)
reducedDimMoransI(
x,
dimred = 1L,
components = 1L,
colGraphName = 1L,
sample_id = "all",
BPPARAM = SerialParam(),
zero.policy = NULL,
include_self = FALSE,
p.adjust.method = "BH",
name = NULL,
...
)
Arguments
- x
A numeric matrix whose rows are features/genes, or a
SpatialFeatureExperiment
(SFE) object with such a matrix in an assay.
- type
An
SFEMethod
object, or a string matching the name of an SFEMethod
object. The methods listed in the Details section correspond to SFEMethod
objects already implemented in the Voyager package. Use listSFEMethods
to see which methods are available. You can implement new SFEMethod
objects to apply Voyager functions to other spatial analysis methods. This is in part inspired by the caret
, parsnip
, and BiocSingular
packages.
- listw
Weighted neighborhood graph as a
spdep
listw
object. Not used when the method specified intype
does not use a spatial neighborhood graph, such as the variogram.- coords_df
A
sf
data frame specifying location of each cell. Not used when the method specified intype
uses a spatial neighborhood graph. Must be specified otherwise.- BPPARAM
A
BiocParallelParam
object specifying whether and how computing the metric for numerous genes shall be parallelized.- zero.policy
Defaults to
attr(listw, "zero.policy")
as set when listw
was created; if the attribute is not set, the global option value is used. If TRUE, zero is assigned to the lagged value of zones without neighbours; if FALSE, NA is assigned.
- returnDF
Logical, when the results are not added to a SFE object, whether the results should be formatted as a
DataFrame
.- p.adjust.method
Method to correct for multiple testing, passed to
p.adjustSP
. Methods allowed are inp.adjust.methods
.- name
Name to use to store the results, defaults to the name in the
SFEMethod
object passed to argumenttype
. Can be set to distinguish between results from the same method but with different parameters.- ...
Other arguments passed to S4 method (for convenience wrappers like
calculateMoransI
) or method used to compute metrics as specified by the argumenttype
(as in more general functions likecalculateUnivariate
). See documentation of functions with the same name as specified intype
in thespdep
package for the method specific arguments. For variograms, see.variogram
.- features
Genes (
calculate*
SFE method andrun*
) or numeric columns ofcolData(x)
(colData*
) or anycolGeometry
(colGeometry*
) orannotGeometry
(annotGeometry*
) for which the univariate metric is to be computed. Defaults to NULL
. When NULL
, the metric is computed for all genes with the values in the assay specified in the argument exprs_values
. This can be parallelized with the argument BPPARAM
. For genes, if the row names of the SFE object are Ensembl IDs, then gene symbols can be used and are converted to IDs behind the scenes, using a column in rowData
that can be specified in swap_rownames
. However, if one symbol matches multiple IDs, a warning will be given and the first match will be used. Internally, the results are always stored by the Ensembl ID rather than by the symbol.
- colGraphName
Name of the listw graph in the SFE object that corresponds to entities represented by columns of the gene count matrix. Use
colGraphNames
to look up names of the available graphs for cells/spots. Note that for multiplesample_id
s, it is assumed that all of them have a graph of this same name.- colGeometryName
Name of a
colGeometry
sf
data frame whose numeric columns of interest are to be used to compute the metric. UsecolGeometryNames
to look up names of thesf
data frames associated with cells/spots. In the SFE method ofcalculateUnivariate
, this is to specify location of cells for methods that don't take a spatial neighborhood graph such as the variogram. If the geometry is not of typePOINT
, thenspatialCoords(x)
is used instead.- sample_id
Sample(s) in the SFE object whose cells/spots to use. Can be "all" to compute metric for all samples; the metric is computed separately for each sample.
- exprs_values
Integer scalar or string indicating which assay of x contains the expression values.
- include_self
Logical, whether the spatial neighborhood graph should include edges from each location to itself. This is for Getis-Ord Gi* as in
localG
andlocalG_perm
, not to be used for any other method.- swap_rownames
Column name of
rowData(object)
to be used to identify features instead ofrownames(object)
when labeling plot elements. If not found inrowData
, then rownames of the gene count matrix will be used.- annotGeometryName
Name of a
annotGeometry
sf
data frame whose numeric columns of interest are to be used to compute the metric. UseannotGeometryNames
to look up names of thesf
data frames associated with annotations.- annotGraphName
Name of the listw graph in the SFE object that corresponds to the
annotGeometry
of interest. UseannotGraphNames
to look up names of available annotation graphs.- overwrite
Logical, whether to overwrite existing results with the same name. Defaults to
FALSE
.- dimred
Name of a dimension reduction, can be seen in
reducedDimNames
.- components
Numeric vector of which components in the dimension reduction to compute spatial statistics on.
Value
In calculateUnivariate
, if returnDF = TRUE
, then a
DataFrame
; otherwise a list, each element of which holds the results for
one feature. For run*
, a SpatialFeatureExperiment
object
with the results added. See Details for where the results are stored.
Details
Most univariate methods in the package spdep
are supported here. These
methods are global, meaning returning one result for all spatial locations in
the dataset: moran
, geary
,
moran.mc
, geary.mc
,
moran.test
, geary.test
,
globalG.test
, sp.correlogram
. The
variogram and variogram map from the gstat
package are also supported.
The following methods are local, meaning each location has its own results:
moran.plot
, localmoran
,
localmoran_perm
, localC
,
localC_perm
, localG
,
localG_perm
, LOSH
,
LOSH.mc
, LOSH.cs
. The
GWmodel::gwss
method will be supported soon, but is not supported yet.
Global results for genes are stored in rowData
. For colGeometry
and annotGeometry
, the results are added to an attribute of the data
frame called featureData
, which is a DataFrame analogous to
rowData
for the gene count matrix, and can be accessed with the
geometryFeatureData
function. New column names in
featureData
would follow the same rules as in rowData
. For
colData
, the results can be accessed with the
colFeatureData
function.
Local results are stored in the localResults
field of the SFE
object, which can be accessed with
localResults
or
localResult
. If the results have
p-values, then -log10 p and adjusted -log10 p are added. Note that in the
multiple testing correction, p.adjustSP
is used.
When the results are stored in the SFE object, parameters used to compute the
results as well as to construct the spatial neighborhood graph are also
added. For localResults
, the parameters are added to the metadata
field params
of the localResults
, organized by name
, which
defaults to the name in the SFEMethod
object as specified in the
type
argument. For global methods, parameters for results for genes
are in the metadata of rowData(x)
, organized by name
(metadata(rowData(x))$params[[name]]
). For colData
, the global
method parameters are stored in metadata of colData
in the field
params
(metadata(colData(x))$params[[name]]
). For geometries,
the global method parameters are in an attribute named "params" of the
corresponding sf
data frame (attr(df, "params")[[name]]
).
References
Cliff, A. D., Ord, J. K. 1981 Spatial processes, Pion, p. 17.
Anselin, L. (1995), Local Indicators of Spatial Association-LISA. Geographical Analysis, 27: 93-115. doi:10.1111/j.1538-4632.1995.tb00338.x
Ord, J. K., & Getis, A. 2012. Local spatial heteroscedasticity (LOSH), The Annals of Regional Science, 48 (2), 529-539.
Ord, J. K. and Getis, A. 1995 Local spatial autocorrelation statistics: distributional issues and an application. Geographical Analysis, 27, 286-306
Examples
library(SpatialFeatureExperiment)
library(SingleCellExperiment)
library(SFEData)
sfe <- McKellarMuscleData("small")
#> see ?SFEData and browseVignettes('SFEData') for documentation
#> loading from cache
colGraph(sfe, "visium") <- findVisiumGraph(sfe)
features_use <- rownames(sfe)[1:5]
# Moran's I
moran_results <- calculateMoransI(sfe,
features = features_use,
colGraphName = "visium",
exprs_values = "counts"
)
# This does not advocate for computing Moran's I on raw counts.
# Just an example for function usage.
sfe <- runMoransI(sfe,
features = features_use, colGraphName = "visium",
exprs_values = "counts"
)
# Look at the results
head(rowData(sfe))
#> DataFrame with 6 rows and 8 columns
#> Ensembl symbol type means
#> <character> <character> <character> <numeric>
#> ENSMUSG00000025902 ENSMUSG00000025902 Sox17 Gene Expression 0.007612179
#> ENSMUSG00000096126 ENSMUSG00000096126 Gm22307 Gene Expression 0.000200321
#> ENSMUSG00000033845 ENSMUSG00000033845 Mrpl15 Gene Expression 0.075921474
#> ENSMUSG00000025903 ENSMUSG00000025903 Lypla1 Gene Expression 0.057491987
#> ENSMUSG00000033813 ENSMUSG00000033813 Tcea1 Gene Expression 0.052283654
#> ENSMUSG00000002459 ENSMUSG00000002459 Rgs20 Gene Expression 0.000200321
#> vars cv2 moran_Vis5A K_Vis5A
#> <numeric> <numeric> <numeric> <numeric>
#> ENSMUSG00000025902 0.008757912 151.1411 -0.0424335 13.32749
#> ENSMUSG00000096126 0.000200321 4992.0000 NaN NaN
#> ENSMUSG00000033845 0.114250804 19.8212 0.2485804 5.41594
#> ENSMUSG00000025903 0.080645121 24.3985 0.0070062 9.46309
#> ENSMUSG00000033813 0.073603279 26.9256 0.1592157 8.51384
#> ENSMUSG00000002459 0.000200321 4992.0000 NA NA
# Local Moran's I
sfe <- runUnivariate(sfe,
type = "localmoran", features = features_use,
colGraphName = "visium", exprs_values = "counts"
)
head(localResult(sfe, "localmoran", features_use[1]))
#> Ii E.Ii Var.Ii Z.Ii Pr(z != E(Ii))
#> AAATTACCTATCGATG -0.02897069 -0.001345388 0.01609308 -0.2177647 0.82761246
#> AACATATCAACTGGTG -0.29141104 -0.001345388 0.01609308 -2.2865292 0.02222332
#> AAGATTGGCGGAACGT 0.10224949 -0.001345388 0.01958757 0.7401981 0.45917982
#> AAGGGACAGATTCTGT -0.02897069 -0.001345388 0.01609308 -0.2177647 0.82761246
#> AATATCGAGGGTTCTC 0.10224949 -0.001345388 0.01609308 0.8166176 0.41414701
#> AATGATGATACGCTAT 0.10224949 -0.001345388 0.01609308 0.8166176 0.41414701
#> mean median pysal -log10p -log10p_adj
#> AAATTACCTATCGATG Low-High Low-High Low-High 0.08217298 0.0000000
#> AACATATCAACTGGTG Low-High Low-High Low-High 1.65319110 0.8080931
#> AAGATTGGCGGAACGT Low-Low Low-Low Low-Low 0.33801720 0.0000000
#> AAGGGACAGATTCTGT Low-High Low-High Low-High 0.08217298 0.0000000
#> AATATCGAGGGTTCTC Low-Low Low-Low Low-Low 0.38284547 0.0000000
#> AATGATGATACGCTAT Low-Low Low-Low Low-Low 0.38284547 0.0000000
# For colData
sfe <- colDataUnivariate(sfe,
type = "localmoran", features = "nCounts",
colGraphName = "visium"
)
head(localResult(sfe, "localmoran", "nCounts"))
#> Ii E.Ii Var.Ii Z.Ii
#> AAATTACCTATCGATG 0.53682603 -0.0073375879 0.087243111 1.8423152
#> AACATATCAACTGGTG 0.20017125 -0.0008174853 0.009783652 2.0319883
#> AAGATTGGCGGAACGT 0.13533683 -0.0002992400 0.004361215 2.0538630
#> AAGGGACAGATTCTGT 0.67946203 -0.0182482408 0.214584793 1.5061757
#> AATATCGAGGGTTCTC -0.01287299 -0.0009633914 0.011528171 -0.1109218
#> AATGATGATACGCTAT 0.15331553 -0.0306802864 0.356207210 0.3082880
#> Pr(z != E(Ii)) mean median pysal -log10p
#> AAATTACCTATCGATG 0.06542906 High-High High-High High-High 1.18422931
#> AACATATCAACTGGTG 0.04215484 High-High High-High High-High 1.37515260
#> AAGATTGGCGGAACGT 0.03998896 High-High Low-High High-High 1.39805992
#> AAGGGACAGATTCTGT 0.13202207 High-High High-High High-High 0.87935347
#> AATATCGAGGGTTCTC 0.91167838 High-Low High-Low High-Low 0.04015835
#> AATGATGATACGCTAT 0.75786321 High-High High-Low High-High 0.12040917
#> -log10p_adj
#> AAATTACCTATCGATG 0.33913127
#> AACATATCAACTGGTG 0.53005456
#> AAGATTGGCGGAACGT 0.61990867
#> AAGGGACAGATTCTGT 0.03425543
#> AATATCGAGGGTTCTC 0.00000000
#> AATGATGATACGCTAT 0.00000000
# For annotGeometries
annotGraph(sfe, "myofiber_tri2nb") <-
findSpatialNeighbors(sfe,
type = "myofiber_simplified", MARGIN = 3L,
method = "tri2nb", dist_type = "idw",
zero.policy = TRUE
)
sfe <- annotGeometryUnivariate(sfe,
type = "localG", features = "area",
annotGraphName = "myofiber_tri2nb",
annotGeometryName = "myofiber_simplified",
zero.policy = TRUE
)
head(localResult(sfe, "localG", "area",
annotGeometryName = "myofiber_simplified"
))
#> localG Gi E(Gi) V(Gi) Z(Gi)
#> 1018 -2.3083710 0.0001426229 0.0002238002 1.236681e-09 -2.3083710
#> 1021 -0.8140180 0.0002393084 0.0002665443 1.119477e-09 -0.8140180
#> 1024 0.0508039 0.0002301134 0.0002280492 1.650888e-09 0.0508039
#> 1041 -0.1700897 0.0002715145 0.0002773569 1.179830e-09 -0.1700897
#> 1052 0.1547597 0.0002185310 0.0002133753 1.109810e-09 0.1547597
#> 1058 -0.3688569 0.0002047116 0.0002174315 1.189189e-09 -0.3688569
#> Pr(z != E(Gi)) -log10p -log10p_adj cluster
#> 1018 0.02097851 1.67822538 0.9000741 High
#> 1021 0.41563466 0.38128824 0.0000000 High
#> 1024 0.95948178 0.01796327 0.0000000 High
#> 1041 0.86493956 0.06301424 0.0000000 High
#> 1052 0.87701073 0.05699509 0.0000000 Low
#> 1058 0.71223439 0.14737706 0.0000000 Low