Univariate spatial stiatistics — calculateUnivariate • Voyager

These functions compute univariate spatial statistics, both global and local, on matrices, data frames, and SFE objects. For SFE objects, the statistics can be computed for numeric columns of colData, colGeometries, and annotGeometries, and the results are stored within the SFE object. calculateMoransI and runMoransI are convenience wrappers for calculateUnivariate and runUnivariate respectively.

Usage

# S4 method for class 'ANY,SFEMethod'
calculateUnivariate(
  x,
  type,
  listw = NULL,
  coords_df = NULL,
  BPPARAM = SerialParam(),
  zero.policy = NULL,
  returnDF = TRUE,
  p.adjust.method = "BH",
  name = NULL,
  ...
)

# S4 method for class 'ANY,character'
calculateUnivariate(
  x,
  type,
  listw = NULL,
  coords_df = NULL,
  BPPARAM = SerialParam(),
  zero.policy = NULL,
  returnDF = TRUE,
  p.adjust.method = "BH",
  name = NULL,
  ...
)

# S4 method for class 'SpatialFeatureExperiment,ANY'
calculateUnivariate(
  x,
  type,
  features = NULL,
  colGraphName = 1L,
  colGeometryName = 1L,
  sample_id = "all",
  exprs_values = "logcounts",
  BPPARAM = SerialParam(),
  zero.policy = NULL,
  returnDF = TRUE,
  include_self = FALSE,
  p.adjust.method = "BH",
  swap_rownames = NULL,
  name = NULL,
  ...
)

# S4 method for class 'ANY'
calculateMoransI(
  x,
  ...,
  BPPARAM = SerialParam(),
  zero.policy = NULL,
  name = "moran"
)

# S4 method for class 'SpatialFeatureExperiment'
calculateMoransI(
  x,
  features = NULL,
  colGraphName = 1L,
  colGeometryName = 1L,
  sample_id = "all",
  exprs_values = "logcounts",
  BPPARAM = SerialParam(),
  zero.policy = NULL,
  returnDF = TRUE,
  include_self = FALSE,
  p.adjust.method = "BH",
  swap_rownames = NULL,
  name = NULL,
  ...
)

colDataUnivariate(
  x,
  type,
  features,
  colGraphName = 1L,
  sample_id = "all",
  BPPARAM = SerialParam(),
  zero.policy = NULL,
  include_self = FALSE,
  p.adjust.method = "BH",
  name = NULL,
  ...
)

colDataMoransI(
  x,
  features,
  colGraphName = 1L,
  sample_id = "all",
  BPPARAM = SerialParam(),
  zero.policy = NULL,
  include_self = FALSE,
  p.adjust.method = "BH",
  name = NULL,
  ...
)

colGeometryUnivariate(
  x,
  type,
  features,
  colGeometryName = 1L,
  colGraphName = 1L,
  sample_id = "all",
  BPPARAM = SerialParam(),
  zero.policy = NULL,
  include_self = FALSE,
  p.adjust.method = "BH",
  name = NULL,
  ...
)

colGeometryMoransI(
  x,
  features,
  colGeometryName = 1L,
  colGraphName = 1L,
  sample_id = "all",
  BPPARAM = SerialParam(),
  zero.policy = NULL,
  include_self = FALSE,
  p.adjust.method = "BH",
  name = NULL,
  ...
)

annotGeometryUnivariate(
  x,
  type,
  features,
  annotGeometryName = 1L,
  annotGraphName = 1L,
  sample_id = "all",
  BPPARAM = SerialParam(),
  zero.policy = NULL,
  include_self = FALSE,
  p.adjust.method = "BH",
  name = NULL,
  ...
)

annotGeometryMoransI(
  x,
  features,
  annotGeometryName = 1L,
  annotGraphName = 1L,
  sample_id = "all",
  BPPARAM = SerialParam(),
  zero.policy = NULL,
  include_self = FALSE,
  p.adjust.method = "BH",
  name = NULL,
  ...
)

runUnivariate(
  x,
  type,
  features = NULL,
  colGraphName = 1L,
  colGeometryName = 1L,
  sample_id = "all",
  exprs_values = "logcounts",
  BPPARAM = SerialParam(),
  swap_rownames = NULL,
  zero.policy = NULL,
  include_self = FALSE,
  p.adjust.method = "BH",
  name = NULL,
  overwrite = FALSE,
  ...
)

runMoransI(
  x,
  features = NULL,
  colGraphName = 1L,
  colGeometryName = 1L,
  sample_id = "all",
  exprs_values = "logcounts",
  BPPARAM = SerialParam(),
  swap_rownames = NULL,
  zero.policy = NULL,
  include_self = FALSE,
  p.adjust.method = "BH",
  name = NULL,
  ...
)

reducedDimUnivariate(
  x,
  type,
  dimred = 1L,
  components = 1L,
  colGraphName = 1L,
  sample_id = "all",
  BPPARAM = SerialParam(),
  zero.policy = NULL,
  include_self = FALSE,
  p.adjust.method = "BH",
  name = NULL,
  ...
)

reducedDimMoransI(
  x,
  dimred = 1L,
  components = 1L,
  colGraphName = 1L,
  sample_id = "all",
  BPPARAM = SerialParam(),
  zero.policy = NULL,
  include_self = FALSE,
  p.adjust.method = "BH",
  name = NULL,
  ...
)

Arguments

x: A numeric matrix whose rows are features/genes, or a SpatialFeatureExperiment (SFE) object with such a matrix in an assay.
type: An SFEMethod object, or a string matching the name of an SFEMethod object. The methods mentioned above correspond to SFEMethod objects already implemented in the Voyager package. Use listSFEMethods to see which methods are available. You can implement new SFEMethod objects to apply Voyager functions to other spatial analysis methods. This is in part inspired by the caret, parsnip, and BiocSingular packages.
listw: Weighted neighborhood graph as a spdep listw object. Not used when the method specified in type does not use a spatial neighborhood graph, such as the variogram.
coords_df: A sf data frame specifying location of each cell. Not used when the method specified in type uses a spatial neighborhood graph. Must be specified otherwise.
BPPARAM: A BiocParallelParam object specifying whether and how computing the metric for numerous genes shall be parallelized.
zero.policy: default attr(listw, "zero.policy") as set when listw was created, if attribute not set, use global option value; if TRUE assign zero to the lagged value of zones without neighbours, if FALSE assign NA
returnDF: Logical, when the results are not added to a SFE object, whether the results should be formatted as a DataFrame.
p.adjust.method: Method to correct for multiple testing, passed to p.adjustSP. Methods allowed are in p.adjust.methods.
name: Name to use to store the results, defaults to the name in the SFEMethod object passed to argument type. Can be set to distinguish between results from the same method but with different parameters.
...: Other arguments passed to S4 method (for convenience wrappers like calculateMoransI) or method used to compute metrics as specified by the argument type (as in more general functions like calculateUnivariate). See documentation of functions with the same name as specified in type in the spdep package for the method specific arguments. For variograms, see .variogram.
features: Genes (calculate* SFE method and run*) or numeric columns of colData(x) (colData*) or any colGeometry (colGeometry*) or annotGeometry (annotGeometry*) for which the univariate metric is to be computed. Default to NULL. When NULL, then the metric is computed for all genes with the values in the assay specified in the argument exprs_values. This can be parallelized with the argument BPPARAM. For genes, if the row names of the SFE object are Ensembl IDs, then the gene symbol can be used and converted to IDs behind the scene with a column in rowData can be specified in swap_rownames. However, if one symbol matches multiple IDs, a warning will be given and the first match will be used. Internally, the results are always stored by the Ensembl ID rather than symbol.
colGraphName: Name of the listw graph in the SFE object that corresponds to entities represented by columns of the gene count matrix. Use colGraphNames to look up names of the available graphs for cells/spots. Note that for multiple sample_ids, it is assumed that all of them have a graph of this same name.
colGeometryName: Name of a colGeometry sf data frame whose numeric columns of interest are to be used to compute the metric. Use colGeometryNames to look up names of the sf data frames associated with cells/spots. In the SFE method of calculateUnivariate, this is to specify location of cells for methods that don't take a spatial neighborhood graph such as the variogram. If the geometry is not of type POINT, then spatialCoords(x) is used instead.
sample_id: Sample(s) in the SFE object whose cells/spots to use. Can be "all" to compute metric for all samples; the metric is computed separately for each sample.
exprs_values: Integer scalar or string indicating which assay of x contains the expression values.
include_self: Logical, whether the spatial neighborhood graph should include edges from each location to itself. This is for Getis-Ord Gi* as in localG and localG_perm, not to be used for any other method.
swap_rownames: Column name of rowData(object) to be used to identify features instead of rownames(object) when labeling plot elements. If not found in rowData, then rownames of the gene count matrix will be used.
annotGeometryName: Name of a annotGeometry sf data frame whose numeric columns of interest are to be used to compute the metric. Use annotGeometryNames to look up names of the sf data frames associated with annotations.
annotGraphName: Name of the listw graph in the SFE object that corresponds to the annotGeometry of interest. Use annotGraphNames to look up names of available annotation graphs.
overwrite: Logical, whether to overwrite existing results with the same name. Defaults to FALSE.
dimred: Name of a dimension reduction, can be seen in reducedDimNames.
components: Numeric vector of which components in the dimension reduction to compute spatial statistics on.

Value

In calculateUnivariate, if returnDF = TRUE, then a DataFrame, otherwise a list each element of which is the results for each feature. For run*, a SpatialFeatureExperiment object with the results added. See Details for where the results are stored.

Details

Most univariate methods in the package spdep are supported here. These methods are global, meaning returning one result for all spatial locations in the dataset: moran, geary, moran.mc, geary.mc, moran.test, geary.test, globalG.test, sp.correlogram. The variogram and variogram map from the gstat package are also supported.

The following methods are local, meaning each location has its own results: moran.plot, localmoran, localmoran_perm, localC, localC_perm, localG, localG_perm, LOSH, LOSH.mc, LOSH.cs. The GWmodel::gwss method will be supported soon, but is not supported yet.

Global results for genes are stored in rowData. For colGeometry and annotGeometry, the results are added to an attribute of the data frame called featureData, which is a DataFrame analogous to rowData for the gene count matrix, and can be accessed with the geometryFeatureData function. New column names in featureData would follow the same rules as in rowData. For colData, the results can be accessed with the colFeatureData function.

Local results are stored in the field localResults field of the SFE object, which can be accessed with localResults or localResult. If the results have p-values, then -log10 p and adjusted -log10 p are added. Note that in the multiple testing correction, p.adjustSP is used.

When the results are stored in the SFE object, parameters used to compute the results as well as to construct the spatial neighborhood graph are also added. For localResults, the parameters are added to the metadata field params of the localResults sorted by name, which defaults to the name in the SFEMethod object as specified in the type argument. For global methods, parameters for results for genes are in the metadata of rowData(x), organized by name (metadata(rowData(x))$params[[name]]). For colData, the global method parameters are stored in metadata of colData in the field params (metadata(colData(x))$params[[name]]). For geometries, the global method parameters are in an attribute named "params" of the corresponding sf data frame (attr(df, "params")[[name]]).

References

Cliff, A. D., Ord, J. K. 1981 Spatial processes, Pion, p. 17.

Anselin, L. (1995), Local Indicators of Spatial Association-LISA. Geographical Analysis, 27: 93-115. doi:10.1111/j.1538-4632.1995.tb00338.x

Ord, J. K., & Getis, A. 2012. Local spatial heteroscedasticity (LOSH), The Annals of Regional Science, 48 (2), 529-539.

Ord, J. K. and Getis, A. 1995 Local spatial autocorrelation statistics: distributional issues and an application. Geographical Analysis, 27, 286-306

Examples

library(SpatialFeatureExperiment)
library(SingleCellExperiment)
library(SFEData)
sfe <- McKellarMuscleData("small")
#> see ?SFEData and browseVignettes('SFEData') for documentation
#> loading from cache
colGraph(sfe, "visium") <- findVisiumGraph(sfe)
features_use <- rownames(sfe)[1:5]

# Moran's I
moran_results <- calculateMoransI(sfe,
    features = features_use,
    colGraphName = "visium",
    exprs_values = "counts"
)

# This does not advocate for computing Moran's I on raw counts.
# Just an example for function usage.

sfe <- runMoransI(sfe,
    features = features_use, colGraphName = "visium",
    exprs_values = "counts"
)
# Look at the results
head(rowData(sfe))
#> DataFrame with 6 rows and 8 columns
#>                               Ensembl      symbol            type       means
#>                           <character> <character>     <character>   <numeric>
#> ENSMUSG00000025902 ENSMUSG00000025902       Sox17 Gene Expression 0.007612179
#> ENSMUSG00000096126 ENSMUSG00000096126     Gm22307 Gene Expression 0.000200321
#> ENSMUSG00000033845 ENSMUSG00000033845      Mrpl15 Gene Expression 0.075921474
#> ENSMUSG00000025903 ENSMUSG00000025903      Lypla1 Gene Expression 0.057491987
#> ENSMUSG00000033813 ENSMUSG00000033813       Tcea1 Gene Expression 0.052283654
#> ENSMUSG00000002459 ENSMUSG00000002459       Rgs20 Gene Expression 0.000200321
#>                           vars       cv2 moran_Vis5A   K_Vis5A
#>                      <numeric> <numeric>   <numeric> <numeric>
#> ENSMUSG00000025902 0.008757912  151.1411  -0.0424335  13.32749
#> ENSMUSG00000096126 0.000200321 4992.0000         NaN       NaN
#> ENSMUSG00000033845 0.114250804   19.8212   0.2485804   5.41594
#> ENSMUSG00000025903 0.080645121   24.3985   0.0070062   9.46309
#> ENSMUSG00000033813 0.073603279   26.9256   0.1592157   8.51384
#> ENSMUSG00000002459 0.000200321 4992.0000          NA        NA

# Local Moran's I
sfe <- runUnivariate(sfe,
    type = "localmoran", features = features_use,
    colGraphName = "visium", exprs_values = "counts"
)
head(localResult(sfe, "localmoran", features_use[1]))
#>                           Ii         E.Ii     Var.Ii       Z.Ii Pr(z != E(Ii))
#> AAATTACCTATCGATG -0.02897069 -0.001345388 0.01609308 -0.2177647     0.82761246
#> AACATATCAACTGGTG -0.29141104 -0.001345388 0.01609308 -2.2865292     0.02222332
#> AAGATTGGCGGAACGT  0.10224949 -0.001345388 0.01958757  0.7401981     0.45917982
#> AAGGGACAGATTCTGT -0.02897069 -0.001345388 0.01609308 -0.2177647     0.82761246
#> AATATCGAGGGTTCTC  0.10224949 -0.001345388 0.01609308  0.8166176     0.41414701
#> AATGATGATACGCTAT  0.10224949 -0.001345388 0.01609308  0.8166176     0.41414701
#>                      mean   median    pysal    -log10p -log10p_adj
#> AAATTACCTATCGATG Low-High Low-High Low-High 0.08217298   0.0000000
#> AACATATCAACTGGTG Low-High Low-High Low-High 1.65319110   0.8080931
#> AAGATTGGCGGAACGT  Low-Low  Low-Low  Low-Low 0.33801720   0.0000000
#> AAGGGACAGATTCTGT Low-High Low-High Low-High 0.08217298   0.0000000
#> AATATCGAGGGTTCTC  Low-Low  Low-Low  Low-Low 0.38284547   0.0000000
#> AATGATGATACGCTAT  Low-Low  Low-Low  Low-Low 0.38284547   0.0000000

# For colData
sfe <- colDataUnivariate(sfe,
    type = "localmoran", features = "nCounts",
    colGraphName = "visium"
)
head(localResult(sfe, "localmoran", "nCounts"))
#>                           Ii          E.Ii      Var.Ii       Z.Ii
#> AAATTACCTATCGATG  0.53682603 -0.0073375879 0.087243111  1.8423152
#> AACATATCAACTGGTG  0.20017125 -0.0008174853 0.009783652  2.0319883
#> AAGATTGGCGGAACGT  0.13533683 -0.0002992400 0.004361215  2.0538630
#> AAGGGACAGATTCTGT  0.67946203 -0.0182482408 0.214584793  1.5061757
#> AATATCGAGGGTTCTC -0.01287299 -0.0009633914 0.011528171 -0.1109218
#> AATGATGATACGCTAT  0.15331553 -0.0306802864 0.356207210  0.3082880
#>                  Pr(z != E(Ii))      mean    median     pysal    -log10p
#> AAATTACCTATCGATG     0.06542906 High-High High-High High-High 1.18422931
#> AACATATCAACTGGTG     0.04215484 High-High High-High High-High 1.37515260
#> AAGATTGGCGGAACGT     0.03998896 High-High  Low-High High-High 1.39805992
#> AAGGGACAGATTCTGT     0.13202207 High-High High-High High-High 0.87935347
#> AATATCGAGGGTTCTC     0.91167838  High-Low  High-Low  High-Low 0.04015835
#> AATGATGATACGCTAT     0.75786321 High-High  High-Low High-High 0.12040917
#>                  -log10p_adj
#> AAATTACCTATCGATG  0.33913127
#> AACATATCAACTGGTG  0.53005456
#> AAGATTGGCGGAACGT  0.61990867
#> AAGGGACAGATTCTGT  0.03425543
#> AATATCGAGGGTTCTC  0.00000000
#> AATGATGATACGCTAT  0.00000000

# For annotGeometries
annotGraph(sfe, "myofiber_tri2nb") <-
    findSpatialNeighbors(sfe,
        type = "myofiber_simplified", MARGIN = 3L,
        method = "tri2nb", dist_type = "idw",
        zero.policy = TRUE
    )
sfe <- annotGeometryUnivariate(sfe,
    type = "localG", features = "area",
    annotGraphName = "myofiber_tri2nb",
    annotGeometryName = "myofiber_simplified",
    zero.policy = TRUE
)
head(localResult(sfe, "localG", "area",
    annotGeometryName = "myofiber_simplified"
))
#>          localG           Gi        E(Gi)        V(Gi)      Z(Gi)
#> 1018 -2.3083710 0.0001426229 0.0002238002 1.236681e-09 -2.3083710
#> 1021 -0.8140180 0.0002393084 0.0002665443 1.119477e-09 -0.8140180
#> 1024  0.0508039 0.0002301134 0.0002280492 1.650888e-09  0.0508039
#> 1041 -0.1700897 0.0002715145 0.0002773569 1.179830e-09 -0.1700897
#> 1052  0.1547597 0.0002185310 0.0002133753 1.109810e-09  0.1547597
#> 1058 -0.3688569 0.0002047116 0.0002174315 1.189189e-09 -0.3688569
#>      Pr(z != E(Gi))    -log10p -log10p_adj cluster
#> 1018     0.02097851 1.67822538   0.9000741    High
#> 1021     0.41563466 0.38128824   0.0000000    High
#> 1024     0.95948178 0.01796327   0.0000000    High
#> 1041     0.86493956 0.06301424   0.0000000    High
#> 1052     0.87701073 0.05699509   0.0000000     Low
#> 1058     0.71223439 0.14737706   0.0000000     Low