Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions r/R/clustering.R
Original file line number Diff line number Diff line change
Expand Up @@ -234,13 +234,13 @@ knn_to_geodesic_graph <- function(knn, return_type = c("matrix", "list"), thread
return(res)
}

#' Cluster an adjacency matrix
#' Cluster an adjacency matrix or graph edge list
#' @rdname cluster_graph
#' @details **cluster_graph_leiden**: Leiden clustering algorithm `igraph::cluster_leiden()`.
#' Note that when using `objective_function = "CPM"` the number of clusters empirically scales with `cells * resolution`,
#' so 1e-3 is a good resolution for 10k cells, while 1e-5 works better for 1M cells. A resolution of 1 is a
#' good default when using the default `objective_function = "modularity"`.
#' @param mat Symmetric adjacency matrix (dgCMatrix) output from e.g. `knn_to_snn_graph()` or `knn_to_geodesic_graph()`. Only the lower triangle is used.
#' @param mat Symmetric adjacency matrix (dgCMatrix) or graph list output from e.g. `knn_to_snn_graph()` or `knn_to_geodesic_graph()`. Only the lower triangle is used for matrix inputs.
#' @param resolution Resolution parameter. Higher values result in more clusters
#' @param objective_function Graph statistic to optimize during clustering. Modularity is the default as it keeps resolution independent of dataset size (see details below).
#' For the meaning of each option, see `igraph::cluster_leiden()`.
Expand All @@ -261,7 +261,7 @@ cluster_graph_leiden <- function(

objective_function <- match.arg(objective_function)

igraph::graph_from_adjacency_matrix(mat, weighted = TRUE, diag = FALSE, mode = "lower") %>%
graph_from_clustering_input(mat) %>%
igraph::cluster_leiden(resolution_parameter = resolution, objective_function=objective_function, ...) %>%
igraph::membership() %>%
as.factor()
Expand All @@ -282,12 +282,26 @@ cluster_graph_louvain <- function(
on.exit(restore_seed(prev_seed), add = TRUE)
set.seed(seed)

igraph::graph_from_adjacency_matrix(mat, weighted = TRUE, diag = FALSE, mode = "lower") %>%
graph_from_clustering_input(mat) %>%
igraph::cluster_louvain(resolution = resolution) %>%
igraph::membership() %>%
as.factor()
}

# Build an undirected, weighted igraph graph from a clustering input.
#
# `mat` is treated as an edge list when it is a list carrying all of the
# fields `i`, `j`, `weight` and `dim`; anything else is handed to
# `igraph::graph_from_adjacency_matrix()` as an adjacency matrix, of which
# only the lower triangle is read and the diagonal is ignored.
#
# For edge-list input the graph has `mat$dim` vertices, entries where
# `i == j` (self-loops) are dropped, and the remaining edges carry `weight`
# as an edge attribute. Indices are shifted by one before being passed to
# igraph (presumably the list stores 0-based indices -- confirm with the
# producers of the list).
graph_from_clustering_input <- function(mat) {
  edge_list_fields <- c("i", "j", "weight", "dim")
  is_edge_list <- is.list(mat) && all(edge_list_fields %in% names(mat))

  # Matrix input: defer entirely to igraph's adjacency-matrix constructor.
  if (!is_edge_list) {
    return(
      igraph::graph_from_adjacency_matrix(mat, weighted = TRUE, diag = FALSE, mode = "lower")
    )
  }

  g <- igraph::make_empty_graph(n = mat$dim, directed = FALSE)

  # Self-loops are excluded, mirroring `diag = FALSE` on the matrix path.
  not_self_loop <- mat$i != mat$j
  if (any(not_self_loop)) {
    from <- mat$i[not_self_loop] + 1L
    to <- mat$j[not_self_loop] + 1L
    # igraph expects endpoints interleaved as from1, to1, from2, to2, ...
    endpoints <- as.vector(rbind(from, to))
    g <- igraph::add_edges(
      g,
      endpoints,
      attr = list(weight = mat$weight[not_self_loop])
    )
  }

  g
}

#' @rdname cluster_graph
#' @details **cluster_graph_seurat**: Seurat's clustering algorithm `Seurat::FindClusters()`
#' @export
Expand Down Expand Up @@ -425,4 +439,4 @@ knn_annoy <- function(data, query = NULL, k = 10, metric = c("euclidean", "cosin
if (metric == "cosine") dist <- 0.5 * (dist * dist)
res <- list(idx = idx, dist = dist)
return(res)
}
}
6 changes: 3 additions & 3 deletions r/man/cluster_graph.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 8 additions & 1 deletion r/tests/testthat/test-clustering.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ test_that("igraph clustering doesn't crash", {
test_data <- readRDS("../data/iris_geodesic_graph.rds")
knn <- test_data$knn
graph <- knn_to_geodesic_graph(knn)
graph_list <- knn_to_geodesic_graph(knn, return_type="list")

# The `resolution_parameter` param in igraph `cluster_leiden()` is deprecated,
# causing `expect_no_condition()` to fail. This workaround avoids test failures from
Expand All @@ -70,8 +71,14 @@ test_that("igraph clustering doesn't crash", {
expect_no_error(cluster_graph_leiden(graph))
expect_no_error(cluster_graph_leiden(graph, objective_function="CPM"))
})
expect_identical(
suppressWarnings(cluster_graph_leiden(graph)),
suppressWarnings(cluster_graph_leiden(graph_list))
)

expect_no_condition(cluster_graph_louvain(graph))
expect_no_condition(cluster_graph_louvain(graph_list))
expect_identical(cluster_graph_louvain(graph), cluster_graph_louvain(graph_list))
})

test_that("knn_hnsw rownames come from query", {
Expand Down Expand Up @@ -100,4 +107,4 @@ test_that("cluster_cells_graph works", {
))
expect_true(class(res) == "factor")
expect_equal(nrow(mat), length(res))
})
})