diff --git a/R/Trajectory_CellTypes_v12.R b/R/Trajectory_CellTypes_v12.R new file mode 100644 index 0000000..6cc59ab --- /dev/null +++ b/R/Trajectory_CellTypes_v12.R @@ -0,0 +1,327 @@ +#' Trajectory analysis with TSCAN +#' +#' Perform the CCBR single-cell RNA-seq trajectory workflow using TSCAN. +#' +#' @param Seurat_Object A Seurat object or path to an RDS file containing one. +#' @param MetaData A data.frame with per-cell metadata; must contain the clustering column. +#' @param Clusters_to_Use Character scalar naming the metadata column to use for clusters. +#' @param Custom_Gene_List Optional character vector or delimited string of genes to highlight. +#' +#' @return Invisibly returns `NULL`. Generates trajectory and pseudotime plots as side effects. +#' @examples +#' \donttest{ +#' if (requireNamespace("Seurat", quietly = TRUE)) { +#' seurat_example <- Seurat::CreateSeuratObject( +#' counts = matrix(rpois(200, lambda = 5), nrow = 20, +#' dimnames = list(paste0("g", 1:20), paste0("c", 1:10))) +#' ) +#' seurat_example <- Seurat::AddMetaData( +#' object = seurat_example, +#' metadata = data.frame(cluster = sample(letters[1:2], 10, replace = TRUE), +#' row.names = colnames(seurat_example)) +#' ) +#' Trajectory_CellTypes( +#' Seurat_Object = seurat_example, +#' MetaData = Seurat::FetchData(seurat_example, vars = c("cluster")), +#' Clusters_to_Use = "cluster" +#' ) +#' } +#' } +#' @export +Trajectory_CellTypes <- function(Seurat_Object, + MetaData, + Clusters_to_Use, + Custom_Gene_List = NULL) { + required_packages <- c( + "Seurat", "scater", "scran", "scuttle", "TrajectoryUtils", "TSCAN", + "ggplot2", "gridExtra", "mclust", + "SingleCellExperiment", "SummarizedExperiment" + ) + ensure_namespace(required_packages) + + seurat_object <- load_seurat_object(Seurat_Object) + + if (!is.data.frame(MetaData)) { + stop("MetaData must be a data.frame.", call. = FALSE) + } + if (!is.character(Clusters_to_Use) || length(Clusters_to_Use) != 1L || + is.na(Clusters_to_Use)) { + stop("Clusters_to_Use must be a non-missing character scalar.", call. = FALSE) + } + if (!Clusters_to_Use %in% colnames(MetaData)) { + stop(sprintf("Column '%s' not found in MetaData.", Clusters_to_Use), call. = FALSE) + } + + cache_original <- Sys.getenv("R_USER_CACHE_DIR", unset = NA_character_) + on.exit({ + if (is.na(cache_original)) { + Sys.unsetenv("R_USER_CACHE_DIR") + } else { + Sys.setenv(R_USER_CACHE_DIR = cache_original) + } + }, add = TRUE) + cache_dir <- tools::R_user_dir("Trajectory_CellTypes", which = "cache") + dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE) + Sys.setenv(R_USER_CACHE_DIR = cache_dir) + + if (!is.null(rownames(MetaData)) && all(colnames(seurat_object) %in% rownames(MetaData))) { + MetaData <- MetaData[colnames(seurat_object), , drop = FALSE] + } + + user_clusters <- MetaData[[Clusters_to_Use]] + if (length(user_clusters) != ncol(seurat_object)) { + warning("MetaData rows do not match Seurat object cells; attempting to align by cell names.") + shared_cells <- intersect(colnames(seurat_object), rownames(MetaData)) + if (length(shared_cells) == 0) { + stop("Unable to align MetaData with Seurat object cells.", call. = FALSE) + } + MetaData <- MetaData[shared_cells, , drop = FALSE] + MetaData <- MetaData[colnames(seurat_object), , drop = FALSE] + user_clusters <- MetaData[[Clusters_to_Use]] + } + if (length(user_clusters) != ncol(seurat_object)) { + stop("MetaData does not align with the Seurat object cells after attempted matching.", call. = FALSE) + } + + label_column <- resolve_cluster_column(seurat_object, MetaData, Clusters_to_Use) + if (!label_column %in% colnames(seurat_object@meta.data)) { + seurat_object[[label_column]] <- user_clusters + } + cluster_labels <- seurat_object[[label_column, drop = TRUE]] + Seurat::Idents(seurat_object) <- cluster_labels + + custom_gene_list <- normalise_gene_list(Custom_Gene_List) + + message("The following genes were used:") + gene_universe <- rownames(seurat_object) + genes_present <- intersect(custom_gene_list, gene_universe) + if (length(genes_present) == 0) { + message("None") + } else { + message(paste(genes_present, collapse = ", ")) + } + + message("The following genes were not found:") + missing_genes <- setdiff(custom_gene_list, gene_universe) + if (length(missing_genes) == 0) { + message("None") + } else { + message(paste(missing_genes, collapse = ", ")) + } + + custom_gene_list <- genes_present + + sce <- Seurat::as.SingleCellExperiment(seurat_object) + SummarizedExperiment::colData(sce)$label <- cluster_labels + + aggregated <- scuttle::aggregateAcrossCells(sce, ids = cluster_labels) + centroids <- SingleCellExperiment::reducedDim(aggregated, "PCA") + mst <- TSCAN::createClusterMST(centroids, clusters = NULL) + + line_data_tsne <- TSCAN::reportEdges(aggregated, mst = mst, clusters = NULL, use.dimred = "TSNE") + line_data_tsne <- harmonise_edge_axes(line_data_tsne, "tSNE") + plot_tsne1 <- scater::plotTSNE(sce, colour_by = "label") + + ggplot2::geom_line(data = line_data_tsne, ggplot2::aes(x = tSNE_1, y = tSNE_2, group = edge)) + + line_data_umap <- TSCAN::reportEdges(aggregated, mst = mst, clusters = NULL, use.dimred = "UMAP") + line_data_umap <- harmonise_edge_axes(line_data_umap, "UMAP") + plot_umap1 <- scater::plotUMAP(sce, colour_by = "label") + + ggplot2::geom_line(data = line_data_umap, ggplot2::aes(x = UMAP_1, y = UMAP_2, group = edge)) + + mapping <- TSCAN::mapCellsToEdges(sce, mst = mst, use.dimred = "PCA") + ordered <- TSCAN::orderCells(mapping, mst) + common_pseudo <- TrajectoryUtils::averagePseudotime(ordered) + + plot_tsne2 <- scater::plotTSNE(sce, colour_by = I(common_pseudo), text_by = "label", text_colour = "red") + + ggplot2::geom_line(data = line_data_tsne, ggplot2::aes(x = tSNE_1, y = tSNE_2, group = edge)) + + ggplot2::ggtitle("TSCAN-based MST") + + plot_umap2 <- scater::plotUMAP(sce, colour_by = I(common_pseudo), text_by = "label", text_colour = "red") + + ggplot2::geom_line(data = line_data_umap, ggplot2::aes(x = UMAP_1, y = UMAP_2, group = edge)) + + ggplot2::ggtitle("TSCAN-based MST") + + pseudo_all <- TSCAN::quickPseudotime(sce, use.dimred = "PCA") + pseudo_mnn <- TSCAN::quickPseudotime(sce, use.dimred = "PCA", dist.method = "mnn") + if (!is.null(pseudo_mnn$connected$TSNE)) { + pseudo_mnn$connected$TSNE <- harmonise_edge_axes(pseudo_mnn$connected$TSNE, "tSNE") + } + if (!is.null(pseudo_mnn$connected$UMAP)) { + pseudo_mnn$connected$UMAP <- harmonise_edge_axes(pseudo_mnn$connected$UMAP, "UMAP") + } + mnn_pseudo <- TrajectoryUtils::averagePseudotime(pseudo_mnn$ordering) + + plot_tsne3 <- scater::plotTSNE(sce, colour_by = I(mnn_pseudo), text_by = "label", text_colour = "red") + + ggplot2::geom_line(data = pseudo_mnn$connected$TSNE, ggplot2::aes(x = tSNE_1, y = tSNE_2, group = edge)) + + ggplot2::ggtitle("TSCAN with MNN distances-based MST") + + plot_umap3 <- scater::plotUMAP(sce, colour_by = I(mnn_pseudo), text_by = "label", text_colour = "red") + + ggplot2::geom_line(data = pseudo_mnn$connected$UMAP, ggplot2::aes(x = UMAP_1, y = UMAP_2, group = edge)) + + ggplot2::ggtitle("TSCAN with MNN distances-based MST") + + sce$Pseudotime <- TrajectoryUtils::pathStat(ordered)[, 1] + pseudo_tests <- TSCAN::testPseudotime(sce, pseudotime = ordered[, 1])[[1]] + sorted_tests <- pseudo_tests[order(pseudo_tests$p.value), ] + + downregulated <- sorted_tests[sorted_tests$logFC < 0, , drop = FALSE] + downregulated$SYMBOL <- rownames(downregulated) + top_down <- head(downregulated$SYMBOL, 10) + plot_decrease <- scater::plotExpression( + sce, + features = top_down, + x = "Pseudotime", + colour_by = "label", + ncol = 5 + ) + ggplot2::ggtitle("Expression of the top 10 genes that decrease with pseudotime") + + upregulated <- sorted_tests[sorted_tests$logFC > 0, , drop = FALSE] + upregulated$SYMBOL <- rownames(upregulated) + top_up <- head(upregulated$SYMBOL, 10) + plot_increase <- scater::plotExpression( + sce, + features = top_up, + x = "Pseudotime", + colour_by = "label", + ncol = 5 + ) + ggplot2::ggtitle("Expression of the top 10 genes that increase with pseudotime") + + on_first_path <- !is.na(sce$Pseudotime) + + print(Seurat::DimPlot(seurat_object)) + set.seed(1) + plot(pseudo_all$mst) + print(plot_tsne1) + gridExtra::grid.arrange(plot_tsne2, plot_tsne3, ncol = 2) + print(plot_umap1) + gridExtra::grid.arrange(plot_umap2, plot_umap3, ncol = 2) + plot(plot_decrease) + plot(plot_increase) + scater::plotHeatmap( + sce[, on_first_path], + order_columns_by = "Pseudotime", + colour_columns_by = "label", + features = head(top_up, 50), + center = TRUE, + main = "Expression of the top 50 genes that increase with pseudotime" + ) + scater::plotHeatmap( + sce[, on_first_path], + order_columns_by = "Pseudotime", + colour_columns_by = "label", + features = head(top_down, 50), + center = TRUE, + main = "Expression of the top 50 genes that decrease with pseudotime" + ) + + if (length(custom_gene_list) > 0) { + custom_scatter <- scater::plotExpression( + sce, + features = custom_gene_list, + x = "Pseudotime", + colour_by = "label", + ncol = 5 + ) + ggplot2::ggtitle("Expression of custom genes across pseudotime") + plot(custom_scatter) + + scater::plotHeatmap( + sce[, on_first_path], + order_columns_by = "Pseudotime", + colour_columns_by = "label", + features = custom_gene_list, + center = TRUE, + main = "Expression of custom genes across pseudotime" + ) + } + + invisible(NULL) +} + +utils::globalVariables(c( + "label", "edge", "Pseudotime", "SYMBOL", "UMAP_1", "UMAP_2", + "tSNE_1", "tSNE_2", "mst" +)) + +ensure_namespace <- function(pkgs) { + missing <- vapply(pkgs, function(pkg) !requireNamespace(pkg, quietly = TRUE), logical(1)) + if (any(missing)) { + stop( + sprintf( + "Missing required packages: %s", + paste(pkgs[missing], collapse = ", ") + ), + call. = FALSE + ) + } + invisible(TRUE) +} + +load_seurat_object <- function(input) { + if (inherits(input, "Seurat")) { + return(input) + } + if (is.character(input) && length(input) == 1L && file.exists(input)) { + return(readRDS(input)) + } + if (requireNamespace("nidapFunctions", quietly = TRUE)) { + path <- tryCatch( + nidapFunctions::nidapGetPath(input, "seurat_object.rds"), + error = function(e) NULL + ) + if (!is.null(path) && file.exists(path)) { + return(readRDS(path)) + } + } + stop( + "Provide a Seurat object or a path to an RDS file containing one.", + call. = FALSE + ) +} + +resolve_cluster_column <- function(seurat_object, metadata, selected) { + seurat_columns <- colnames(seurat_object@meta.data) + if (selected %in% seurat_columns) { + return(selected) + } + idx <- match(selected, colnames(metadata)) + if (!is.na(idx) && idx <= length(seurat_columns)) { + candidate <- seurat_columns[idx] + if (!is.na(candidate) && nzchar(candidate)) { + return(candidate) + } + } + normalised_selected <- make.names(selected) + matches <- seurat_columns[make.names(seurat_columns) == normalised_selected] + if (length(matches) == 1) { + return(matches) + } + selected +} + +normalise_gene_list <- function(genes) { + if (is.null(genes) || length(genes) == 0) { + return(character()) + } + if (length(genes) == 1L) { + genes <- unlist(strsplit(gsub(",", " ", genes), " ")) + } + genes <- genes[genes != ""] + unique(trimws(genes)) +} + +harmonise_edge_axes <- function(edge_df, prefix) { + if (is.null(edge_df) || nrow(edge_df) == 0) { + return(edge_df) + } + targets <- c(sprintf("%s_1", prefix), sprintf("%s_2", prefix)) + lower_targets <- tolower(targets) + current_names <- names(edge_df) + for (i in seq_along(targets)) { + required <- targets[i] + if (!required %in% current_names) { + candidates <- current_names[tolower(current_names) == lower_targets[i]] + if (length(candidates) == 1L) { + names(edge_df)[names(edge_df) == candidates] <- required + } + } + } + edge_df +} diff --git a/tests/testthat/test-Trajectory_CellTypes_v12.R b/tests/testthat/test-Trajectory_CellTypes_v12.R new file mode 100644 index 0000000..e7afd4d --- /dev/null +++ b/tests/testthat/test-Trajectory_CellTypes_v12.R @@ -0,0 +1,97 @@ +skip_if_packages_missing <- function(pkgs) { + for (pkg in pkgs) { + testthat::skip_if_not_installed(pkg) + } +} + +source(testthat::test_path("..", "..", "R", "Trajectory_CellTypes_v12.R")) + +build_test_seurat <- function() { + set.seed(42) + counts <- matrix( + rpois(200, lambda = 5), + nrow = 20, + dimnames = list(paste0("g", seq_len(20)), paste0("c", seq_len(10))) + ) + seurat_obj <- Seurat::CreateSeuratObject(counts = counts) + seurat_obj <- Seurat::NormalizeData(seurat_obj, verbose = FALSE) + seurat_obj <- suppressWarnings( + Seurat::FindVariableFeatures( + seurat_obj, + selection.method = "vst", + nfeatures = 10, + verbose = FALSE + ) + ) + seurat_obj <- Seurat::ScaleData(seurat_obj, features = rownames(seurat_obj), verbose = FALSE) + seurat_obj <- Seurat::RunPCA(seurat_obj, npcs = 5, verbose = FALSE) + seurat_obj <- Seurat::RunTSNE( + seurat_obj, + dims = 1:5, + perplexity = 2, + check_duplicates = FALSE, + verbose = FALSE + ) + seurat_obj <- Seurat::RunUMAP( + seurat_obj, + dims = 1:5, + n.neighbors = 5, + verbose = FALSE + ) + clusters <- sample(letters[1:2], ncol(seurat_obj), replace = TRUE) + names(clusters) <- colnames(seurat_obj) + Seurat::AddMetaData( + object = seurat_obj, + metadata = data.frame(cluster = clusters, row.names = names(clusters)) + ) +} + +required_pkgs <- c( + "Seurat", "scater", "scran", "scuttle", "TrajectoryUtils", "TSCAN", + "ggplot2", "gridExtra", "mclust", + "SingleCellExperiment", "SummarizedExperiment" +) + + + testthat::test_that("Trajectory_CellTypes errors when clustering column missing", { + skip_if_packages_missing(required_pkgs) + seurat_obj <- build_test_seurat() + metadata <- data.frame(other = rep("a", ncol(seurat_obj)), row.names = colnames(seurat_obj)) + testthat::expect_error( + Trajectory_CellTypes( + Seurat_Object = seurat_obj, + MetaData = metadata, + Clusters_to_Use = "cluster" + ), + "Column 'cluster' not found" + ) +}) + + testthat::test_that("Trajectory_CellTypes validates argument types", { + skip_if_packages_missing(required_pkgs) + seurat_obj <- build_test_seurat() + metadata <- data.frame(cluster = seurat_obj$cluster, row.names = colnames(seurat_obj)) + testthat::expect_error( + Trajectory_CellTypes( + Seurat_Object = seurat_obj, + MetaData = metadata, + Clusters_to_Use = 123 + ), + "non-missing character scalar" + ) +}) + + testthat::test_that("Trajectory_CellTypes runs on minimal input", { + skip_if_packages_missing(required_pkgs) + seurat_obj <- build_test_seurat() + metadata <- data.frame(cluster = seurat_obj$cluster, row.names = colnames(seurat_obj)) + set.seed(7) + testthat::expect_invisible( + Trajectory_CellTypes( + Seurat_Object = seurat_obj, + MetaData = metadata, + Clusters_to_Use = "cluster", + Custom_Gene_List = "g1,g2" + ) + ) +})