+extract_counts(moo, count_type, sub_count_type = NULL)
+}
+\arguments{
+\item{moo}{multiOmicDataSet containing \code{count_type} & \code{sub_count_type} in the counts slot}
+
+\item{count_type}{the type of counts to use -- must be a name in the counts slot (\code{moo@counts[[count_type]]})}
+
+\item{sub_count_type}{if \code{count_type} is a list, specify the sub count type within the list
+(\code{moo@counts[[count_type]][[sub_count_type]]}). (Default: \code{NULL})}
+}
+\description{
+Extract count data
+}
+\examples{
+moo <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = as.data.frame(nidap_raw_counts),
+ "clean" = as.data.frame(nidap_clean_raw_counts),
+ "filt" = as.data.frame(nidap_filtered_counts),
+ "norm" = list(
+ "voom" = as.data.frame(nidap_norm_counts)
+ )
+ )
+)
+
+moo |>
+ extract_counts("filt") |>
+ head()
+
+moo |>
+ extract_counts("norm", "voom") |>
+ head()
+
+}
diff --git a/code/MOSuite/man/figures/development-plan.png b/code/MOSuite/man/figures/development-plan.png
new file mode 100644
index 0000000..8cd9f45
Binary files /dev/null and b/code/MOSuite/man/figures/development-plan.png differ
diff --git a/code/MOSuite/man/figures/logo.png b/code/MOSuite/man/figures/logo.png
new file mode 100644
index 0000000..24983da
Binary files /dev/null and b/code/MOSuite/man/figures/logo.png differ
diff --git a/code/MOSuite/man/filter_counts.Rd b/code/MOSuite/man/filter_counts.Rd
new file mode 100644
index 0000000..984545b
--- /dev/null
+++ b/code/MOSuite/man/filter_counts.Rd
@@ -0,0 +1,195 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/filter.R
+\name{filter_counts}
+\alias{filter_counts}
+\title{Filter low counts}
+\usage{
+filter_counts(
+ moo,
+ count_type = "clean",
+ feature_id_colname = NULL,
+ sample_id_colname = NULL,
+ group_colname = "Group",
+ label_colname = NULL,
+ samples_to_include = NULL,
+ minimum_count_value_to_be_considered_nonzero = 8,
+ minimum_number_of_samples_with_nonzero_counts_in_total = 7,
+ minimum_number_of_samples_with_nonzero_counts_in_a_group = 3,
+ use_cpm_counts_to_filter = TRUE,
+ use_group_based_filtering = FALSE,
+ principal_component_on_x_axis = 1,
+ principal_component_on_y_axis = 2,
+ legend_position_for_pca = "top",
+ point_size_for_pca = 1,
+ add_label_to_pca = TRUE,
+ label_font_size = 3,
+ label_offset_y_ = 2,
+ label_offset_x_ = 2,
+ samples_to_rename = c(""),
+ color_histogram_by_group = FALSE,
+ set_min_max_for_x_axis_for_histogram = FALSE,
+ minimum_for_x_axis_for_histogram = -1,
+ maximum_for_x_axis_for_histogram = 1,
+ legend_position_for_histogram = "top",
+ legend_font_size_for_histogram = 10,
+ number_of_histogram_legend_columns = 6,
+ colors_for_plots = NULL,
+ plot_corr_matrix_heatmap = TRUE,
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots"),
+ interactive_plots = FALSE,
+ plots_subdir = "filt"
+)
+}
+\arguments{
+\item{moo}{multiOmicDataSet object (see \code{create_multiOmicDataSet_from_dataframes()})}
+
+\item{count_type}{the type of counts to use -- must be a name in the counts slot (\code{moo@counts})}
+
+\item{feature_id_colname}{The column from the counts data containing the Feature IDs (Usually Gene or Protein ID).
+This is usually the first column of your input Counts Matrix. Only columns of Text type from your input Counts
+Matrix will be available to select for this parameter. (Default: \code{NULL} - first column in the counts matrix will be
+used.)}
+
+\item{sample_id_colname}{The column from the sample metadata containing the sample names. The names in this column
+must exactly match the names used as the sample column names of your input Counts Matrix. (Default: \code{NULL} - first
+column in the sample metadata will be used.)}
+
+\item{group_colname}{The column from the sample metadata containing the sample group information. This is usually a
+column showing to which experimental treatments each sample belongs (e.g. WildType, Knockout, Tumor, Normal,
+Before, After, etc.).}
+
+\item{label_colname}{The column from the sample metadata containing the sample labels as you wish them to appear in
+the plots produced by this template. This can be the same Sample Names Column. However, you may desire different
+labels to display on your figure (e.g. shorter labels are sometimes preferred on plots). In that case, select the
+column with your preferred Labels here. The selected column should contain unique names for each sample. (Default:
+\code{NULL} -- \code{sample_id_colname} will be used.)}
+
+\item{samples_to_include}{Which samples would you like to include? Usually, you will choose all sample columns, or
+you could choose to remove certain samples. Samples excluded here will be removed in this step and from further
+analysis downstream of this step. (Default: \code{NULL} - all sample IDs in \code{moo@sample_meta} will be used.)}
+
+\item{minimum_count_value_to_be_considered_nonzero}{Minimum count value to be considered non-zero for a sample}
+
+\item{minimum_number_of_samples_with_nonzero_counts_in_total}{Minimum number of samples (total) with non-zero counts}
+
+\item{minimum_number_of_samples_with_nonzero_counts_in_a_group}{Only keeps genes that have at least this number of
+samples with nonzero CPM counts in at least one group}
+
+\item{use_cpm_counts_to_filter}{If no transformation has been been performed on counts matrix (eg Raw Counts) set to
+TRUE. If TRUE counts will be transformed to CPM and filtered based on given criteria. If gene counts matrix has
+been transformed (eg log2, CPM, FPKM or some form of Normalization) set to FALSE. If FALSE no further
+transformation will be applied and features will be filtered as is. For RNAseq data RAW counts should be
+transformed to CPM in order to properly filter.}
+
+\item{use_group_based_filtering}{If TRUE, only keeps features (e.g. genes) that have at least a certain number of
+samples with nonzero CPM counts in at least one group}
+
+\item{principal_component_on_x_axis}{The principal component to plot on the x-axis for the PCA plot. Choices include
+1, 2, 3, ... (default: 1)}
+
+\item{principal_component_on_y_axis}{The principal component to plot on the y-axis for the PCA plot. Choices include
+1, 2, 3, ... (default: 2)}
+
+\item{legend_position_for_pca}{legend position for the PCA plot}
+
+\item{point_size_for_pca}{geom point size for the PCA plot}
+
+\item{add_label_to_pca}{label points on the PCA plot}
+
+\item{label_font_size}{label font size for the PCA plot}
+
+\item{label_offset_y_}{label offset y for the PCA plot}
+
+\item{label_offset_x_}{label offset x for the PCA plot}
+
+\item{samples_to_rename}{If you do not have a Plot Labels Column in your sample metadata table, you can use this
+parameter to rename samples manually for display on the PCA plot. Use "Add item" to add each additional sample for
+renaming. Use the following format to describe which old name (in your sample metadata table) you want to rename to
+which new name: old_name: new_name}
+
+\item{color_histogram_by_group}{Set to FALSE to label histogram by Sample Names, or set to TRUE to label histogram by
+the column you select in the "Group Column Used to Color Histogram" parameter (below). Default is FALSE.}
+
+\item{set_min_max_for_x_axis_for_histogram}{whether to set min/max value for histogram x-axis}
+
+\item{minimum_for_x_axis_for_histogram}{x-axis minimum for histogram plot}
+
+\item{maximum_for_x_axis_for_histogram}{x-axis maximum for histogram plot}
+
+\item{legend_position_for_histogram}{legend position for the histogram plot. consider setting to 'none' for a large
+number of samples.}
+
+\item{legend_font_size_for_histogram}{legend font size for the histogram plot}
+
+\item{number_of_histogram_legend_columns}{number of columns for the histogram legend}
+
+\item{colors_for_plots}{Colors for the PCA and histogram will be picked, in order, from this list.
+Colors must either be names in \code{grDevices::colors()} or valid hex codes.}
+
+\item{plot_corr_matrix_heatmap}{Datasets with a large number of samples may be too large to create a correlation
+matrix heatmap. If this function takes longer than 5 minutes to run, Set to \code{FALSE} and the correlation matrix will
+not be be created. Default is \code{TRUE}.}
+
+\item{print_plots}{Whether to print plots during analysis (Defaults to \code{FALSE}, overwritable using option 'moo_print_plots' or environment variable 'MOO_PRINT_PLOTS')}
+
+\item{save_plots}{Whether to save plots to files during analysis (Defaults to \code{TRUE}, overwritable using option 'moo_save_plots' or environment variable 'MOO_SAVE_PLOTS')}
+
+\item{interactive_plots}{set to TRUE to make PCA and Histogram plots interactive with \code{plotly}, allowing you to hover
+your mouse over a point or line to view sample information. The similarity heat map will not display if this toggle
+is set to \code{TRUE}. Default is \code{FALSE}.}
+
+\item{plots_subdir}{subdirectory in \verb{figures/} where plots will be saved if \code{save_plots} is \code{TRUE}}
+}
+\value{
+\code{multiOmicDataSet} with filtered counts
+}
+\description{
+This is often the first step in the QC portion of an analysis to filter out
+features that have very low raw counts across most or all of your samples.
+}
+\details{
+This function takes a multiOmicDataSet containing clean raw counts and a sample
+metadata table, and returns the multiOmicDataSet object with filtered counts.
+It also produces an image consisting of three QC plots.
+
+You can tune the threshold for tuning how low counts for a given gene are
+before they are deemed "too low" and filtered out of downstream analysis. By
+default, this parameter is set to 1, meaning any raw count value less than 1
+will count as "too low".
+
+The QC plots are provided to help you assess: (1) PCA Plot: the within and
+between group variance in expression after dimensionality reduction; (2)
+Count Density Histogram: the dis/similarity of count distributions between
+samples; and (3) Similarity Heatmap: the overall similarity of samples to one
+another based on unsupervised clustering.
+}
+\examples{
+moo <- create_multiOmicDataSet_from_dataframes(
+ as.data.frame(nidap_sample_metadata),
+ as.data.frame(nidap_clean_raw_counts),
+ sample_id_colname = "Sample",
+ feature_id_colname = "Gene"
+) |>
+ filter_counts(
+ count_type = "raw"
+ )
+head(moo@counts$filt)
+
+}
+\seealso{
+Other moo methods:
+\code{\link[=batch_correct_counts]{batch_correct_counts()}},
+\code{\link[=clean_raw_counts]{clean_raw_counts()}},
+\code{\link[=diff_counts]{diff_counts()}},
+\code{\link[=filter_diff]{filter_diff()}},
+\code{\link[=normalize_counts]{normalize_counts()}},
+\code{\link[=plot_corr_heatmap]{plot_corr_heatmap()}},
+\code{\link[=plot_expr_heatmap]{plot_expr_heatmap()}},
+\code{\link[=plot_histogram]{plot_histogram()}},
+\code{\link[=plot_pca]{plot_pca()}},
+\code{\link[=plot_read_depth]{plot_read_depth()}},
+\code{\link[=run_deseq2]{run_deseq2()}},
+\code{\link[=set_color_pal]{set_color_pal()}}
+}
+\concept{moo methods}
diff --git a/code/MOSuite/man/filter_diff.Rd b/code/MOSuite/man/filter_diff.Rd
new file mode 100644
index 0000000..2a82849
--- /dev/null
+++ b/code/MOSuite/man/filter_diff.Rd
@@ -0,0 +1,144 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/differential.R
+\name{filter_diff}
+\alias{filter_diff}
+\title{Filter features from differential analysis based on statistical significance}
+\usage{
+filter_diff(
+ moo,
+ feature_id_colname = NULL,
+ significance_column = "adjpval",
+ significance_cutoff = 0.05,
+ change_column = "logFC",
+ change_cutoff = 1,
+ filtering_mode = "any",
+ include_estimates = c("FC", "logFC", "tstat", "pval", "adjpval"),
+ round_estimates = TRUE,
+ rounding_decimal_for_percent_cells = 0,
+ contrast_filter = "none",
+ contrasts = c(),
+ groups = c(),
+ groups_filter = "none",
+ label_font_size = 6,
+ label_distance = 1,
+ y_axis_expansion = 0.08,
+ fill_colors = c("steelblue1", "whitesmoke"),
+ pie_chart_in_3d = TRUE,
+ bar_width = 0.4,
+ draw_bar_border = TRUE,
+ plot_type = "bar",
+ plot_titles_fontsize = 12,
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots"),
+ plots_subdir = file.path("diff", "filt")
+)
+}
+\arguments{
+\item{moo}{multiOmicDataSet object (see \code{create_multiOmicDataSet_from_dataframes()})}
+
+\item{feature_id_colname}{The column from the counts data containing the Feature IDs (Usually Gene or Protein ID).
+This is usually the first column of your input Counts Matrix. Only columns of Text type from your input Counts
+Matrix will be available to select for this parameter. (Default: \code{NULL} - first column in the counts matrix will be
+used.)}
+
+\item{significance_column}{Column name for significance, e.g. \code{"pval"} or \code{"pvaladj"} (default)}
+
+\item{significance_cutoff}{Features will only be kept if their \code{significance_column} is less then this cutoff
+threshold}
+
+\item{change_column}{Column name for change, e.g. \code{"logFC"} (default)}
+
+\item{change_cutoff}{Features will only be kept if the absolute value of their \code{change_column} is greater than or
+equal to this cutoff threshold}
+
+\item{filtering_mode}{Accepted values: \code{"any"} or \code{"all"} to include features that meet the criteria in \emph{any}
+contrast or in \emph{all} contrasts}
+
+\item{include_estimates}{Column names of estimates to include. Default: \code{c("FC", "logFC", "tstat", "pval", "adjpval")}}
+
+\item{round_estimates}{Whether to round estimates. Default: \code{TRUE}}
+
+\item{rounding_decimal_for_percent_cells}{Decimal place to use when rounding Percent cells}
+
+\item{contrast_filter}{Whether to filter \code{contrasts} in or our of analysis. If \code{"keep"}, only the contrast names
+listed in \code{contrasts} will be included. If \verb{"remove}, the contrast names listed by \code{contrasts} will be removed. If
+\code{"none"}, all contrasts in the dataset are used. Options: \code{"keep"}, \code{"remove"}, or \code{"none"}}
+
+\item{contrasts}{Contrast names to filter by \code{contrast_filter}. If \code{contrast_filter} is \code{"none"}, this parameter has
+no effect.}
+
+\item{groups}{Group names to filter by \code{groups_filter}. If \code{groups_filter} is \code{"none"}, this parameter has no effect.
+Options: \code{"keep"}, \code{"remove"}, or \code{"none"}}
+
+\item{groups_filter}{Whether to filter \code{groups} in or out of analysis. If \code{"keep"}, only the group names listed in
+\code{groups} will be included. If \code{"remove"}, the group names listed by \code{groups} will be removed. If \code{"none"}, all
+groups in the dataset are used.}
+
+\item{label_font_size}{Font size for labels in the plot (default: 6)}
+
+\item{label_distance}{Distance of labels from the bars (default: 1)}
+
+\item{y_axis_expansion}{Expansion of the y-axis (default: 0.08)}
+
+\item{fill_colors}{Fill colors for the bars (default: c("steelblue1", "whitesmoke"))}
+
+\item{pie_chart_in_3d}{Whether to draw pie charts in 3D (default: TRUE)}
+
+\item{bar_width}{Width of the bars (default: 0.4)}
+
+\item{draw_bar_border}{Whether to draw borders around bars (default: TRUE)}
+
+\item{plot_type}{"bar" or "pie"}
+
+\item{plot_titles_fontsize}{Font size for plot titles (default: 12)}
+
+\item{print_plots}{Whether to print plots during analysis (Defaults to \code{FALSE}, overwritable using option 'moo_print_plots' or environment variable 'MOO_PRINT_PLOTS')}
+
+\item{save_plots}{Whether to save plots to files during analysis (Defaults to \code{TRUE}, overwritable using option 'moo_save_plots' or environment variable 'MOO_SAVE_PLOTS')}
+
+\item{plots_subdir}{subdirectory in where plots will be saved if \code{save_plots} is \code{TRUE}}
+}
+\description{
+Outputs dataset of significant genes from DEG table; filters genes based on statistical significance (p-value or
+adjusted p-value) and change (fold change, log2 fold change, or t-statistic); in addition allows for selection of DEG
+estimates and for sub-setting of contrasts and groups included in the output gene list.
+}
+\examples{
+moo <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = as.data.frame(nidap_raw_counts),
+ "clean" = as.data.frame(nidap_clean_raw_counts),
+ "filt" = as.data.frame(nidap_filtered_counts)
+ )
+) |>
+ diff_counts(
+ count_type = "filt",
+ sub_count_type = NULL,
+ sample_id_colname = "Sample",
+ feature_id_colname = "Gene",
+ covariates_colnames = c("Group", "Batch"),
+ contrast_colname = c("Group"),
+ contrasts = c("B-A", "C-A", "B-C"),
+ voom_normalization_method = "quantile",
+ ) |>
+ filter_diff()
+head(moo@analyses$diff_filt)
+}
+\seealso{
+Other moo methods:
+\code{\link[=batch_correct_counts]{batch_correct_counts()}},
+\code{\link[=clean_raw_counts]{clean_raw_counts()}},
+\code{\link[=diff_counts]{diff_counts()}},
+\code{\link[=filter_counts]{filter_counts()}},
+\code{\link[=normalize_counts]{normalize_counts()}},
+\code{\link[=plot_corr_heatmap]{plot_corr_heatmap()}},
+\code{\link[=plot_expr_heatmap]{plot_expr_heatmap()}},
+\code{\link[=plot_histogram]{plot_histogram()}},
+\code{\link[=plot_pca]{plot_pca()}},
+\code{\link[=plot_read_depth]{plot_read_depth()}},
+\code{\link[=run_deseq2]{run_deseq2()}},
+\code{\link[=set_color_pal]{set_color_pal()}}
+}
+\concept{moo methods}
diff --git a/code/MOSuite/man/gene_counts.Rd b/code/MOSuite/man/gene_counts.Rd
new file mode 100644
index 0000000..9fc18b9
--- /dev/null
+++ b/code/MOSuite/man/gene_counts.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{gene_counts}
+\alias{gene_counts}
+\title{RSEM expected gene counts}
+\format{
+\subsection{\code{gene_counts}}{
+
+A data frame with columns 'gene_id', 'GeneName', and a column for each sample's expected count.
+}
+}
+\source{
+Generated by running RENEE v2.5.8 on the
+\href{https://github.com/CCBR/RENEE/tree/e08f7db6c6e638cfd330caa182f64665d2ef37fa/.tests}{test dataset}
+}
+\usage{
+gene_counts
+}
+\description{
+RSEM expected gene counts
+}
+\keyword{data}
diff --git a/code/MOSuite/man/get_colors_lst.Rd b/code/MOSuite/man/get_colors_lst.Rd
new file mode 100644
index 0000000..fbe22fa
--- /dev/null
+++ b/code/MOSuite/man/get_colors_lst.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/colors.R
+\name{get_colors_lst}
+\alias{get_colors_lst}
+\title{Create named list of default colors for plotting}
+\usage{
+get_colors_lst(sample_metadata, palette_fun = grDevices::palette.colors, ...)
+}
+\arguments{
+\item{sample_metadata}{sample metadata as a data frame or tibble. The first column is assumed to contain the sample
+IDs which must correspond to column names in the raw counts.}
+
+\item{palette_fun}{Function for selecting colors. Assumed to contain \code{n} for the number of colors. Default:
+\code{grDevices::palette.colors()}}
+
+\item{...}{additional arguments forwarded to \code{palette_fun}}
+}
+\value{
+named list, with each column in \code{sample_metadata} containing entry with a named vector of colors
+}
+\description{
+Create named list of default colors for plotting
+}
+\examples{
+get_colors_lst(nidap_sample_metadata)
+\dontrun{
+get_colors_lst(nidap_sample_metadata, palette_fun = RColorBrewer::brewer.pal, name = "Set3")
+}
+}
diff --git a/code/MOSuite/man/get_colors_vctr.Rd b/code/MOSuite/man/get_colors_vctr.Rd
new file mode 100644
index 0000000..adef32e
--- /dev/null
+++ b/code/MOSuite/man/get_colors_vctr.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/colors.R
+\name{get_colors_vctr}
+\alias{get_colors_vctr}
+\title{Get vector of colors for observations in one column of a data frame}
+\usage{
+get_colors_vctr(dat, colname, palette_fun = grDevices::palette.colors, ...)
+}
+\arguments{
+\item{dat}{data frame}
+
+\item{colname}{column name in \code{dat}}
+
+\item{palette_fun}{Function for selecting colors. Assumed to contain \code{n} for the number of colors. Default:
+\code{grDevices::palette.colors()}}
+
+\item{...}{additional arguments forwarded to \code{palette_fun}}
+}
+\value{
+named vector of colors for each unique observation in \code{dat$colname}
+}
+\description{
+Get vector of colors for observations in one column of a data frame
+}
diff --git a/code/MOSuite/man/get_pc_percent_lab.Rd b/code/MOSuite/man/get_pc_percent_lab.Rd
new file mode 100644
index 0000000..a317d66
--- /dev/null
+++ b/code/MOSuite/man/get_pc_percent_lab.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_pca.R
+\name{get_pc_percent_lab}
+\alias{get_pc_percent_lab}
+\title{Get label for Principal Component with percent of variation}
+\usage{
+get_pc_percent_lab(pca_df, pc)
+}
+\arguments{
+\item{pca_df}{data frame from \code{calc_pca()}}
+
+\item{pc}{which principal component to report (e.g. \code{1})}
+}
+\value{
+glue string formatted with PC's percent of variation
+}
+\description{
+Get label for Principal Component with percent of variation
+}
+\examples{
+\dontrun{
+data.frame(PC = c(1, 2, 3), percent = c(40, 10, 0.5)) |>
+ get_pc_percent_lab(2)
+}
+}
+\keyword{internal}
diff --git a/code/MOSuite/man/get_random_colors.Rd b/code/MOSuite/man/get_random_colors.Rd
new file mode 100644
index 0000000..3b6fd5a
--- /dev/null
+++ b/code/MOSuite/man/get_random_colors.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/colors.R
+\name{get_random_colors}
+\alias{get_random_colors}
+\title{Get random colors.}
+\usage{
+get_random_colors(num_colors, n = 2000)
+}
+\arguments{
+\item{num_colors}{number of colors to select.}
+
+\item{n}{number of random RGB values to generate in the color space.}
+}
+\value{
+vector of random colors in hex format.
+}
+\description{
+Note: this function is not guaranteed to create a color blind friendly palette.
+Consider using other palettes such as \code{RColorBrewer::display.brewer.all(colorblindFriendly = TRUE)}.
+}
+\examples{
+\dontrun{
+set.seed(10)
+get_random_colors(5)
+}
+}
+\keyword{internal}
diff --git a/code/MOSuite/man/glue_gene_symbols.Rd b/code/MOSuite/man/glue_gene_symbols.Rd
new file mode 100644
index 0000000..0eece4a
--- /dev/null
+++ b/code/MOSuite/man/glue_gene_symbols.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{glue_gene_symbols}
+\alias{glue_gene_symbols}
+\title{Glue gene_id and GeneName columns into one column}
+\usage{
+glue_gene_symbols(counts_dat)
+}
+\arguments{
+\item{counts_dat}{data frame containing gene_id and GeneName columns}
+}
+\value{
+counts_dat with gene_id and GeneName joined with \code{|} as the new gene_id column
+}
+\description{
+Glue gene_id and GeneName columns into one column
+}
+\examples{
+\dontrun{
+gene_counts |>
+ glue_gene_symbols() |>
+ head()
+}
+}
+\keyword{internal}
diff --git a/code/MOSuite/man/join_dfs_wide.Rd b/code/MOSuite/man/join_dfs_wide.Rd
new file mode 100644
index 0000000..19590f4
--- /dev/null
+++ b/code/MOSuite/man/join_dfs_wide.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{join_dfs_wide}
+\alias{join_dfs_wide}
+\title{Join dataframes in named list to wide dataframe}
+\usage{
+join_dfs_wide(df_list, join_fn = dplyr::left_join)
+}
+\arguments{
+\item{df_list}{named list of dataframes}
+
+\item{join_fn}{join function to use (Default: \code{dplyr::left_join})}
+}
+\value{
+wide dataframe
+}
+\description{
+The first column is assumed to be shared by all dataframes
+}
+\examples{
+
+dfs <- list(
+ "a_vs_b" = data.frame(id = c("a1", "b2", "c3"), score = runif(3)),
+ "b_vs_c" = data.frame(id = c("a1", "b2", "c3"), score = rnorm(3))
+)
+dfs |> join_dfs_wide()
+
+}
+\keyword{utilities}
diff --git a/code/MOSuite/man/load_moo_from_data_dir.Rd b/code/MOSuite/man/load_moo_from_data_dir.Rd
new file mode 100644
index 0000000..210aa48
--- /dev/null
+++ b/code/MOSuite/man/load_moo_from_data_dir.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{load_moo_from_data_dir}
+\alias{load_moo_from_data_dir}
+\title{Load multiOmicDataSet from data directory}
+\usage{
+load_moo_from_data_dir(data_dir = file.path("..", "data"))
+}
+\arguments{
+\item{data_dir}{path to data directory containing .rds file (default: \code{../data})}
+}
+\value{
+loaded multiOmicDataSet object
+}
+\description{
+Searches the ../data directory for .rds files and loads the first matching
+multiOmicDataSet object. Validates that the loaded object is of the correct class.
+}
+\examples{
+\dontrun{
+moo <- load_moo_from_data_dir()
+}
+
+}
+\keyword{internal}
diff --git a/code/MOSuite/man/meta_tbl_to_dat.Rd b/code/MOSuite/man/meta_tbl_to_dat.Rd
new file mode 100644
index 0000000..23becd1
--- /dev/null
+++ b/code/MOSuite/man/meta_tbl_to_dat.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/metadata.R
+\name{meta_tbl_to_dat}
+\alias{meta_tbl_to_dat}
+\title{Convert sample metadata from a tibble to a dataframe with sample IDs as row names}
+\usage{
+meta_tbl_to_dat(meta_tbl, sample_id_colname = sample_id)
+}
+\arguments{
+\item{meta_tbl}{tibble with \code{sample_id} column}
+
+\item{sample_id_colname}{name of the column in \code{sample_metadata} that contains the sample IDs. (Default: \code{NULL} -
+first column in the sample metadata will be used.)}
+}
+\value{
+dataframe where row names are the sample IDs
+}
+\description{
+Convert sample metadata from a tibble to a dataframe with sample IDs as row names
+}
+\examples{
+\dontrun{
+sample_meta_tbl <- readr::read_tsv(system.file("extdata",
+ "sample_metadata.tsv.gz",
+ package = "MOSuite"
+))
+head(sample_meta_tbl)
+meta_tbl_to_dat(sample_meta_tbl)
+}
+}
+\keyword{internal}
diff --git a/code/MOSuite/man/multiOmicDataSet.Rd b/code/MOSuite/man/multiOmicDataSet.Rd
new file mode 100644
index 0000000..07efd66
--- /dev/null
+++ b/code/MOSuite/man/multiOmicDataSet.Rd
@@ -0,0 +1,48 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/0_mo-class.R
+\name{multiOmicDataSet}
+\alias{multiOmicDataSet}
+\title{multiOmicDataSet class}
+\usage{
+multiOmicDataSet(sample_metadata, anno_dat, counts_lst, analyses_lst = list())
+}
+\arguments{
+\item{sample_metadata}{sample metadata as a data frame or tibble. The first column is assumed to contain the sample
+IDs which must correspond to column names in the raw counts.}
+
+\item{anno_dat}{data frame of feature annotations, such as gene symbols or any other information about the features
+in \code{counts_lst}.}
+
+\item{counts_lst}{named list of data frames containing counts, e.g. expected feature counts from RSEM. Each data
+frame is expected to contain a \code{feature_id} column as the first column, and all remaining columns are sample IDs in
+the \code{sample_meta}.}
+
+\item{analyses_lst}{named list of analysis results, e.g. DESeq results object}
+}
+\value{
+A \code{multiOmicDataSet} S7 object.
+}
+\description{
+multiOmicDataSet class
+}
+\seealso{
+Other moo constructors:
+\code{\link[=create_multiOmicDataSet_from_dataframes]{create_multiOmicDataSet_from_dataframes()}},
+\code{\link[=create_multiOmicDataSet_from_files]{create_multiOmicDataSet_from_files()}}
+}
+\concept{moo constructors}
+\section{Additional properties}{
+
+\describe{
+\item{\code{@sample_meta}}{sample metadata as a data frame or tibble. The first column is assumed to contain the sample
+IDs which must correspond to column names in the raw counts.}
+
+\item{\code{@annotation}}{data frame of feature annotations, such as gene symbols or any other information about the
+features in the counts list.}
+
+\item{\code{@counts}}{named list of counts data frames (e.g. \code{raw}, \code{clean}, \code{cpm}, \code{filt}, \code{norm}, \code{batch}). Each data
+frame is expected to contain a feature ID column as the first column, and all remaining columns are sample IDs.}
+
+\item{\code{@analyses}}{named list of analysis results (e.g. DESeq2 results, colors).}
+}}
+
diff --git a/code/MOSuite/man/nidap_batch_corrected_counts.Rd b/code/MOSuite/man/nidap_batch_corrected_counts.Rd
new file mode 100644
index 0000000..ca00e82
--- /dev/null
+++ b/code/MOSuite/man/nidap_batch_corrected_counts.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{nidap_batch_corrected_counts}
+\alias{nidap_batch_corrected_counts}
+\title{Batch-corrected counts for the NIDAP test dataset.}
+\format{
+An object of class \code{spec_tbl_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 7943 rows and 10 columns.
+}
+\usage{
+nidap_batch_corrected_counts
+}
+\description{
+Batch-corrected counts for the NIDAP test dataset.
+}
+\keyword{data}
diff --git a/code/MOSuite/man/nidap_batch_corrected_counts_2.Rd b/code/MOSuite/man/nidap_batch_corrected_counts_2.Rd
new file mode 100644
index 0000000..386d71c
--- /dev/null
+++ b/code/MOSuite/man/nidap_batch_corrected_counts_2.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{nidap_batch_corrected_counts_2}
+\alias{nidap_batch_corrected_counts_2}
+\title{Batch-corrected counts for the NIDAP test dataset.
+The result of running \code{batch_correct_counts()} on \code{nidap_norm_counts}.}
+\format{
+An object of class \code{data.frame} with 7943 rows and 10 columns.
+}
+\usage{
+nidap_batch_corrected_counts_2
+}
+\description{
+Batch-corrected counts for the NIDAP test dataset.
+The result of running \code{batch_correct_counts()} on \code{nidap_norm_counts}.
+}
+\keyword{data}
diff --git a/code/MOSuite/man/nidap_clean_raw_counts.Rd b/code/MOSuite/man/nidap_clean_raw_counts.Rd
new file mode 100644
index 0000000..7a9b2fe
--- /dev/null
+++ b/code/MOSuite/man/nidap_clean_raw_counts.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{nidap_clean_raw_counts}
+\alias{nidap_clean_raw_counts}
+\title{Clean raw counts for the NIDAP test dataset.
+The result of running \code{clean_raw_counts()} on \code{nidap_raw_counts}.}
+\format{
+An object of class \code{spec_tbl_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 43280 rows and 10 columns.
+}
+\usage{
+nidap_clean_raw_counts
+}
+\description{
+Clean raw counts for the NIDAP test dataset.
+The result of running \code{clean_raw_counts()} on \code{nidap_raw_counts}.
+}
+\keyword{data}
diff --git a/code/MOSuite/man/nidap_deg_analysis.Rd b/code/MOSuite/man/nidap_deg_analysis.Rd
new file mode 100644
index 0000000..daec3f9
--- /dev/null
+++ b/code/MOSuite/man/nidap_deg_analysis.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{nidap_deg_analysis}
+\alias{nidap_deg_analysis}
+\title{Differential gene expression analysis for the NIDAP test dataset.}
+\format{
+An object of class \code{spec_tbl_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 7943 rows and 25 columns.
+}
+\usage{
+nidap_deg_analysis
+}
+\description{
+Differential gene expression analysis for the NIDAP test dataset.
+}
+\keyword{data}
diff --git a/code/MOSuite/man/nidap_deg_analysis_2.Rd b/code/MOSuite/man/nidap_deg_analysis_2.Rd
new file mode 100644
index 0000000..0bf6ab7
--- /dev/null
+++ b/code/MOSuite/man/nidap_deg_analysis_2.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{nidap_deg_analysis_2}
+\alias{nidap_deg_analysis_2}
+\title{Differential gene expression analysis for the NIDAP test dataset.
+The result of running \code{diff_counts()} on \code{nidap_filtered_counts}.}
+\format{
+An object of class \code{list} of length 3.
+}
+\usage{
+nidap_deg_analysis_2
+}
+\description{
+Differential gene expression analysis for the NIDAP test dataset.
+The result of running \code{diff_counts()} on \code{nidap_filtered_counts}.
+}
+\keyword{data}
diff --git a/code/MOSuite/man/nidap_deg_gene_list.Rd b/code/MOSuite/man/nidap_deg_gene_list.Rd
new file mode 100644
index 0000000..12bbc31
--- /dev/null
+++ b/code/MOSuite/man/nidap_deg_gene_list.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{nidap_deg_gene_list}
+\alias{nidap_deg_gene_list}
+\title{List of differentially expressed genes from the NIDAP test dataset using
+default parameters with \code{filter_diff()}.}
+\format{
+An object of class \code{data.frame} with 641 rows and 16 columns.
+}
+\usage{
+nidap_deg_gene_list
+}
+\description{
+List of differentially expressed genes from the NIDAP test dataset using
+default parameters with \code{filter_diff()}.
+}
+\keyword{data}
diff --git a/code/MOSuite/man/nidap_filtered_counts.Rd b/code/MOSuite/man/nidap_filtered_counts.Rd
new file mode 100644
index 0000000..5886eab
--- /dev/null
+++ b/code/MOSuite/man/nidap_filtered_counts.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{nidap_filtered_counts}
+\alias{nidap_filtered_counts}
+\title{Filtered counts for the NIDAP test dataset.
+The result of running \code{filter_counts()} on \code{nidap_clean_raw_counts}.}
+\format{
+An object of class \code{spec_tbl_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 7943 rows and 10 columns.
+}
+\usage{
+nidap_filtered_counts
+}
+\description{
+Filtered counts for the NIDAP test dataset.
+The result of running \code{filter_counts()} on \code{nidap_clean_raw_counts}.
+}
+\keyword{data}
diff --git a/code/MOSuite/man/nidap_norm_counts.Rd b/code/MOSuite/man/nidap_norm_counts.Rd
new file mode 100644
index 0000000..bc659b7
--- /dev/null
+++ b/code/MOSuite/man/nidap_norm_counts.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{nidap_norm_counts}
+\alias{nidap_norm_counts}
+\title{Normalized counts for the NIDAP test dataset.
+The result of running \code{normalize_counts()} on \code{nidap_filtered_counts}.}
+\format{
+An object of class \code{spec_tbl_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 7943 rows and 10 columns.
+}
+\usage{
+nidap_norm_counts
+}
+\description{
+Normalized counts for the NIDAP test dataset.
+The result of running \code{normalize_counts()} on \code{nidap_filtered_counts}.
+}
+\keyword{data}
diff --git a/code/MOSuite/man/nidap_raw_counts.Rd b/code/MOSuite/man/nidap_raw_counts.Rd
new file mode 100644
index 0000000..77bc2b8
--- /dev/null
+++ b/code/MOSuite/man/nidap_raw_counts.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{nidap_raw_counts}
+\alias{nidap_raw_counts}
+\title{Raw counts for the NIDAP test dataset
+Pairs with \code{nidap_sample_metadata}.}
+\format{
+An object of class \code{spec_tbl_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 43280 rows and 10 columns.
+}
+\usage{
+nidap_raw_counts
+}
+\description{
+Raw counts for the NIDAP test dataset
+Pairs with \code{nidap_sample_metadata}.
+}
+\keyword{data}
diff --git a/code/MOSuite/man/nidap_sample_metadata.Rd b/code/MOSuite/man/nidap_sample_metadata.Rd
new file mode 100644
index 0000000..31ed521
--- /dev/null
+++ b/code/MOSuite/man/nidap_sample_metadata.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{nidap_sample_metadata}
+\alias{nidap_sample_metadata}
+\title{Sample metadata for the NIDAP test dataset}
+\format{
+An object of class \code{spec_tbl_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 9 rows and 5 columns.
+}
+\usage{
+nidap_sample_metadata
+}
+\description{
+Sample metadata for the NIDAP test dataset
+}
+\keyword{data}
diff --git a/code/MOSuite/man/nidap_venn_diagram_dat.Rd b/code/MOSuite/man/nidap_venn_diagram_dat.Rd
new file mode 100644
index 0000000..6ab32bb
--- /dev/null
+++ b/code/MOSuite/man/nidap_venn_diagram_dat.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{nidap_venn_diagram_dat}
+\alias{nidap_venn_diagram_dat}
+\title{Output data from venn diagram.
+The result of running \code{plot_venn_diagram()} on \code{nidap_volcano_summary_dat}}
+\format{
+An object of class \code{spec_tbl_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 3068 rows and 4 columns.
+}
+\usage{
+nidap_venn_diagram_dat
+}
+\description{
+Output data from venn diagram.
+The result of running \code{plot_venn_diagram()} on \code{nidap_volcano_summary_dat}
+}
+\keyword{data}
diff --git a/code/MOSuite/man/nidap_volcano_summary_dat.Rd b/code/MOSuite/man/nidap_volcano_summary_dat.Rd
new file mode 100644
index 0000000..43bdf38
--- /dev/null
+++ b/code/MOSuite/man/nidap_volcano_summary_dat.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{nidap_volcano_summary_dat}
+\alias{nidap_volcano_summary_dat}
+\title{Summarized differential expression analysis for input to venn diagram}
+\format{
+An object of class \code{spec_tbl_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 4929 rows and 7 columns.
+}
+\usage{
+nidap_volcano_summary_dat
+}
+\description{
+Summarized differential expression analysis for input to venn diagram
+}
+\keyword{data}
diff --git a/code/MOSuite/man/normalize_counts.Rd b/code/MOSuite/man/normalize_counts.Rd
new file mode 100644
index 0000000..0bfd0ea
--- /dev/null
+++ b/code/MOSuite/man/normalize_counts.Rd
@@ -0,0 +1,169 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/normalize.R
+\name{normalize_counts}
+\alias{normalize_counts}
+\title{Normalize counts}
+\usage{
+normalize_counts(
+ moo,
+ count_type = "filt",
+ norm_type = "voom",
+ feature_id_colname = NULL,
+ samples_to_include = NULL,
+ sample_id_colname = NULL,
+ group_colname = "Group",
+ label_colname = NULL,
+ input_in_log_counts = FALSE,
+ voom_normalization_method = "quantile",
+ samples_to_rename = c(""),
+ add_label_to_pca = TRUE,
+ principal_component_on_x_axis = 1,
+ principal_component_on_y_axis = 2,
+ legend_position_for_pca = "top",
+ label_offset_x_ = 2,
+ label_offset_y_ = 2,
+ label_font_size = 3,
+ point_size_for_pca = 8,
+ color_histogram_by_group = TRUE,
+ set_min_max_for_x_axis_for_histogram = FALSE,
+ minimum_for_x_axis_for_histogram = -1,
+ maximum_for_x_axis_for_histogram = 1,
+ legend_font_size_for_histogram = 10,
+ legend_position_for_histogram = "top",
+ number_of_histogram_legend_columns = 6,
+ plot_corr_matrix_heatmap = TRUE,
+ colors_for_plots = NULL,
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots"),
+ interactive_plots = FALSE,
+ plots_subdir = "norm"
+)
+}
+\arguments{
+\item{moo}{multiOmicDataSet object (see \code{create_multiOmicDataSet_from_dataframes()})}
+
+\item{count_type}{the type of counts to use -- must be a name in the counts slot (\code{moo@counts})}
+
+\item{norm_type}{normalization type. Default: "voom" which uses \code{limma::voom}.}
+
+\item{feature_id_colname}{The column from the counts data containing the Feature IDs (Usually Gene or Protein ID).
+This is usually the first column of your input Counts Matrix. Only columns of Text type from your input Counts
+Matrix will be available to select for this parameter. (Default: \code{NULL} - first column in the counts matrix will be
+used.)}
+
+\item{samples_to_include}{Which samples would you like to include? Usually, you will choose all sample columns, or
+you could choose to remove certain samples. Samples excluded here will be removed in this step and from further
+analysis downstream of this step. (Default: \code{NULL} - all sample IDs in \code{moo@sample_meta} will be used.)}
+
+\item{sample_id_colname}{The column from the sample metadata containing the sample names. The names in this column
+must exactly match the names used as the sample column names of your input Counts Matrix. (Default: \code{NULL} - first
+column in the sample metadata will be used.)}
+
+\item{group_colname}{The column from the sample metadata containing the sample group information. This is usually a
+column showing to which experimental treatments each sample belongs (e.g. WildType, Knockout, Tumor, Normal,
+Before, After, etc.).}
+
+\item{label_colname}{The column from the sample metadata containing the sample labels as you wish them to appear in
+the plots produced by this template. This can be the same Sample Names Column. However, you may desire different
+labels to display on your figure (e.g. shorter labels are sometimes preferred on plots). In that case, select the
+column with your preferred Labels here. The selected column should contain unique names for each sample. (Default:
+\code{NULL} -- \code{sample_id_colname} will be used.)}
+
+\item{input_in_log_counts}{set this to \code{TRUE} if counts are already log2-transformed}
+
+\item{voom_normalization_method}{Normalization method to be applied to the logCPM values when using \code{limma::voom}}
+
+\item{samples_to_rename}{If you do not have a Plot Labels Column in your sample metadata table, you can use this
+parameter to rename samples manually for display on the PCA plot. Use "Add item" to add each additional sample for
+renaming. Use the following format to describe which old name (in your sample metadata table) you want to rename to
+which new name: old_name: new_name}
+
+\item{add_label_to_pca}{label points on the PCA plot}
+
+\item{principal_component_on_x_axis}{The principal component to plot on the x-axis for the PCA plot. Choices include
+1, 2, 3, ... (default: 1)}
+
+\item{principal_component_on_y_axis}{The principal component to plot on the y-axis for the PCA plot. Choices include
+1, 2, 3, ... (default: 2)}
+
+\item{legend_position_for_pca}{legend position for the PCA plot}
+
+\item{label_offset_x_}{label offset x for the PCA plot}
+
+\item{label_offset_y_}{label offset y for the PCA plot}
+
+\item{label_font_size}{label font size for the PCA plot}
+
+\item{point_size_for_pca}{geom point size for the PCA plot}
+
+\item{color_histogram_by_group}{Set to FALSE to label histogram by Sample Names, or set to TRUE to label histogram by
+the column you select in the "Group Column Used to Color Histogram" parameter (below). Default is FALSE.}
+
+\item{set_min_max_for_x_axis_for_histogram}{whether to set min/max value for histogram x-axis}
+
+\item{minimum_for_x_axis_for_histogram}{x-axis minimum for histogram plot}
+
+\item{maximum_for_x_axis_for_histogram}{x-axis maximum for histogram plot}
+
+\item{legend_font_size_for_histogram}{legend font size for the histogram plot}
+
+\item{legend_position_for_histogram}{legend position for the histogram plot. consider setting to 'none' for a large
+number of samples.}
+
+\item{number_of_histogram_legend_columns}{number of columns for the histogram legend}
+
+\item{plot_corr_matrix_heatmap}{Datasets with a large number of samples may be too large to create a correlation
+matrix heatmap. If this function takes longer than 5 minutes to run, Set to \code{FALSE} and the correlation matrix will
+not be be created. Default is \code{TRUE}.}
+
+\item{colors_for_plots}{Colors for the PCA and histogram will be picked, in order, from this list.
+Colors must either be names in \code{grDevices::colors()} or valid hex codes.}
+
+\item{print_plots}{Whether to print plots during analysis (Defaults to \code{FALSE}, overwritable using option 'moo_print_plots' or environment variable 'MOO_PRINT_PLOTS')}
+
+\item{save_plots}{Whether to save plots to files during analysis (Defaults to \code{TRUE}, overwritable using option 'moo_save_plots' or environment variable 'MOO_SAVE_PLOTS')}
+
+\item{interactive_plots}{set to TRUE to make PCA and Histogram plots interactive with \code{plotly}, allowing you to hover
+your mouse over a point or line to view sample information. The similarity heat map will not display if this toggle
+is set to \code{TRUE}. Default is \code{FALSE}.}
+
+\item{plots_subdir}{subdirectory in \verb{figures/} where plots will be saved if \code{save_plots} is \code{TRUE}}
+}
+\value{
+\code{multiOmicDataSet} with normalized counts
+}
+\description{
+Normalize counts
+}
+\examples{
+moo <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = as.data.frame(nidap_raw_counts),
+ "clean" = as.data.frame(nidap_clean_raw_counts),
+ "filt" = as.data.frame(nidap_filtered_counts)
+ )
+) |>
+ normalize_counts(
+ group_colname = "Group",
+ label_colname = "Label"
+ )
+head(moo@counts[["norm"]][["voom"]])
+}
+\seealso{
+Other moo methods:
+\code{\link[=batch_correct_counts]{batch_correct_counts()}},
+\code{\link[=clean_raw_counts]{clean_raw_counts()}},
+\code{\link[=diff_counts]{diff_counts()}},
+\code{\link[=filter_counts]{filter_counts()}},
+\code{\link[=filter_diff]{filter_diff()}},
+\code{\link[=plot_corr_heatmap]{plot_corr_heatmap()}},
+\code{\link[=plot_expr_heatmap]{plot_expr_heatmap()}},
+\code{\link[=plot_histogram]{plot_histogram()}},
+\code{\link[=plot_pca]{plot_pca()}},
+\code{\link[=plot_read_depth]{plot_read_depth()}},
+\code{\link[=run_deseq2]{run_deseq2()}},
+\code{\link[=set_color_pal]{set_color_pal()}}
+}
+\concept{moo methods}
diff --git a/code/MOSuite/man/option_params.Rd b/code/MOSuite/man/option_params.Rd
new file mode 100644
index 0000000..4ea8a38
--- /dev/null
+++ b/code/MOSuite/man/option_params.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/options.R
+\name{option_params}
+\alias{option_params}
+\title{Option parameters}
+\arguments{
+\item{print_plots}{Whether to print plots during analysis (Defaults to \code{FALSE}, overwritable using option 'moo_print_plots' or environment variable 'MOO_PRINT_PLOTS')}
+
+\item{plots_dir}{Path where plots are saved when \code{moo_save_plots} is \code{TRUE} (Defaults to \code{"figures/"}, overwritable using option 'moo_plots_dir' or environment variable 'MOO_PLOTS_DIR')}
+
+\item{save_plots}{Whether to save plots to files during analysis (Defaults to \code{TRUE}, overwritable using option 'moo_save_plots' or environment variable 'MOO_SAVE_PLOTS')}
+}
+\description{
+Option parameters
+}
+\keyword{internal}
diff --git a/code/MOSuite/man/options.Rd b/code/MOSuite/man/options.Rd
new file mode 100644
index 0000000..10d4fcd
--- /dev/null
+++ b/code/MOSuite/man/options.Rd
@@ -0,0 +1,46 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/options.R
+\name{options}
+\alias{options}
+\title{MOSuite Options}
+\description{
+Internally used, package-specific options. All options will prioritize R options() values, and fall back to environment variables if undefined. If neither the option nor the environment variable is set, a default value is used.
+}
+\section{Checking Option Values}{
+
+Option values specific to \code{MOSuite} can be
+accessed by passing the package name to \code{env}.
+
+\if{html}{\out{}}\preformatted{options::opts(env = "MOSuite")
+
+options::opt(x, default, env = "MOSuite")
+}\if{html}{\out{
}}
+}
+
+\section{Options}{
+
+\describe{
+\item{print_plots}{\describe{
+Whether to print plots during analysis\item{default: }{\preformatted{FALSE}}
+\item{option: }{moo_print_plots}
+\item{envvar: }{MOO_PRINT_PLOTS (evaluated if possible, raw string otherwise)}
+}}
+
+\item{save_plots}{\describe{
+Whether to save plots to files during analysis\item{default: }{\preformatted{TRUE}}
+\item{option: }{moo_save_plots}
+\item{envvar: }{MOO_SAVE_PLOTS (evaluated if possible, raw string otherwise)}
+}}
+
+\item{plots_dir}{\describe{
+Path where plots are saved when \code{moo_save_plots} is \code{TRUE}\item{default: }{\preformatted{"figures/"}}
+\item{option: }{moo_plots_dir}
+\item{envvar: }{MOO_PLOTS_DIR (evaluated if possible, raw string otherwise)}
+}}
+
+}
+}
+
+\seealso{
+options getOption Sys.setenv Sys.getenv
+}
diff --git a/code/MOSuite/man/parse_optional_vector.Rd b/code/MOSuite/man/parse_optional_vector.Rd
new file mode 100644
index 0000000..291bee1
--- /dev/null
+++ b/code/MOSuite/man/parse_optional_vector.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{parse_optional_vector}
+\alias{parse_optional_vector}
+\title{Parse comma-separated string into a vector}
+\usage{
+parse_optional_vector(x)
+}
+\arguments{
+\item{x}{character string with comma-separated values}
+}
+\value{
+character vector or NULL if input is empty
+}
+\description{
+Splits a comma-separated string into a trimmed character vector.
+Returns NULL if input is empty, NULL, or has zero length.
+}
+\examples{
+\dontrun{
+parse_optional_vector("a, b, c")
+parse_optional_vector("")
+}
+
+}
+\keyword{internal}
diff --git a/code/MOSuite/man/parse_samples_to_rename.Rd b/code/MOSuite/man/parse_samples_to_rename.Rd
new file mode 100644
index 0000000..6a9e7e9
--- /dev/null
+++ b/code/MOSuite/man/parse_samples_to_rename.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{parse_samples_to_rename}
+\alias{parse_samples_to_rename}
+\title{Parse sample rename pairs from string}
+\usage{
+parse_samples_to_rename(x)
+}
+\arguments{
+\item{x}{character string with rename pairs in format "old:new,old2:new2"}
+}
+\value{
+named list with old names as keys and new names as values, or NULL if empty
+}
+\description{
+Parses a string containing sample rename pairs in format "old:new,old2:new2"
+and returns a named list where names are old sample names and values are new names.
+}
+\examples{
+\dontrun{
+parse_samples_to_rename("sample1:S1,sample2:S2")
+parse_samples_to_rename("")
+}
+
+}
+\keyword{internal}
diff --git a/code/MOSuite/man/parse_vector_with_default.Rd b/code/MOSuite/man/parse_vector_with_default.Rd
new file mode 100644
index 0000000..d4a5f01
--- /dev/null
+++ b/code/MOSuite/man/parse_vector_with_default.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{parse_vector_with_default}
+\alias{parse_vector_with_default}
+\title{Parse comma-separated string with default fallback}
+\usage{
+parse_vector_with_default(x, default)
+}
+\arguments{
+\item{x}{character string with comma-separated values}
+
+\item{default}{default value to return if x is empty}
+}
+\value{
+character vector or default value
+}
+\description{
+Splits a comma-separated string into a trimmed character vector.
+Returns a default value if input is empty, NULL, or has zero length.
+}
+\examples{
+\dontrun{
+parse_vector_with_default("a, b, c", "default")
+parse_vector_with_default("", "default")
+}
+
+}
+\keyword{internal}
diff --git a/code/MOSuite/man/plot_corr_heatmap-data.frame.Rd b/code/MOSuite/man/plot_corr_heatmap-data.frame.Rd
new file mode 100644
index 0000000..0eec133
--- /dev/null
+++ b/code/MOSuite/man/plot_corr_heatmap-data.frame.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_heatmap.R
+\name{plot_corr_heatmap,data.frame-method}
+\alias{plot_corr_heatmap,data.frame-method}
+\alias{plot_corr_heatmap.data.frame}
+\title{Plot correlation heatmap for counts dataframe}
+\arguments{
+\item{moo_counts}{a \code{data.frame} of counts}
+
+\item{sample_metadata}{sample metadata as a data frame or tibble (\strong{Required})}
+
+\item{sample_id_colname}{The column from the sample metadata containing the sample names. The names in this column
+must exactly match the names used as the sample column names of your input Counts Matrix. (Default: \code{NULL} - first
+column in the sample metadata will be used.)}
+
+\item{feature_id_colname}{The column from the counts data containing the Feature IDs (Usually Gene or Protein ID).
+This is usually the first column of your input Counts Matrix. Only columns of Text type from your input Counts
+Matrix will be available to select for this parameter. (Default: \code{NULL} - first column in the counts matrix will be
+used.)}
+
+\item{group_colname}{The column from the sample metadata containing the sample group information. This is usually a
+column showing to which experimental treatments each sample belongs (e.g. WildType, Knockout, Tumor, Normal,
+Before, After, etc.).}
+
+\item{label_colname}{The column from the sample metadata containing the sample labels as you wish them to appear in
+the plots produced by this template. This can be the same Sample Names Column. However, you may desire different
+labels to display on your figure (e.g. shorter labels are sometimes preferred on plots). In that case, select the
+column with your preferred Labels here. The selected column should contain unique names for each sample. (Default:
+\code{NULL} -- \code{sample_id_colname} will be used.)}
+
+\item{color_values}{vector of colors as hex values or names recognized by R}
+}
+\description{
+Plot correlation heatmap for counts dataframe
+}
+\seealso{
+\code{\link[=plot_corr_heatmap]{plot_corr_heatmap()}} generic
+
+Other plotters for counts dataframes:
+\code{\link{plot_histogram,data.frame-method}},
+\code{\link{plot_pca,data.frame-method}},
+\code{\link{plot_read_depth,data.frame-method}}
+}
+\concept{plotters for counts dataframes}
diff --git a/code/MOSuite/man/plot_corr_heatmap-multiOmicDataSet.Rd b/code/MOSuite/man/plot_corr_heatmap-multiOmicDataSet.Rd
new file mode 100644
index 0000000..0f09035
--- /dev/null
+++ b/code/MOSuite/man/plot_corr_heatmap-multiOmicDataSet.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_heatmap.R
+\name{plot_corr_heatmap,MOSuite::multiOmicDataSet-method}
+\alias{plot_corr_heatmap,MOSuite::multiOmicDataSet-method}
+\alias{plot_corr_heatmap.multiOmicDataSet}
+\title{Plot correlation heatmap for multiOmicDataSet}
+\arguments{
+\item{moo_counts}{a \code{multiOmicDataSet} object}
+
+\item{count_type}{the type of counts to use. Must be a name in the counts slot (\code{names(moo@counts)}).}
+
+\item{sub_count_type}{used if \code{count_type} is a list in the counts slot: specify the sub count type within the list.
+Must be a name in \code{names(moo@counts[[count_type]])}.}
+
+\item{...}{additional arguments forwarded to \code{\link[=plot_corr_heatmap]{plot_corr_heatmap()}} for \code{data.frame}}
+}
+\description{
+Plot correlation heatmap for multiOmicDataSet
+}
+\seealso{
+\code{\link[=plot_corr_heatmap]{plot_corr_heatmap()}} generic
+
+Other plotters for multiOmicDataSets:
+\code{\link{plot_histogram,MOSuite::multiOmicDataSet-method}},
+\code{\link{plot_pca,MOSuite::multiOmicDataSet-method}},
+\code{\link{plot_read_depth,MOSuite::multiOmicDataSet-method}}
+}
+\concept{plotters for multiOmicDataSets}
diff --git a/code/MOSuite/man/plot_corr_heatmap.Rd b/code/MOSuite/man/plot_corr_heatmap.Rd
new file mode 100644
index 0000000..f09d191
--- /dev/null
+++ b/code/MOSuite/man/plot_corr_heatmap.Rd
@@ -0,0 +1,96 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_heatmap.R
+\name{plot_corr_heatmap}
+\alias{plot_corr_heatmap}
+\title{Plot correlation heatmap}
+\usage{
+plot_corr_heatmap(moo_counts, ...)
+}
+\arguments{
+\item{moo_counts}{counts dataframe or \code{multiOmicDataSet} containing \code{count_type} & \code{sub_count_type} in the counts
+slot}
+
+\item{...}{arguments forwarded to method}
+}
+\value{
+heatmap from \code{ComplexHeatmap::Heatmap()}
+}
+\description{
+Plot correlation heatmap
+}
+\details{
+\subsection{Method Usage}{
+
+\if{html}{\out{}}\preformatted{# multiOmicDataSet
+plot_corr_heatmap(moo_counts,
+ count_type,
+ sub_count_type = NULL,
+ ...)
+
+# dataframe
+plot_corr_heatmap(moo_counts,
+ sample_metadata,
+ sample_id_colname = NULL,
+ feature_id_colname = NULL,
+ group_colname = "Group",
+ label_colname = "Label",
+ color_values = c(
+ "#5954d6", "#e1562c", "#b80058", "#00c6f8", "#d163e6", "#00a76c",
+ "#ff9287", "#008cf9", "#006e00", "#796880", "#FFA500", "#878500"
+ ))
+}\if{html}{\out{
}}
+}
+}
+\examples{
+# plot correlation heatmap for a counts slot in a multiOmicDataset Object
+moo <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list("raw" = as.data.frame(nidap_raw_counts))
+)
+p <- plot_corr_heatmap(moo, count_type = "raw")
+
+# plot correlation heatmap for a counts dataframe
+plot_corr_heatmap(
+ moo@counts$raw,
+ sample_metadata = moo@sample_meta,
+ sample_id_colname = "Sample",
+ feature_id_colname = "Gene",
+ group_colname = "Group",
+ label_colname = "Label"
+)
+}
+\seealso{
+\itemize{
+\item \code{\link[=plot_corr_heatmap.multiOmicDataSet]{plot_corr_heatmap.multiOmicDataSet()}}
+\item \code{\link[=plot_corr_heatmap.data.frame]{plot_corr_heatmap.data.frame()}}
+}
+
+Other plotters:
+\code{\link[=plot_expr_heatmap]{plot_expr_heatmap()}},
+\code{\link[=plot_histogram]{plot_histogram()}},
+\code{\link[=plot_pca]{plot_pca()}},
+\code{\link[=plot_read_depth]{plot_read_depth()}},
+\code{\link[=print_or_save_plot]{print_or_save_plot()}}
+
+Other heatmaps:
+\code{\link[=plot_expr_heatmap]{plot_expr_heatmap()}}
+
+Other moo methods:
+\code{\link[=batch_correct_counts]{batch_correct_counts()}},
+\code{\link[=clean_raw_counts]{clean_raw_counts()}},
+\code{\link[=diff_counts]{diff_counts()}},
+\code{\link[=filter_counts]{filter_counts()}},
+\code{\link[=filter_diff]{filter_diff()}},
+\code{\link[=normalize_counts]{normalize_counts()}},
+\code{\link[=plot_expr_heatmap]{plot_expr_heatmap()}},
+\code{\link[=plot_histogram]{plot_histogram()}},
+\code{\link[=plot_pca]{plot_pca()}},
+\code{\link[=plot_read_depth]{plot_read_depth()}},
+\code{\link[=run_deseq2]{run_deseq2()}},
+\code{\link[=set_color_pal]{set_color_pal()}}
+}
+\concept{heatmaps}
+\concept{moo methods}
+\concept{plotters}
+\keyword{plotters}
diff --git a/code/MOSuite/man/plot_expr_heatmap.Rd b/code/MOSuite/man/plot_expr_heatmap.Rd
new file mode 100644
index 0000000..8f8b890
--- /dev/null
+++ b/code/MOSuite/man/plot_expr_heatmap.Rd
@@ -0,0 +1,376 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_heatmap.R
+\name{plot_expr_heatmap}
+\alias{plot_expr_heatmap}
+\alias{plot_expr_heatmap,MOSuite::multiOmicDataSet-method}
+\alias{plot_expr_heatmap,data.frame-method}
+\title{Plot expression heatmap}
+\usage{
+plot_expr_heatmap(
+ moo_counts,
+ count_type,
+ sub_count_type = NULL,
+ sample_metadata = NULL,
+ sample_id_colname = NULL,
+ feature_id_colname = NULL,
+ group_colname = "Group",
+ label_colname = NULL,
+ samples_to_include = NULL,
+ color_values = c("#5954d6", "#e1562c", "#b80058", "#00c6f8", "#d163e6", "#00a76c",
+ "#ff9287", "#008cf9", "#006e00", "#796880", "#FFA500", "#878500"),
+ include_all_genes = FALSE,
+ filter_top_genes_by_variance = TRUE,
+ top_genes_by_variance_to_include = 500,
+ specific_genes_to_include_in_heatmap = "None",
+ cluster_genes = TRUE,
+ gene_distance_metric = "correlation",
+ gene_clustering_method = "average",
+ display_gene_dendrograms = TRUE,
+ display_gene_names = FALSE,
+ center_and_rescale_expression = TRUE,
+ cluster_samples = FALSE,
+ arrange_sample_columns = TRUE,
+ order_by_gene_expression = FALSE,
+ gene_to_order_columns = " ",
+ gene_expression_order = "low_to_high",
+ smpl_distance_metric = "correlation",
+ smpl_clustering_method = "average",
+ display_smpl_dendrograms = TRUE,
+ reorder_dendrogram = FALSE,
+ reorder_dendrogram_order = c(),
+ display_sample_names = TRUE,
+ group_columns = c("Group", "Replicate", "Batch"),
+ assign_group_colors = FALSE,
+ assign_color_to_sample_groups = c(),
+ group_colors = c("#5954d6", "#e1562c", "#b80058", "#00c6f8", "#d163e6", "#00a76c",
+ "#ff9287", "#008cf9", "#006e00", "#796880", "#FFA500", "#878500"),
+ heatmap_color_scheme = "Default",
+ autoscale_heatmap_color = TRUE,
+ set_min_heatmap_color = -2,
+ set_max_heatmap_color = 2,
+ aspect_ratio = "Auto",
+ legend_font_size = 10,
+ gene_name_font_size = 4,
+ sample_name_font_size = 8,
+ display_numbers = FALSE,
+ plot_filename = "expr_heatmap.png",
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots"),
+ plots_subdir = "heatmap"
+)
+
+## S7 method for class
+plot_expr_heatmap(
+ moo_counts,
+ count_type,
+ sub_count_type = NULL,
+ sample_metadata = NULL,
+ sample_id_colname = NULL,
+ feature_id_colname = NULL,
+ group_colname = "Group",
+ label_colname = NULL,
+ samples_to_include = NULL,
+ color_values = c("#5954d6", "#e1562c", "#b80058", "#00c6f8", "#d163e6", "#00a76c",
+ "#ff9287", "#008cf9", "#006e00", "#796880", "#FFA500", "#878500"),
+ include_all_genes = FALSE,
+ filter_top_genes_by_variance = TRUE,
+ top_genes_by_variance_to_include = 500,
+ specific_genes_to_include_in_heatmap = "None",
+ cluster_genes = TRUE,
+ gene_distance_metric = "correlation",
+ gene_clustering_method = "average",
+ display_gene_dendrograms = TRUE,
+ display_gene_names = FALSE,
+ center_and_rescale_expression = TRUE,
+ cluster_samples = FALSE,
+ arrange_sample_columns = TRUE,
+ order_by_gene_expression = FALSE,
+ gene_to_order_columns = " ",
+ gene_expression_order = "low_to_high",
+ smpl_distance_metric = "correlation",
+ smpl_clustering_method = "average",
+ display_smpl_dendrograms = TRUE,
+ reorder_dendrogram = FALSE,
+ reorder_dendrogram_order = c(),
+ display_sample_names = TRUE,
+ group_columns = c("Group", "Replicate", "Batch"),
+ assign_group_colors = FALSE,
+ assign_color_to_sample_groups = c(),
+ group_colors = c("#5954d6", "#e1562c", "#b80058", "#00c6f8", "#d163e6", "#00a76c",
+ "#ff9287", "#008cf9", "#006e00", "#796880", "#FFA500", "#878500"),
+ heatmap_color_scheme = "Default",
+ autoscale_heatmap_color = TRUE,
+ set_min_heatmap_color = -2,
+ set_max_heatmap_color = 2,
+ aspect_ratio = "Auto",
+ legend_font_size = 10,
+ gene_name_font_size = 4,
+ sample_name_font_size = 8,
+ display_numbers = FALSE,
+ plot_filename = "expr_heatmap.png",
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots"),
+ plots_subdir = "heatmap"
+)
+
+## S7 method for class
+plot_expr_heatmap(
+ moo_counts,
+ count_type,
+ sub_count_type = NULL,
+ sample_metadata = NULL,
+ sample_id_colname = NULL,
+ feature_id_colname = NULL,
+ group_colname = "Group",
+ label_colname = NULL,
+ samples_to_include = NULL,
+ color_values = c("#5954d6", "#e1562c", "#b80058", "#00c6f8", "#d163e6", "#00a76c",
+ "#ff9287", "#008cf9", "#006e00", "#796880", "#FFA500", "#878500"),
+ include_all_genes = FALSE,
+ filter_top_genes_by_variance = TRUE,
+ top_genes_by_variance_to_include = 500,
+ specific_genes_to_include_in_heatmap = "None",
+ cluster_genes = TRUE,
+ gene_distance_metric = "correlation",
+ gene_clustering_method = "average",
+ display_gene_dendrograms = TRUE,
+ display_gene_names = FALSE,
+ center_and_rescale_expression = TRUE,
+ cluster_samples = FALSE,
+ arrange_sample_columns = TRUE,
+ order_by_gene_expression = FALSE,
+ gene_to_order_columns = " ",
+ gene_expression_order = "low_to_high",
+ smpl_distance_metric = "correlation",
+ smpl_clustering_method = "average",
+ display_smpl_dendrograms = TRUE,
+ reorder_dendrogram = FALSE,
+ reorder_dendrogram_order = c(),
+ display_sample_names = TRUE,
+ group_columns = c("Group", "Replicate", "Batch"),
+ assign_group_colors = FALSE,
+ assign_color_to_sample_groups = c(),
+ group_colors = c("#5954d6", "#e1562c", "#b80058", "#00c6f8", "#d163e6", "#00a76c",
+ "#ff9287", "#008cf9", "#006e00", "#796880", "#FFA500", "#878500"),
+ heatmap_color_scheme = "Default",
+ autoscale_heatmap_color = TRUE,
+ set_min_heatmap_color = -2,
+ set_max_heatmap_color = 2,
+ aspect_ratio = "Auto",
+ legend_font_size = 10,
+ gene_name_font_size = 4,
+ sample_name_font_size = 8,
+ display_numbers = FALSE,
+ plot_filename = "expr_heatmap.png",
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots"),
+ plots_subdir = "heatmap"
+)
+}
+\arguments{
+\item{moo_counts}{counts dataframe or \code{multiOmicDataSet} containing \code{count_type} & \code{sub_count_type} in the counts
+slot}
+
+\item{count_type}{the type of counts to use. Must be a name in the counts slot (\code{names(moo@counts)}).}
+
+\item{sub_count_type}{used if \code{count_type} is a list in the counts slot: specify the sub count type within the list.
+Must be a name in \code{names(moo@counts[[count_type]])}.}
+
+\item{sample_metadata}{sample metadata as a data frame or tibble (only required if \code{moo_counts} is a dataframe)}
+
+\item{sample_id_colname}{The column from the sample metadata containing the sample names. The names in this column
+must exactly match the names used as the sample column names of your input Counts Matrix. (Default: \code{NULL} - first
+column in the sample metadata will be used.)}
+
+\item{feature_id_colname}{The column from the counts dataa containing the Feature IDs (Usually Gene or Protein ID).
+This is usually the first column of your input Counts Matrix. Only columns of Text type from your input Counts
+Matrix will be available to select for this parameter. (Default: \code{NULL} - first column in the counts matrix will be
+used.)}
+
+\item{group_colname}{The column from the sample metadata containing the sample group information. This is usually a
+column showing to which experimental treatments each sample belongs (e.g. WildType, Knockout, Tumor, Normal,
+Before, After, etc.).}
+
+\item{label_colname}{The column from the sample metadata containing the sample labels as you wish them to appear in
+the plots produced by this template. This can be the same Sample Names Column. However, you may desire different
+labels to display on your figure (e.g. shorter labels are sometimes preferred on plots). In that case, select the
+column with your preferred Labels here. The selected column should contain unique names for each sample. (Default:
+\code{NULL} -- \code{sample_id_colname} will be used.)}
+
+\item{samples_to_include}{Which samples would you like to include? Usually, you will choose all sample columns, or
+you could choose to remove certain samples. Samples excluded here will be removed in this step and from further
+analysis downstream of this step. (Default: \code{NULL} - all sample IDs in \code{moo@sample_meta} will be used.)}
+
+\item{color_values}{vector of colors as hex values or names recognized by R}
+
+\item{include_all_genes}{Set to TRUE if all genes are to be included. Set to FALSE if you want to filter genes by
+variance and/or provide a list of specific genes that will appear in the heatmap.}
+
+\item{filter_top_genes_by_variance}{Set to TRUE if you want to only include the top genes by variance. Set to FALSE
+if you do not want to filter genes by variance.}
+
+\item{top_genes_by_variance_to_include}{The number of genes to include if filtering genes by variance. This parameter
+is ignored if "Filter top genes by variance" is set to FALSE.}
+
+\item{specific_genes_to_include_in_heatmap}{Enter the gene symbols to be included in the heatmap, with each gene
+symbol separated with a space from the others. Alternatively, paste in a column of gene names from any spreadsheet
+application. This parameter is ignored if "Include all genes" is set to TRUE.}
+
+\item{cluster_genes}{Choose whether to cluster the rows (genes). If TRUE, rows will have clustering applied. If
+FALSE, clustering will not be applied to rows.}
+
+\item{gene_distance_metric}{Distance metric to be used in clustering genes. (TODO document options)}
+
+\item{gene_clustering_method}{Clustering method metric to be used in clustering samples. (TODO document options)}
+
+\item{display_gene_dendrograms}{Set to TRUE to show gene dendrograms. Set to FALSE to hide dendrograms.}
+
+\item{display_gene_names}{Set to TRUE to display gene names on the right side of the heatmap. Set to FALSE to hide
+gene names.}
+
+\item{center_and_rescale_expression}{Center and rescale expression for each gene across all included samples.}
+
+\item{cluster_samples}{Choose whether to cluster the columns (samples). If TRUE, columns will have clustering
+applied. If FALSE, clustering will not be applied to columns.}
+
+\item{arrange_sample_columns}{If TRUE, arranges columns by annotation groups. If FALSE, and "Cluster Samples" is
+FALSE, samples will appear in the order of input (samples to include)}
+
+\item{order_by_gene_expression}{If TRUE, set gene name below and direction for ordering}
+
+\item{gene_to_order_columns}{Gene to order columns by expression levels}
+
+\item{gene_expression_order}{Choose direction for gene order}
+
+\item{smpl_distance_metric}{Distance metric to be used in clustering samples. (TODO document options)}
+
+\item{smpl_clustering_method}{Clustering method to be used in clustering samples. (TODO document options)}
+
+\item{display_smpl_dendrograms}{Set to TRUE to show sample dendrograms. Set to FALSE to hide dendrogram.}
+
+\item{reorder_dendrogram}{If TRUE, set the order of the dendrogram (below)}
+
+\item{reorder_dendrogram_order}{Reorder the samples (columns) of the dendrogram by name, e.g.
+“sample2”,“sample3",“sample1".}
+
+\item{display_sample_names}{Set to TRUE if you want sample names to be displayed on the plot. Set to FALSE to hide
+sample names.}
+
+\item{group_columns}{Columns containing the sample groups for annotation tracks}
+
+\item{assign_group_colors}{If TRUE, set the groups assigned colors (below)}
+
+\item{assign_color_to_sample_groups}{Enter each sample to color in the format: group_name: color This parameter is
+ignored if "Assign Colors" is set to FALSE.}
+
+\item{group_colors}{Set group annotation colors.}
+
+\item{heatmap_color_scheme}{color scheme (TODO document options)}
+
+\item{autoscale_heatmap_color}{Set to TRUE to autoscale the heatmap colors between the maximum and minimum heatmap
+color parameters. If FALSE, set the heatmap colors between "Set max heatmap color" and "Set min heatmap color"
+(below).}
+
+\item{set_min_heatmap_color}{If Autoscale heatmap color is set to FALSE, set the minimum heatmap z-score value}
+
+\item{set_max_heatmap_color}{If Autoscale heatmap color is set to FALSE, set the maximum heatmap z-score value.}
+
+\item{aspect_ratio}{Set figure Aspect Ratio. Ratio refers to entire figure including legend. If set to Auto figure
+size is based on number of rows and columns form counts matrix. default - Auto}
+
+\item{legend_font_size}{Set Font size for figure legend. Default is 10.}
+
+\item{gene_name_font_size}{Font size for gene names. If you don't want gene labels to show, toggle "Display Gene
+Names" below to FALSE}
+
+\item{sample_name_font_size}{Font size for sample names. If you don't want to display samples names, toggle "Display
+sample names" (below) to FALSE}
+
+\item{display_numbers}{Setting to FALSE (default) will not display numerical value of heat on heatmap. Set to TRUE if
+you want to see these numbers on the plot.}
+
+\item{plot_filename}{plot output filename - only used if save_plots is TRUE}
+
+\item{print_plots}{Whether to print plots during analysis (Defaults to \code{FALSE}, overwritable using option 'moo_print_plots' or environment variable 'MOO_PRINT_PLOTS')}
+
+\item{save_plots}{Whether to save plots to files during analysis (Defaults to \code{TRUE}, overwritable using option 'moo_save_plots' or environment variable 'MOO_SAVE_PLOTS')}
+
+\item{plots_subdir}{subdirectory in \verb{figures/} where plots will be saved if \code{save_plots} is \code{TRUE}}
+}
+\value{
+heatmap from \code{ComplexHeatmap::Heatmap()}
+}
+\description{
+The samples (i.e. the columns) are clustered in an unsupervised fashion based
+on how similar their expression profiles are across the included genes. This
+can help identify samples that are non clustering with their group as you
+might expect based on the experimental design.
+}
+\details{
+By default, the top 500 genes by variance are used, as these are
+generally going to include those genes that most distinguish your samples
+from one another. You can change this as well as many other parameters about
+this heatmap if you explore the advanced options.
+}
+\examples{
+# plot expression heatmap for a counts slot in a multiOmicDataset Object
+moo <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = nidap_raw_counts,
+ "norm" = list(
+ "voom" = as.data.frame(nidap_norm_counts)
+ )
+ )
+)
+p <- plot_expr_heatmap(moo, count_type = "norm", sub_count_type = "voom")
+
+# customize the plot
+plot_expr_heatmap(moo,
+ count_type = "norm", sub_count_type = "voom",
+ top_genes_by_variance_to_include = 100
+)
+
+# plot expression heatmap for a counts dataframe
+counts_dat <- moo@counts$norm$voom
+plot_expr_heatmap(
+ counts_dat,
+ sample_metadata = nidap_sample_metadata,
+ sample_id_colname = "Sample",
+ feature_id_colname = "Gene",
+ group_colname = "Group",
+ label_colname = "Label",
+ top_genes_by_variance_to_include = 100
+)
+
+}
+\seealso{
+Other plotters:
+\code{\link[=plot_corr_heatmap]{plot_corr_heatmap()}},
+\code{\link[=plot_histogram]{plot_histogram()}},
+\code{\link[=plot_pca]{plot_pca()}},
+\code{\link[=plot_read_depth]{plot_read_depth()}},
+\code{\link[=print_or_save_plot]{print_or_save_plot()}}
+
+Other heatmaps:
+\code{\link[=plot_corr_heatmap]{plot_corr_heatmap()}}
+
+Other moo methods:
+\code{\link[=batch_correct_counts]{batch_correct_counts()}},
+\code{\link[=clean_raw_counts]{clean_raw_counts()}},
+\code{\link[=diff_counts]{diff_counts()}},
+\code{\link[=filter_counts]{filter_counts()}},
+\code{\link[=filter_diff]{filter_diff()}},
+\code{\link[=normalize_counts]{normalize_counts()}},
+\code{\link[=plot_corr_heatmap]{plot_corr_heatmap()}},
+\code{\link[=plot_histogram]{plot_histogram()}},
+\code{\link[=plot_pca]{plot_pca()}},
+\code{\link[=plot_read_depth]{plot_read_depth()}},
+\code{\link[=run_deseq2]{run_deseq2()}},
+\code{\link[=set_color_pal]{set_color_pal()}}
+}
+\concept{heatmaps}
+\concept{moo methods}
+\concept{plotters}
+\keyword{plotters}
diff --git a/code/MOSuite/man/plot_histogram.Rd b/code/MOSuite/man/plot_histogram.Rd
new file mode 100644
index 0000000..4f6a920
--- /dev/null
+++ b/code/MOSuite/man/plot_histogram.Rd
@@ -0,0 +1,76 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_histogram.R
+\name{plot_histogram}
+\alias{plot_histogram}
+\title{Plot histogram}
+\usage{
+plot_histogram(moo_counts, ...)
+}
+\arguments{
+\item{moo_counts}{counts dataframe or \code{multiOmicDataSet} containing \code{count_type} & \code{sub_count_type} in the counts
+slot}
+
+\item{...}{arguments forwarded to method}
+}
+\value{
+ggplot object
+}
+\description{
+Plot histogram
+}
+\examples{
+# plot histogram for a counts slot in a multiOmicDataset Object
+moo <- multiOmicDataSet(
+ sample_metadata = nidap_sample_metadata,
+ anno_dat = data.frame(),
+ counts_lst = list("raw" = nidap_raw_counts)
+)
+p <- plot_histogram(moo, count_type = "raw")
+
+# customize the plot
+plot_histogram(moo,
+ count_type = "raw",
+ group_colname = "Group", color_by_group = TRUE
+)
+
+# plot histogram for a counts dataframe directly
+counts_dat <- moo@counts$raw
+plot_histogram(
+ counts_dat,
+ sample_metadata = nidap_sample_metadata,
+ sample_id_colname = "Sample",
+ feature_id_colname = "GeneName",
+ label_colname = "Label"
+)
+
+}
+\seealso{
+\itemize{
+\item \code{\link[=plot_histogram.multiOmicDataSet]{plot_histogram.multiOmicDataSet()}}
+\item \code{\link[=plot_histogram.data.frame]{plot_histogram.data.frame()}}
+}
+
+Other plotters:
+\code{\link[=plot_corr_heatmap]{plot_corr_heatmap()}},
+\code{\link[=plot_expr_heatmap]{plot_expr_heatmap()}},
+\code{\link[=plot_pca]{plot_pca()}},
+\code{\link[=plot_read_depth]{plot_read_depth()}},
+\code{\link[=print_or_save_plot]{print_or_save_plot()}}
+
+Other moo methods:
+\code{\link[=batch_correct_counts]{batch_correct_counts()}},
+\code{\link[=clean_raw_counts]{clean_raw_counts()}},
+\code{\link[=diff_counts]{diff_counts()}},
+\code{\link[=filter_counts]{filter_counts()}},
+\code{\link[=filter_diff]{filter_diff()}},
+\code{\link[=normalize_counts]{normalize_counts()}},
+\code{\link[=plot_corr_heatmap]{plot_corr_heatmap()}},
+\code{\link[=plot_expr_heatmap]{plot_expr_heatmap()}},
+\code{\link[=plot_pca]{plot_pca()}},
+\code{\link[=plot_read_depth]{plot_read_depth()}},
+\code{\link[=run_deseq2]{run_deseq2()}},
+\code{\link[=set_color_pal]{set_color_pal()}}
+}
+\concept{moo methods}
+\concept{plotters}
+\keyword{plotters}
diff --git a/code/MOSuite/man/plot_histogram.data.frame.Rd b/code/MOSuite/man/plot_histogram.data.frame.Rd
new file mode 100644
index 0000000..ba46275
--- /dev/null
+++ b/code/MOSuite/man/plot_histogram.data.frame.Rd
@@ -0,0 +1,89 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_histogram.R
+\name{plot_histogram,data.frame-method}
+\alias{plot_histogram,data.frame-method}
+\alias{plot_histogram.data.frame}
+\title{Plot histogram for counts dataframe}
+\arguments{
+\item{sample_metadata}{sample metadata as a data frame or tibble (\strong{required})}
+
+\item{sample_id_colname}{The column from the sample metadata containing the sample names. The names in this column
+must exactly match the names used as the sample column names of your input Counts Matrix. (Default: \code{NULL} - first
+column in the sample metadata will be used.)}
+
+\item{feature_id_colname}{The column from the counts dataa containing the Feature IDs (Usually Gene or Protein ID).
+This is usually the first column of your input Counts Matrix. Only columns of Text type from your input Counts
+Matrix will be available to select for this parameter. (Default: \code{NULL} - first column in the counts matrix will be
+used.)}
+
+\item{group_colname}{The column from the sample metadata containing the sample group information. This is usually a
+column showing to which experimental treatments each sample belongs (e.g. WildType, Knockout, Tumor, Normal,
+Before, After, etc.).}
+
+\item{label_colname}{The column from the sample metadata containing the sample labels as you wish them to appear in
+the plots produced by this template. This can be the same Sample Names Column. However, you may desire different
+labels to display on your figure (e.g. shorter labels are sometimes preferred on plots). In that case, select the
+column with your preferred Labels here. The selected column should contain unique names for each sample. (Default:
+\code{NULL} -- \code{sample_id_colname} will be used.)}
+
+\item{color_values}{vector of colors as hex values or names recognized by R}
+
+\item{color_by_group}{Set to FALSE to label histogram by Sample Names, or set to TRUE to label histogram by the
+column you select in the "Group Column Used to Color Histogram" parameter (below). Default is FALSE.}
+
+\item{set_min_max_for_x_axis}{whether to override the default for \code{ggplot2::xlim()} (default: \code{FALSE})}
+
+\item{minimum_for_x_axis}{value to override default \code{min} for \code{ggplot2::xlim()}}
+
+\item{maximum_for_x_axis}{value to override default \code{max} for \code{ggplot2::xlim()}}
+
+\item{x_axis_label}{text label for the x axis \code{ggplot2::xlab()}}
+
+\item{y_axis_label}{text label for the y axis \code{ggplot2::ylab()}}
+
+\item{legend_position}{passed to in \code{legend.position} \code{ggplot2::theme()}}
+
+\item{legend_font_size}{passed to \code{ggplot2::element_text()} via \code{ggplot2::theme()}}
+
+\item{number_of_legend_columns}{passed to \code{ncol} in \code{ggplot2::guide_legend()}}
+
+\item{interactive_plots}{set to TRUE to make the plot interactive with \code{plotly}, allowing you to hover your mouse
+over a point or line to view sample information. The similarity heat map will not display if this toggle is set to
+TRUE. Default is FALSE.}
+
+\item{...}{additional arguments (ignored; accepted for compatibility with the moo dispatch)}
+}
+\description{
+Plot histogram for counts dataframe
+}
+\examples{
+
+# plot histogram for a counts dataframe directly
+plot_histogram(
+ nidap_clean_raw_counts,
+ sample_metadata = nidap_sample_metadata,
+ sample_id_colname = "Sample",
+ feature_id_colname = "Gene",
+ label_colname = "Label"
+)
+
+# customize the plot
+plot_histogram(
+ nidap_clean_raw_counts,
+ sample_metadata = nidap_sample_metadata,
+ sample_id_colname = "Sample",
+ feature_id_colname = "Gene",
+ group_colname = "Group",
+ color_by_group = TRUE
+)
+
+}
+\seealso{
+\code{\link[=plot_histogram]{plot_histogram()}} generic
+
+Other plotters for counts dataframes:
+\code{\link{plot_corr_heatmap,data.frame-method}},
+\code{\link{plot_pca,data.frame-method}},
+\code{\link{plot_read_depth,data.frame-method}}
+}
+\concept{plotters for counts dataframes}
diff --git a/code/MOSuite/man/plot_histogram.multiOmicDataSet.Rd b/code/MOSuite/man/plot_histogram.multiOmicDataSet.Rd
new file mode 100644
index 0000000..75da66e
--- /dev/null
+++ b/code/MOSuite/man/plot_histogram.multiOmicDataSet.Rd
@@ -0,0 +1,41 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_histogram.R
+\name{plot_histogram,MOSuite::multiOmicDataSet-method}
+\alias{plot_histogram,MOSuite::multiOmicDataSet-method}
+\alias{plot_histogram.multiOmicDataSet}
+\title{Plot histogram for multiOmicDataSet}
+\arguments{
+\item{count_type}{Required if \code{moo_counts} is a \code{multiOmicDataSet}: the type of counts to use -- must be a name in
+the counts slot (\code{moo@counts}).}
+
+\item{sub_count_type}{Used if \code{moo_counts} is a \code{multiOmicDataSet} AND if \code{count_type} is a list, specify the sub
+count type within the list}
+}
+\description{
+Plot histogram for multiOmicDataSet
+}
+\examples{
+# plot histogram for a counts slot in a multiOmicDataset Object
+moo <- multiOmicDataSet(
+ sample_metadata = nidap_sample_metadata,
+ anno_dat = data.frame(),
+ counts_lst = list("raw" = nidap_raw_counts)
+)
+p <- plot_histogram(moo, count_type = "raw")
+
+# customize the plot
+plot_histogram(moo,
+ count_type = "raw",
+ group_colname = "Group", color_by_group = TRUE
+)
+
+}
+\seealso{
+\code{\link[=plot_histogram]{plot_histogram()}} generic
+
+Other plotters for multiOmicDataSets:
+\code{\link{plot_corr_heatmap,MOSuite::multiOmicDataSet-method}},
+\code{\link{plot_pca,MOSuite::multiOmicDataSet-method}},
+\code{\link{plot_read_depth,MOSuite::multiOmicDataSet-method}}
+}
+\concept{plotters for multiOmicDataSets}
diff --git a/code/MOSuite/man/plot_pca.Rd b/code/MOSuite/man/plot_pca.Rd
new file mode 100644
index 0000000..6dfa47e
--- /dev/null
+++ b/code/MOSuite/man/plot_pca.Rd
@@ -0,0 +1,89 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_pca.R
+\name{plot_pca}
+\alias{plot_pca}
+\title{Perform and plot a Principal Components Analysis}
+\usage{
+plot_pca(moo_counts, principal_components = c(1, 2), ...)
+}
+\arguments{
+\item{moo_counts}{counts dataframe or \code{multiOmicDataSet} containing \code{count_type} & \code{sub_count_type} in the counts
+slot}
+
+\item{principal_components}{vector with numbered principal components to plot. Use 2 for a 2D pca with ggplot, or 3
+for a 3D pca with plotly. (Default: \code{c(1,2)})}
+
+\item{...}{additional arguments forwarded to method (see Details below)}
+}
+\value{
+PCA plot (2D or 3D depending on the number of \code{principal_components})
+}
+\description{
+Perform and plot a Principal Components Analysis
+}
+\details{
+See the low-level function docs for additional arguments
+depending on whether you're plotting 2 or 3 PCs:
+\itemize{
+\item \code{\link[=plot_pca_2d]{plot_pca_2d()}} - used when there are \strong{2} principal components
+\item \code{\link[=plot_pca_3d]{plot_pca_3d()}} - used when there are \strong{3} principal components
+}
+}
+\examples{
+# multiOmicDataSet
+moo <- multiOmicDataSet(
+ sample_metadata = nidap_sample_metadata,
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = nidap_raw_counts,
+ "clean" = nidap_clean_raw_counts
+ )
+)
+plot_pca(moo, count_type = "clean", principal_components = c(1, 2))
+
+# 3D
+plot_pca(moo, count_type = "clean", principal_components = c(1, 2, 3))
+
+# dataframe
+plot_pca(nidap_clean_raw_counts,
+ sample_metadata = nidap_sample_metadata,
+ principal_components = c(1, 2)
+)
+
+}
+\seealso{
+\itemize{
+\item \code{\link[=plot_pca.multiOmicDataSet]{plot_pca.multiOmicDataSet()}}
+\item \code{\link[=plot_pca.data.frame]{plot_pca.data.frame()}}
+}
+
+Other plotters:
+\code{\link[=plot_corr_heatmap]{plot_corr_heatmap()}},
+\code{\link[=plot_expr_heatmap]{plot_expr_heatmap()}},
+\code{\link[=plot_histogram]{plot_histogram()}},
+\code{\link[=plot_read_depth]{plot_read_depth()}},
+\code{\link[=print_or_save_plot]{print_or_save_plot()}}
+
+Other PCA functions:
+\code{\link[=calc_pca]{calc_pca()}},
+\code{\link[=plot_pca_2d]{plot_pca_2d()}},
+\code{\link[=plot_pca_3d]{plot_pca_3d()}}
+
+Other moo methods:
+\code{\link[=batch_correct_counts]{batch_correct_counts()}},
+\code{\link[=clean_raw_counts]{clean_raw_counts()}},
+\code{\link[=diff_counts]{diff_counts()}},
+\code{\link[=filter_counts]{filter_counts()}},
+\code{\link[=filter_diff]{filter_diff()}},
+\code{\link[=normalize_counts]{normalize_counts()}},
+\code{\link[=plot_corr_heatmap]{plot_corr_heatmap()}},
+\code{\link[=plot_expr_heatmap]{plot_expr_heatmap()}},
+\code{\link[=plot_histogram]{plot_histogram()}},
+\code{\link[=plot_read_depth]{plot_read_depth()}},
+\code{\link[=run_deseq2]{run_deseq2()}},
+\code{\link[=set_color_pal]{set_color_pal()}}
+}
+\concept{PCA functions}
+\concept{moo methods}
+\concept{plotters}
+\keyword{plotters}
diff --git a/code/MOSuite/man/plot_pca.data.frame.Rd b/code/MOSuite/man/plot_pca.data.frame.Rd
new file mode 100644
index 0000000..7a02771
--- /dev/null
+++ b/code/MOSuite/man/plot_pca.data.frame.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_pca.R
+\name{plot_pca,data.frame-method}
+\alias{plot_pca,data.frame-method}
+\alias{plot_pca.data.frame}
+\title{Plot 2D or 3D PCA for counts dataframe}
+\arguments{
+\item{sample_metadata}{\strong{Required} if \code{moo_counts} is a \code{data.frame}: sample metadata as a data frame or tibble.}
+}
+\description{
+Plot 2D or 3D PCA for counts dataframe
+}
+\seealso{
+\code{\link[=plot_pca]{plot_pca()}} generic
+
+Other plotters for counts dataframes:
+\code{\link{plot_corr_heatmap,data.frame-method}},
+\code{\link{plot_histogram,data.frame-method}},
+\code{\link{plot_read_depth,data.frame-method}}
+}
+\concept{plotters for counts dataframes}
diff --git a/code/MOSuite/man/plot_pca.multiOmicDataSet.Rd b/code/MOSuite/man/plot_pca.multiOmicDataSet.Rd
new file mode 100644
index 0000000..f8206d7
--- /dev/null
+++ b/code/MOSuite/man/plot_pca.multiOmicDataSet.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_pca.R
+\name{plot_pca,MOSuite::multiOmicDataSet-method}
+\alias{plot_pca,MOSuite::multiOmicDataSet-method}
+\alias{plot_pca.multiOmicDataSet}
+\title{Plot 2D or 3D PCA for multiOmicDataset}
+\arguments{
+\item{count_type}{the type of counts to use. Must be a name in the counts slot (\code{names(moo@counts)}).}
+
+\item{sub_count_type}{used if \code{count_type} is a list in the counts slot: specify the sub count type within the list.
+Must be a name in \code{names(moo@counts[[count_type]])}.}
+}
+\value{
+PCA plot
+}
+\description{
+Plot 2D or 3D PCA for multiOmicDataset
+}
+\seealso{
+\code{\link[=plot_pca]{plot_pca()}} generic
+
+Other plotters for multiOmicDataSets:
+\code{\link{plot_corr_heatmap,MOSuite::multiOmicDataSet-method}},
+\code{\link{plot_histogram,MOSuite::multiOmicDataSet-method}},
+\code{\link{plot_read_depth,MOSuite::multiOmicDataSet-method}}
+}
+\concept{plotters for multiOmicDataSets}
diff --git a/code/MOSuite/man/plot_pca_2d.Rd b/code/MOSuite/man/plot_pca_2d.Rd
new file mode 100644
index 0000000..244d8b2
--- /dev/null
+++ b/code/MOSuite/man/plot_pca_2d.Rd
@@ -0,0 +1,168 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_pca.R
+\name{plot_pca_2d}
+\alias{plot_pca_2d}
+\alias{plot_pca_2d,MOSuite::multiOmicDataSet-method}
+\alias{plot_pca_2d,data.frame-method}
+\title{Perform and plot a 2D Principal Components Analysis}
+\usage{
+plot_pca_2d(
+ moo_counts,
+ count_type = NULL,
+ sub_count_type = NULL,
+ sample_metadata = NULL,
+ sample_id_colname = NULL,
+ feature_id_colname = NULL,
+ group_colname = "Group",
+ label_colname = "Label",
+ samples_to_rename = NULL,
+ color_values = c("#5954d6", "#e1562c", "#b80058", "#00c6f8", "#d163e6", "#00a76c",
+ "#ff9287", "#008cf9", "#006e00", "#796880", "#FFA500", "#878500"),
+ principal_components = c(1, 2),
+ legend_position = "top",
+ point_size = 1,
+ add_label = TRUE,
+ label_font_size = 3,
+ label_offset_x_ = 2,
+ label_offset_y_ = 2,
+ interactive_plots = FALSE,
+ plots_subdir = "pca",
+ plot_filename = "pca_2D.png",
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots")
+)
+
+## S7 method for class
+plot_pca_2d(
+ moo_counts,
+ count_type = NULL,
+ sub_count_type = NULL,
+ sample_metadata = NULL,
+ sample_id_colname = NULL,
+ feature_id_colname = NULL,
+ group_colname = "Group",
+ label_colname = "Label",
+ samples_to_rename = NULL,
+ color_values = c("#5954d6", "#e1562c", "#b80058", "#00c6f8", "#d163e6", "#00a76c",
+ "#ff9287", "#008cf9", "#006e00", "#796880", "#FFA500", "#878500"),
+ principal_components = c(1, 2),
+ legend_position = "top",
+ point_size = 1,
+ add_label = TRUE,
+ label_font_size = 3,
+ label_offset_x_ = 2,
+ label_offset_y_ = 2,
+ interactive_plots = FALSE,
+ plots_subdir = "pca",
+ plot_filename = "pca_2D.png",
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots")
+)
+
+## S7 method for class
+plot_pca_2d(
+ moo_counts,
+ count_type = NULL,
+ sub_count_type = NULL,
+ sample_metadata = NULL,
+ sample_id_colname = NULL,
+ feature_id_colname = NULL,
+ group_colname = "Group",
+ label_colname = "Label",
+ samples_to_rename = NULL,
+ color_values = c("#5954d6", "#e1562c", "#b80058", "#00c6f8", "#d163e6", "#00a76c",
+ "#ff9287", "#008cf9", "#006e00", "#796880", "#FFA500", "#878500"),
+ principal_components = c(1, 2),
+ legend_position = "top",
+ point_size = 1,
+ add_label = TRUE,
+ label_font_size = 3,
+ label_offset_x_ = 2,
+ label_offset_y_ = 2,
+ interactive_plots = FALSE,
+ plots_subdir = "pca",
+ plot_filename = "pca_2D.png",
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots")
+)
+}
+\arguments{
+\item{moo_counts}{counts dataframe or \code{multiOmicDataSet} containing \code{count_type} & \code{sub_count_type} in the counts
+slot}
+
+\item{count_type}{type to assign the values of \code{counts_dat} to in the \code{counts} slot}
+
+\item{sub_count_type}{used if \code{count_type} is a list in the counts slot: specify the sub count type within the list.
+Must be a name in \code{names(moo@counts[[count_type]])}.}
+
+\item{sample_metadata}{sample metadata as a data frame or tibble.}
+
+\item{sample_id_colname}{The column from the sample metadata containing the sample names. The names in this column
+must exactly match the names used as the sample column names of your input Counts Matrix. (Default: \code{NULL} - first
+column in the sample metadata will be used.)}
+
+\item{feature_id_colname}{The column from the counts dataa containing the Feature IDs (Usually Gene or Protein ID).
+This is usually the first column of your input Counts Matrix. Only columns of Text type from your input Counts
+Matrix will be available to select for this parameter. (Default: \code{NULL} - first column in the counts matrix will be
+used.)}
+
+\item{group_colname}{The column from the sample metadata containing the sample group information. This is usually a
+column showing to which experimental treatments each sample belongs (e.g. WildType, Knockout, Tumor, Normal,
+Before, After, etc.).}
+
+\item{label_colname}{The column from the sample metadata containing the sample labels as you wish them to appear in
+the plots produced by this template. This can be the same Sample Names Column. However, you may desire different
+labels to display on your figure (e.g. shorter labels are sometimes preferred on plots). In that case, select the
+column with your preferred Labels here. The selected column should contain unique names for each sample. (Default:
+\code{NULL} -- \code{sample_id_colname} will be used.)}
+
+\item{samples_to_rename}{If you do not have a Plot Labels Column in your sample metadata table, you can use this
+parameter to rename samples manually for display on the PCA plot. Use "Add item" to add each additional sample for
+renaming. Use the following format to describe which old name (in your sample metadata table) you want to rename to
+which new name: old_name: new_name}
+
+\item{color_values}{vector of colors as hex values or names recognized by R}
+
+\item{principal_components}{vector with numbered principal components to plot}
+
+\item{legend_position}{passed to in \code{legend.position} \code{ggplot2::theme()}}
+
+\item{point_size}{size for \code{ggplot2::geom_point()}}
+
+\item{add_label}{whether to add text labels for the points}
+
+\item{label_font_size}{label font size for the PCA plot}
+
+\item{label_offset_x_}{label offset x for the PCA plot}
+
+\item{label_offset_y_}{label offset y for the PCA plot}
+
+\item{interactive_plots}{set to TRUE to make PCA and Histogram plots interactive with \code{plotly}, allowing you to hover
+your mouse over a point or line to view sample information. The similarity heat map will not display if this toggle
+is set to \code{TRUE}. Default is \code{FALSE}.}
+
+\item{plots_subdir}{subdirectory in \verb{figures/} where plots will be saved if \code{save_plots} is \code{TRUE}}
+
+\item{plot_filename}{plot output filename - only used if save_plots is TRUE}
+
+\item{print_plots}{Whether to print plots during analysis (Defaults to \code{FALSE}, overwritable using option 'moo_print_plots' or environment variable 'MOO_PRINT_PLOTS')}
+
+\item{save_plots}{Whether to save plots to files during analysis (Defaults to \code{TRUE}, overwritable using option 'moo_save_plots' or environment variable 'MOO_SAVE_PLOTS')}
+}
+\value{
+ggplot object
+}
+\description{
+Perform and plot a 2D Principal Components Analysis
+
+Perform and plot a 2D Principal Components Analysis
+}
+\seealso{
+\code{\link[=plot_pca]{plot_pca()}} generic
+
+Other PCA functions:
+\code{\link[=calc_pca]{calc_pca()}},
+\code{\link[=plot_pca]{plot_pca()}},
+\code{\link[=plot_pca_3d]{plot_pca_3d()}}
+}
+\concept{PCA functions}
diff --git a/code/MOSuite/man/plot_pca_3d.Rd b/code/MOSuite/man/plot_pca_3d.Rd
new file mode 100644
index 0000000..acf569d
--- /dev/null
+++ b/code/MOSuite/man/plot_pca_3d.Rd
@@ -0,0 +1,128 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_pca.R
+\name{plot_pca_3d}
+\alias{plot_pca_3d}
+\alias{plot_pca_3d,MOSuite::multiOmicDataSet-method}
+\alias{plot_pca_3d,data.frame-method}
+\title{Perform and plot a 3D Principal Components Analysis}
+\usage{
+plot_pca_3d(
+ moo_counts,
+ count_type = NULL,
+ sub_count_type = NULL,
+ sample_metadata = NULL,
+ feature_id_colname = NULL,
+ sample_id_colname = NULL,
+ samples_to_rename = NULL,
+ group_colname = "Group",
+ label_colname = "Label",
+ principal_components = c(1, 2, 3),
+ point_size = 8,
+ label_font_size = 24,
+ color_values = c("#5954d6", "#e1562c", "#b80058", "#00c6f8", "#d163e6", "#00a76c",
+ "#ff9287", "#008cf9", "#006e00", "#796880", "#FFA500", "#878500"),
+ plot_title = "PCA 3D",
+ plot_filename = "pca_3D.html",
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots"),
+ plots_subdir = "pca"
+)
+
+## S7 method for class
+plot_pca_3d(
+ moo_counts,
+ count_type = NULL,
+ sub_count_type = NULL,
+ sample_metadata = NULL,
+ feature_id_colname = NULL,
+ sample_id_colname = NULL,
+ samples_to_rename = NULL,
+ group_colname = "Group",
+ label_colname = "Label",
+ principal_components = c(1, 2, 3),
+ point_size = 8,
+ label_font_size = 24,
+ color_values = c("#5954d6", "#e1562c", "#b80058", "#00c6f8", "#d163e6", "#00a76c",
+ "#ff9287", "#008cf9", "#006e00", "#796880", "#FFA500", "#878500"),
+ plot_title = "PCA 3D",
+ plot_filename = "pca_3D.html",
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots"),
+ plots_subdir = "pca"
+)
+
+## S7 method for class
+plot_pca_3d(
+ moo_counts,
+ count_type = NULL,
+ sub_count_type = NULL,
+ sample_metadata = NULL,
+ feature_id_colname = NULL,
+ sample_id_colname = NULL,
+ samples_to_rename = NULL,
+ group_colname = "Group",
+ label_colname = "Label",
+ principal_components = c(1, 2, 3),
+ point_size = 8,
+ label_font_size = 24,
+ color_values = c("#5954d6", "#e1562c", "#b80058", "#00c6f8", "#d163e6", "#00a76c",
+ "#ff9287", "#008cf9", "#006e00", "#796880", "#FFA500", "#878500"),
+ plot_title = "PCA 3D",
+ plot_filename = "pca_3D.html",
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots"),
+ plots_subdir = "pca"
+)
+}
+\arguments{
+\item{moo_counts}{counts dataframe}
+
+\item{count_type}{the type of counts to use. Ignored when \code{moo_counts} is already a dataframe.}
+
+\item{sub_count_type}{used if \code{count_type} is a list in the counts slot: specify the sub count type within the list.}
+
+\item{sample_metadata}{sample metadata as a data frame or tibble.}
+
+\item{feature_id_colname}{The column from the counts data containing feature IDs. If \code{NULL}, first column is used.}
+
+\item{sample_id_colname}{The column from sample metadata containing sample names. If \code{NULL}, first column is used.}
+
+\item{samples_to_rename}{optional named mapping in \code{old_name: new_name} format for display labels.}
+
+\item{group_colname}{The column from sample metadata containing sample group information.}
+
+\item{label_colname}{The column from sample metadata containing sample labels.}
+
+\item{principal_components}{vector with numbered principal components to plot}
+
+\item{point_size}{size for \code{ggplot2::geom_point()}}
+
+\item{label_font_size}{font size used for labels in the interactive figure.}
+
+\item{color_values}{vector of colors as hex values or names recognized by R.}
+
+\item{plot_title}{title for the plot}
+
+\item{plot_filename}{output filename when saving plots.}
+
+\item{print_plots}{whether to print plot to the active graphics device.}
+
+\item{save_plots}{whether to save plot to disk.}
+
+\item{plots_subdir}{output subdirectory for saved plots.}
+}
+\value{
+\code{plotly::plot_ly} figure
+}
+\description{
+Perform and plot a 3D Principal Components Analysis
+
+3D PCA for counts dataframe
+}
+\seealso{
+Other PCA functions:
+\code{\link[=calc_pca]{calc_pca()}},
+\code{\link[=plot_pca]{plot_pca()}},
+\code{\link[=plot_pca_2d]{plot_pca_2d()}}
+}
+\concept{PCA functions}
diff --git a/code/MOSuite/man/plot_read_depth.Rd b/code/MOSuite/man/plot_read_depth.Rd
new file mode 100644
index 0000000..c6d5c1d
--- /dev/null
+++ b/code/MOSuite/man/plot_read_depth.Rd
@@ -0,0 +1,76 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_read_depth.R
+\name{plot_read_depth}
+\alias{plot_read_depth}
+\title{Plot read depth as a bar plot}
+\usage{
+plot_read_depth(moo_counts, ...)
+}
+\arguments{
+\item{moo_counts}{counts dataframe or \code{multiOmicDataSet} containing \code{count_type} & \code{sub_count_type} in the counts
+slot}
+
+\item{...}{arguments forwarded to method}
+}
+\value{
+ggplot barplot
+}
+\description{
+The first argument can be a \code{multiOmicDataset} object (\code{moo}) or a \code{data.frame} containing counts.
+For a \code{moo}, choose which counts slot to use with \code{count_type} & (optionally) \code{sub_count_type}.
+}
+\section{Methods}{
+\tabular{ll}{
+ link to docs \tab class \cr
+ \code{\link[=plot_read_depth]{plot_read_depth()}} \tab \code{multiOmicDataSet} \cr
+ \code{\link[=plot_read_depth]{plot_read_depth()}} \tab \code{data.frame} \cr
+}
+}
+
+\examples{
+# multiOmicDataSet
+moo <- multiOmicDataSet(
+ sample_metadata = nidap_sample_metadata,
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = nidap_raw_counts,
+ "clean" = nidap_clean_raw_counts
+ )
+)
+
+plot_read_depth(moo, count_type = "clean")
+
+# dataframe
+plot_read_depth(nidap_clean_raw_counts)
+
+}
+\seealso{
+\itemize{
+\item \code{\link[=plot_read_depth.multiOmicDataSet]{plot_read_depth.multiOmicDataSet()}}
+\item \code{\link[=plot_read_depth.data.frame]{plot_read_depth.data.frame()}}
+}
+
+Other plotters:
+\code{\link[=plot_corr_heatmap]{plot_corr_heatmap()}},
+\code{\link[=plot_expr_heatmap]{plot_expr_heatmap()}},
+\code{\link[=plot_histogram]{plot_histogram()}},
+\code{\link[=plot_pca]{plot_pca()}},
+\code{\link[=print_or_save_plot]{print_or_save_plot()}}
+
+Other moo methods:
+\code{\link[=batch_correct_counts]{batch_correct_counts()}},
+\code{\link[=clean_raw_counts]{clean_raw_counts()}},
+\code{\link[=diff_counts]{diff_counts()}},
+\code{\link[=filter_counts]{filter_counts()}},
+\code{\link[=filter_diff]{filter_diff()}},
+\code{\link[=normalize_counts]{normalize_counts()}},
+\code{\link[=plot_corr_heatmap]{plot_corr_heatmap()}},
+\code{\link[=plot_expr_heatmap]{plot_expr_heatmap()}},
+\code{\link[=plot_histogram]{plot_histogram()}},
+\code{\link[=plot_pca]{plot_pca()}},
+\code{\link[=run_deseq2]{run_deseq2()}},
+\code{\link[=set_color_pal]{set_color_pal()}}
+}
+\concept{moo methods}
+\concept{plotters}
+\keyword{plotters}
diff --git a/code/MOSuite/man/plot_read_depth.data.frame.Rd b/code/MOSuite/man/plot_read_depth.data.frame.Rd
new file mode 100644
index 0000000..35c2689
--- /dev/null
+++ b/code/MOSuite/man/plot_read_depth.data.frame.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_read_depth.R
+\name{plot_read_depth,data.frame-method}
+\alias{plot_read_depth,data.frame-method}
+\alias{plot_read_depth.data.frame}
+\title{Plot read depth for \code{data.frame}}
+\arguments{
+\item{...}{additional arguments (ignored; accepted for compatibility with the moo dispatch)}
+}
+\value{
+ggplot barplot
+}
+\description{
+Plot read depth for \code{data.frame}
+}
+\examples{
+# dataframe
+plot_read_depth(nidap_clean_raw_counts)
+
+}
+\seealso{
+\code{\link[=plot_read_depth]{plot_read_depth()}} generic
+
+Other plotters for counts dataframes:
+\code{\link{plot_corr_heatmap,data.frame-method}},
+\code{\link{plot_histogram,data.frame-method}},
+\code{\link{plot_pca,data.frame-method}}
+}
+\concept{plotters for counts dataframes}
diff --git a/code/MOSuite/man/plot_read_depth.multiOmicDataSet.Rd b/code/MOSuite/man/plot_read_depth.multiOmicDataSet.Rd
new file mode 100644
index 0000000..50647ec
--- /dev/null
+++ b/code/MOSuite/man/plot_read_depth.multiOmicDataSet.Rd
@@ -0,0 +1,41 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_read_depth.R
+\name{plot_read_depth,MOSuite::multiOmicDataSet-method}
+\alias{plot_read_depth,MOSuite::multiOmicDataSet-method}
+\alias{plot_read_depth.multiOmicDataSet}
+\title{Plot read depth for multiOmicDataSet}
+\arguments{
+\item{count_type}{the type of counts to use. Must be a name in the counts slot (\code{names(moo@counts)}).}
+
+\item{sub_count_type}{used if \code{count_type} is a list in the counts slot: specify the sub count type within the list.
+Must be a name in \code{names(moo@counts[[count_type]])}.}
+}
+\value{
+ggplot barplot
+}
+\description{
+Plot read depth for multiOmicDataSet
+}
+\examples{
+# multiOmicDataSet
+moo <- multiOmicDataSet(
+ sample_metadata = nidap_sample_metadata,
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = nidap_raw_counts,
+ "clean" = nidap_clean_raw_counts
+ )
+)
+
+plot_read_depth(moo, count_type = "clean")
+
+}
+\seealso{
+\code{\link[=plot_read_depth]{plot_read_depth()}} generic
+
+Other plotters for multiOmicDataSets:
+\code{\link{plot_corr_heatmap,MOSuite::multiOmicDataSet-method}},
+\code{\link{plot_histogram,MOSuite::multiOmicDataSet-method}},
+\code{\link{plot_pca,MOSuite::multiOmicDataSet-method}}
+}
+\concept{plotters for multiOmicDataSets}
diff --git a/code/MOSuite/man/plot_venn_diagram.Rd b/code/MOSuite/man/plot_venn_diagram.Rd
new file mode 100644
index 0000000..658e8c1
--- /dev/null
+++ b/code/MOSuite/man/plot_venn_diagram.Rd
@@ -0,0 +1,202 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_venn_diagram.R
+\name{plot_venn_diagram}
+\alias{plot_venn_diagram}
+\alias{plot_venn_diagram,MOSuite::multiOmicDataSet-method}
+\alias{plot_venn_diagram,data.frame-method}
+\title{Plot a venn diagram, UpSet plot, or table of intersections}
+\usage{
+plot_venn_diagram(
+ moo_diff_summary_dat,
+ feature_id_colname = NULL,
+ contrasts_colname = "Contrast",
+ select_contrasts = c(),
+ plot_type = "Venn diagram",
+ intersection_ids = c(),
+ venn_force_unique = TRUE,
+ venn_numbers_format = "raw",
+ venn_significant_digits = 2,
+ venn_fill_colors = c("darkgoldenrod2", "darkolivegreen2", "mediumpurple3",
+ "darkorange2", "lightgreen"),
+ venn_fill_transparency = 0.2,
+ venn_border_colors = "fill colors",
+ venn_font_size_for_category_names = 3,
+ venn_category_names_distance = c(),
+ venn_category_names_position = c(),
+ venn_font_size_for_counts = 6,
+ venn_outer_margin = 0,
+ intersections_order = "degree",
+ display_empty_intersections = FALSE,
+ intersection_bar_color = "steelblue4",
+ intersection_point_size = 2.2,
+ intersection_line_width = 0.7,
+ table_font_size = 0.7,
+ table_content = "all intersections",
+ graphics_device = grDevices::png,
+ dpi = 300,
+ image_width = 4000,
+ image_height = 3000,
+ plot_filename = "venn_diagram.png",
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots"),
+ plots_subdir = "diff"
+)
+
+## S7 method for class
+plot_venn_diagram(
+ moo_diff_summary_dat,
+ feature_id_colname = NULL,
+ contrasts_colname = "Contrast",
+ select_contrasts = c(),
+ plot_type = "Venn diagram",
+ intersection_ids = c(),
+ venn_force_unique = TRUE,
+ venn_numbers_format = "raw",
+ venn_significant_digits = 2,
+ venn_fill_colors = c("darkgoldenrod2", "darkolivegreen2", "mediumpurple3",
+ "darkorange2", "lightgreen"),
+ venn_fill_transparency = 0.2,
+ venn_border_colors = "fill colors",
+ venn_font_size_for_category_names = 3,
+ venn_category_names_distance = c(),
+ venn_category_names_position = c(),
+ venn_font_size_for_counts = 6,
+ venn_outer_margin = 0,
+ intersections_order = "degree",
+ display_empty_intersections = FALSE,
+ intersection_bar_color = "steelblue4",
+ intersection_point_size = 2.2,
+ intersection_line_width = 0.7,
+ table_font_size = 0.7,
+ table_content = "all intersections",
+ graphics_device = grDevices::png,
+ dpi = 300,
+ image_width = 4000,
+ image_height = 3000,
+ plot_filename = "venn_diagram.png",
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots"),
+ plots_subdir = "diff"
+)
+
+## S7 method for class
+plot_venn_diagram(
+ moo_diff_summary_dat,
+ feature_id_colname = NULL,
+ contrasts_colname = "Contrast",
+ select_contrasts = c(),
+ plot_type = "Venn diagram",
+ intersection_ids = c(),
+ venn_force_unique = TRUE,
+ venn_numbers_format = "raw",
+ venn_significant_digits = 2,
+ venn_fill_colors = c("darkgoldenrod2", "darkolivegreen2", "mediumpurple3",
+ "darkorange2", "lightgreen"),
+ venn_fill_transparency = 0.2,
+ venn_border_colors = "fill colors",
+ venn_font_size_for_category_names = 3,
+ venn_category_names_distance = c(),
+ venn_category_names_position = c(),
+ venn_font_size_for_counts = 6,
+ venn_outer_margin = 0,
+ intersections_order = "degree",
+ display_empty_intersections = FALSE,
+ intersection_bar_color = "steelblue4",
+ intersection_point_size = 2.2,
+ intersection_line_width = 0.7,
+ table_font_size = 0.7,
+ table_content = "all intersections",
+ graphics_device = grDevices::png,
+ dpi = 300,
+ image_width = 4000,
+ image_height = 3000,
+ plot_filename = "venn_diagram.png",
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots"),
+ plots_subdir = "diff"
+)
+}
+\arguments{
+\item{moo_diff_summary_dat}{Summarized differential expression analysis}
+
+\item{feature_id_colname}{The column from the counts data containing the Feature IDs (Usually Gene or Protein ID).
+This is usually the first column of your input Counts Matrix. Only columns of Text type from your input Counts
+Matrix will be available to select for this parameter. (Default: \code{NULL} - first column in the counts matrix will be
+used.)}
+
+\item{contrasts_colname}{Name of the column in \code{moo_diff_summary_dat} that contains the contrast names (default:
+"Contrast")}
+
+\item{select_contrasts}{A vector of contrast names to select for the plot. If empty, all contrasts are used.}
+
+\item{plot_type}{Type of plot to generate: "Venn diagram" or "Intersection plot". Default: "Venn diagram"}
+
+\item{intersection_ids}{A vector of intersection IDs to select for the plot. If empty, all intersections are used.}
+
+\item{venn_force_unique}{If TRUE, forces unique elements in the Venn diagram. Default: TRUE}
+
+\item{venn_numbers_format}{Format for the numbers in the Venn diagram. Options: "raw", "percent", "raw-percent",
+"percent-raw". Default: "raw"}
+
+\item{venn_significant_digits}{Number of significant digits for the Venn diagram numbers. Default: 2}
+
+\item{venn_fill_colors}{A vector of colors to fill the Venn diagram categories. Default: c("darkgoldenrod2",
+"darkolivegreen2", "mediumpurple3", "darkorange2", "lightgreen")}
+
+\item{venn_fill_transparency}{Transparency level for the Venn diagram fill colors. Default: 0.2}
+
+\item{venn_border_colors}{Colors for the borders of the Venn diagram categories. Default: "fill colors" (uses the
+same colors as \code{venn_fill_colors})}
+
+\item{venn_font_size_for_category_names}{Font size for the category names in the Venn diagram. Default: 3}
+
+\item{venn_category_names_distance}{Distance of the category names from the Venn diagram circles. Default: c()}
+
+\item{venn_category_names_position}{Position of the category names in the Venn diagram. Default: c()}
+
+\item{venn_font_size_for_counts}{Font size for the counts in the Venn diagram. Default: 6}
+
+\item{venn_outer_margin}{Outer margin for the Venn diagram. Default: 0}
+
+\item{intersections_order}{Order of the intersections in the plot. Default: "by size"}
+
+\item{display_empty_intersections}{If TRUE, displays empty intersections in the plot. Default: FALSE}
+
+\item{intersection_bar_color}{Color for the intersection bars in the plot. Default: "lightgray"}
+
+\item{intersection_point_size}{Size of the points in the intersection plot. Default: 2}
+
+\item{intersection_line_width}{Width of the lines in the intersection plot. Default: 0.5}
+
+\item{table_font_size}{Font size for the table in the plot. Default: 3}
+
+\item{table_content}{Content of the table in the plot. Default: NULL}
+
+\item{graphics_device}{passed to \code{ggsave(device)}. Default: \code{grDevices::png}}
+
+\item{dpi}{dots-per-inch of the output image (see \code{ggsave()}) - only used if save_plots is TRUE}
+
+\item{image_width}{output image width in pixels - only used if save_plots is TRUE}
+
+\item{image_height}{output image height in pixels - only used if save_plots is TRUE}
+
+\item{plot_filename}{plot output filename - only used if save_plots is TRUE}
+
+\item{print_plots}{Whether to print plots during analysis (Defaults to \code{FALSE}, overwritable using option 'moo_print_plots' or environment variable 'MOO_PRINT_PLOTS')}
+
+\item{save_plots}{Whether to save plots to files during analysis (Defaults to \code{TRUE}, overwritable using option 'moo_save_plots' or environment variable 'MOO_SAVE_PLOTS')}
+
+\item{plots_subdir}{subdirectory in \verb{figures/} where plots will be saved if \code{save_plots} is \code{TRUE}}
+}
+\description{
+Generates Venn diagram of intersections across a series of sets (e.g., intersections of significant genes across
+tested contrasts). This Venn diagram is available for up to five sets; Intersection plot is available for any number
+of sets. Specific sets can be selected for the visualizations and the returned dataset may include all (default) or
+specified intersections.
+An S7 generic with methods for \code{multiOmicDataSet} and \code{data.frame}.
+}
+\examples{
+plot_venn_diagram(nidap_volcano_summary_dat, print_plots = TRUE)
+
+}
+\keyword{plotters}
diff --git a/code/MOSuite/man/plot_volcano_enhanced.Rd b/code/MOSuite/man/plot_volcano_enhanced.Rd
new file mode 100644
index 0000000..9b7f238
--- /dev/null
+++ b/code/MOSuite/man/plot_volcano_enhanced.Rd
@@ -0,0 +1,189 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_volcano_enhanced.R
+\name{plot_volcano_enhanced}
+\alias{plot_volcano_enhanced}
+\alias{plot_volcano_enhanced,MOSuite::multiOmicDataSet-method}
+\alias{plot_volcano_enhanced,data.frame-method}
+\title{Enhanced Volcano Plot}
+\usage{
+plot_volcano_enhanced(
+ moo_diff,
+ feature_id_colname = NULL,
+ signif_colname = c("B-A_adjpval", "B-C_adjpval"),
+ signif_threshold = 0.05,
+ change_colname = c("B-A_logFC", "B-C_logFC"),
+ change_threshold = 1,
+ value_to_sort_the_output_dataset = "p-value",
+ num_features_to_label = 30,
+ use_only_addition_labels = FALSE,
+ additional_labels = "",
+ is_red = TRUE,
+ lab_size = 4,
+ change_sig_name = "p-value",
+ change_lfc_name = "log2FC",
+ title = "Volcano Plots",
+ use_custom_lab = FALSE,
+ ylim = 0,
+ custom_xlim = "",
+ xlim_additional = 0,
+ ylim_additional = 0,
+ axis_lab_size = 24,
+ point_size = 2,
+ image_width = 3000,
+ image_height = 3000,
+ dpi = 300,
+ interactive_plots = FALSE,
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots"),
+ plots_subdir = "diff",
+ plot_filename = "volcano_enhanced.png"
+)
+
+## S7 method for class
+plot_volcano_enhanced(
+ moo_diff,
+ feature_id_colname = NULL,
+ signif_colname = c("B-A_adjpval", "B-C_adjpval"),
+ signif_threshold = 0.05,
+ change_colname = c("B-A_logFC", "B-C_logFC"),
+ change_threshold = 1,
+ value_to_sort_the_output_dataset = "p-value",
+ num_features_to_label = 30,
+ use_only_addition_labels = FALSE,
+ additional_labels = "",
+ is_red = TRUE,
+ lab_size = 4,
+ change_sig_name = "p-value",
+ change_lfc_name = "log2FC",
+ title = "Volcano Plots",
+ use_custom_lab = FALSE,
+ ylim = 0,
+ custom_xlim = "",
+ xlim_additional = 0,
+ ylim_additional = 0,
+ axis_lab_size = 24,
+ point_size = 2,
+ image_width = 3000,
+ image_height = 3000,
+ dpi = 300,
+ interactive_plots = FALSE,
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots"),
+ plots_subdir = "diff",
+ plot_filename = "volcano_enhanced.png"
+)
+
+## S7 method for class
+plot_volcano_enhanced(
+ moo_diff,
+ feature_id_colname = NULL,
+ signif_colname = c("B-A_adjpval", "B-C_adjpval"),
+ signif_threshold = 0.05,
+ change_colname = c("B-A_logFC", "B-C_logFC"),
+ change_threshold = 1,
+ value_to_sort_the_output_dataset = "p-value",
+ num_features_to_label = 30,
+ use_only_addition_labels = FALSE,
+ additional_labels = "",
+ is_red = TRUE,
+ lab_size = 4,
+ change_sig_name = "p-value",
+ change_lfc_name = "log2FC",
+ title = "Volcano Plots",
+ use_custom_lab = FALSE,
+ ylim = 0,
+ custom_xlim = "",
+ xlim_additional = 0,
+ ylim_additional = 0,
+ axis_lab_size = 24,
+ point_size = 2,
+ image_width = 3000,
+ image_height = 3000,
+ dpi = 300,
+ interactive_plots = FALSE,
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots"),
+ plots_subdir = "diff",
+ plot_filename = "volcano_enhanced.png"
+)
+}
+\arguments{
+\item{moo_diff}{Differential expression analysis result from one or more contrasts. This must be a dataframe.}
+
+\item{feature_id_colname}{The column from the counts data containing the Feature IDs (Usually Gene or Protein ID).
+This is usually the first column of your input Counts Matrix. Only columns of Text type from your input Counts
+Matrix will be available to select for this parameter. (Default: \code{NULL} - first column in the counts matrix will be
+used.)}
+
+\item{signif_colname}{column name of significance values (e.g., adjusted p-values or FDR). This column will be used
+to determine which points are considered significant in the volcano plot.}
+
+\item{signif_threshold}{Numeric value specifying the significance cutoff for p-values (i.e. filters on
+\code{signif_colname})}
+
+\item{change_colname}{column name of fold change values.}
+
+\item{change_threshold}{Numeric value specifying the fold change cutoff for significance (i.e. filters on
+\code{change_colname})}
+
+\item{value_to_sort_the_output_dataset}{How to sort the output dataset. Options are "fold-change" or "p-value".}
+
+\item{num_features_to_label}{Number of top features/genes to label in the volcano plot. Default is 30.}
+
+\item{use_only_addition_labels}{If \code{TRUE}, only the additional labels specified in \code{additional_labels} will be used
+for labeling in the volcano plot, ignoring the top features.}
+
+\item{additional_labels}{comma-separated string of feature names or IDs to include in the volcano plot.}
+
+\item{is_red}{Logical. If TRUE, highlights points in red.}
+
+\item{lab_size}{Size of the labels in the volcano plot.}
+
+\item{change_sig_name}{Name for the significance column in the plot. Default is "p-value".}
+
+\item{change_lfc_name}{Name for the fold change column in the plot. Default is "log2FC".}
+
+\item{title}{Title of the plot. Default is "Volcano Plots".}
+
+\item{use_custom_lab}{If TRUE, uses custom labels for the plot (set by \code{change_sig_name} and \code{change_lfc_name})}
+
+\item{ylim}{Y-axis limits for the plot.}
+
+\item{custom_xlim}{Custom X-axis limits for the plot.}
+
+\item{xlim_additional}{Additional space to add to the X-axis limits.}
+
+\item{ylim_additional}{Additional space to add to the Y-axis limits.}
+
+\item{axis_lab_size}{Size of the axis labels.}
+
+\item{point_size}{Size of the points in the plot.}
+
+\item{image_width}{output image width in pixels - only used if save_plots is TRUE}
+
+\item{image_height}{output image height in pixels - only used if save_plots is TRUE}
+
+\item{dpi}{dots-per-inch of the output image (see \code{ggsave()}) - only used if save_plots is TRUE}
+
+\item{interactive_plots}{set to TRUE to make PCA and Histogram plots interactive with \code{plotly}, allowing you to hover
+your mouse over a point or line to view sample information. The similarity heat map will not display if this toggle
+is set to \code{TRUE}. Default is \code{FALSE}.}
+
+\item{print_plots}{Whether to print plots during analysis (Defaults to \code{FALSE}, overwritable using option 'moo_print_plots' or environment variable 'MOO_PRINT_PLOTS')}
+
+\item{save_plots}{Whether to save plots to files during analysis (Defaults to \code{TRUE}, overwritable using option 'moo_save_plots' or environment variable 'MOO_SAVE_PLOTS')}
+
+\item{plots_subdir}{subdirectory in \verb{figures/} where plots will be saved if \code{save_plots} is \code{TRUE}}
+
+\item{plot_filename}{plot output filename - only used if save_plots is TRUE}
+}
+\description{
+Uses \href{https://bioconductor.org/packages/release/bioc/html/EnhancedVolcano.html}{Bioconductor's Enhanced Volcano Plot}.
+An S7 generic with methods for \code{multiOmicDataSet} and \code{data.frame}.
+}
+\examples{
+plot_volcano_enhanced(nidap_deg_analysis, print_plots = TRUE)
+
+}
+\keyword{plotters}
+\keyword{volcano}
diff --git a/code/MOSuite/man/plot_volcano_summary.Rd b/code/MOSuite/man/plot_volcano_summary.Rd
new file mode 100644
index 0000000..b6d410c
--- /dev/null
+++ b/code/MOSuite/man/plot_volcano_summary.Rd
@@ -0,0 +1,271 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_volcano_summary.R
+\name{plot_volcano_summary}
+\alias{plot_volcano_summary}
+\alias{plot_volcano_summary,MOSuite::multiOmicDataSet-method}
+\alias{plot_volcano_summary,data.frame-method}
+\title{Volcano Plot - Summary}
+\usage{
+plot_volcano_summary(
+ moo_diff,
+ feature_id_colname = NULL,
+ signif_colname = "pval",
+ signif_threshold = 0.05,
+ change_threshold = 1,
+ value_to_sort_the_output_dataset = "t-statistic",
+ num_features_to_label = 30,
+ add_features = FALSE,
+ label_features = FALSE,
+ custom_gene_list = "",
+ default_label_color = "black",
+ custom_label_color = "green3",
+ label_x_adj = 0.2,
+ label_y_adj = 0.2,
+ line_thickness = 0.5,
+ label_font_size = 4,
+ label_font_type = 1,
+ displace_feature_labels = FALSE,
+ custom_gene_list_special_label_displacement = "",
+ special_label_displacement_x_axis = 2,
+ special_label_displacement_y_axis = 2,
+ color_of_signif_threshold_line = "blue",
+ color_of_non_significant_features = "black",
+ color_of_logfold_change_threshold_line = "red",
+ color_of_features_meeting_only_signif_threshold = "lightgoldenrod2",
+ color_for_features_meeting_pvalue_and_foldchange_thresholds = "red",
+ flip_vplot = FALSE,
+ use_default_x_axis_limit = TRUE,
+ x_axis_limit = 5,
+ use_default_y_axis_limit = TRUE,
+ y_axis_limit = 10,
+ point_size = 2,
+ add_deg_columns = c("FC", "logFC", "tstat", "pval", "adjpval"),
+ graphics_device = grDevices::png,
+ image_width = 15,
+ image_height = 15,
+ dpi = 300,
+ use_default_grid_layout = TRUE,
+ number_of_rows_in_grid_layout = 1,
+ aspect_ratio = 0,
+ plot_filename = "volcano_summary.png",
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots"),
+ plots_subdir = "diff"
+)
+
+## S7 method for class
+plot_volcano_summary(
+ moo_diff,
+ feature_id_colname = NULL,
+ signif_colname = "pval",
+ signif_threshold = 0.05,
+ change_threshold = 1,
+ value_to_sort_the_output_dataset = "t-statistic",
+ num_features_to_label = 30,
+ add_features = FALSE,
+ label_features = FALSE,
+ custom_gene_list = "",
+ default_label_color = "black",
+ custom_label_color = "green3",
+ label_x_adj = 0.2,
+ label_y_adj = 0.2,
+ line_thickness = 0.5,
+ label_font_size = 4,
+ label_font_type = 1,
+ displace_feature_labels = FALSE,
+ custom_gene_list_special_label_displacement = "",
+ special_label_displacement_x_axis = 2,
+ special_label_displacement_y_axis = 2,
+ color_of_signif_threshold_line = "blue",
+ color_of_non_significant_features = "black",
+ color_of_logfold_change_threshold_line = "red",
+ color_of_features_meeting_only_signif_threshold = "lightgoldenrod2",
+ color_for_features_meeting_pvalue_and_foldchange_thresholds = "red",
+ flip_vplot = FALSE,
+ use_default_x_axis_limit = TRUE,
+ x_axis_limit = 5,
+ use_default_y_axis_limit = TRUE,
+ y_axis_limit = 10,
+ point_size = 2,
+ add_deg_columns = c("FC", "logFC", "tstat", "pval", "adjpval"),
+ graphics_device = grDevices::png,
+ image_width = 15,
+ image_height = 15,
+ dpi = 300,
+ use_default_grid_layout = TRUE,
+ number_of_rows_in_grid_layout = 1,
+ aspect_ratio = 0,
+ plot_filename = "volcano_summary.png",
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots"),
+ plots_subdir = "diff"
+)
+
+## S7 method for class
+plot_volcano_summary(
+ moo_diff,
+ feature_id_colname = NULL,
+ signif_colname = "pval",
+ signif_threshold = 0.05,
+ change_threshold = 1,
+ value_to_sort_the_output_dataset = "t-statistic",
+ num_features_to_label = 30,
+ add_features = FALSE,
+ label_features = FALSE,
+ custom_gene_list = "",
+ default_label_color = "black",
+ custom_label_color = "green3",
+ label_x_adj = 0.2,
+ label_y_adj = 0.2,
+ line_thickness = 0.5,
+ label_font_size = 4,
+ label_font_type = 1,
+ displace_feature_labels = FALSE,
+ custom_gene_list_special_label_displacement = "",
+ special_label_displacement_x_axis = 2,
+ special_label_displacement_y_axis = 2,
+ color_of_signif_threshold_line = "blue",
+ color_of_non_significant_features = "black",
+ color_of_logfold_change_threshold_line = "red",
+ color_of_features_meeting_only_signif_threshold = "lightgoldenrod2",
+ color_for_features_meeting_pvalue_and_foldchange_thresholds = "red",
+ flip_vplot = FALSE,
+ use_default_x_axis_limit = TRUE,
+ x_axis_limit = 5,
+ use_default_y_axis_limit = TRUE,
+ y_axis_limit = 10,
+ point_size = 2,
+ add_deg_columns = c("FC", "logFC", "tstat", "pval", "adjpval"),
+ graphics_device = grDevices::png,
+ image_width = 15,
+ image_height = 15,
+ dpi = 300,
+ use_default_grid_layout = TRUE,
+ number_of_rows_in_grid_layout = 1,
+ aspect_ratio = 0,
+ plot_filename = "volcano_summary.png",
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots"),
+ plots_subdir = "diff"
+)
+}
+\arguments{
+\item{moo_diff}{multiOmicDataSet or differential expression analysis result data frame.}
+
+\item{feature_id_colname}{The column from the counts data containing the Feature IDs (Usually Gene or Protein ID).
+This is usually the first column of your input Counts Matrix. Only columns of Text type from your input Counts
+Matrix will be available to select for this parameter. (Default: \code{NULL} - first column in the counts matrix will be
+used.)}
+
+\item{signif_colname}{column name of significance values (e.g., adjusted p-values or FDR). This column will be used
+to determine which points are considered significant in the volcano plot.}
+
+\item{signif_threshold}{Numeric value specifying the significance cutoff for p-values (i.e. filters on
+\code{signif_colname})}
+
+\item{change_threshold}{Numeric value specifying the fold change cutoff for significance (i.e. filters on
+\code{change_colname})}
+
+\item{value_to_sort_the_output_dataset}{How to sort the output dataset. Options are "fold-change" or "p-value".}
+
+\item{num_features_to_label}{Number of top features/genes to label in the volcano plot. Default is 30.}
+
+\item{add_features}{Add custom_gene_list To Labels. Set TRUE when you want to label a specific set of features
+(features) in the "custom_gene_list" parameter" IN ADDITION to the number of features you set in the "Number of
+Features to Label" parameter.}
+
+\item{label_features}{Select TRUE when you want to label ONLY a specific list of features(features) given in the
+"custom_gene_list" parameter.}
+
+\item{custom_gene_list}{Provide a list of features (comma separated) to be labeled on the volcano plot. You must
+toggle one of the following ON to see these labels: "Add features" or "Label Only My Feature List".}
+
+\item{default_label_color}{Set the color for the text used to add feature (gene) name labels to points.}
+
+\item{custom_label_color}{Set the color for the specific list of features (features) provided in the "Feature List"
+parameter.}
+
+\item{label_x_adj}{adjust position of the labels on the x-axis. Default: 0.2}
+
+\item{label_y_adj}{adjust position of the labels on the y-axis. Default: 0.2}
+
+\item{line_thickness}{Set the thickness of the lines in the plot. Default: 0.5}
+
+\item{label_font_size}{Set the font size of the labels. Default: 4}
+
+\item{label_font_type}{Set the font type of the labels. Default: 1}
+
+\item{displace_feature_labels}{Set to TRUE to displace gene labels. Default: FALSE. Set TRUE if you want to displace
+the feature (gene) label for a specific set of features. Make sure to use custom x- and y- limits and give
+sufficient space for displacement; otherwise other labels than the desired ones will appear displaced.}
+
+\item{custom_gene_list_special_label_displacement}{Provide a list of features (comma separated) for which you want
+special displacement of the feature label.}
+
+\item{special_label_displacement_x_axis}{Displacement of the feature label on the x-axis. Default: 2}
+
+\item{special_label_displacement_y_axis}{Displacement of the feature label on the y-axis. Default: 2}
+
+\item{color_of_signif_threshold_line}{Color of the significance threshold line. Default: "blue"}
+
+\item{color_of_non_significant_features}{Color of the non-significant features. Default: "black"}
+
+\item{color_of_logfold_change_threshold_line}{Color of the log fold change threshold line. Default: "red"}
+
+\item{color_of_features_meeting_only_signif_threshold}{Color of the features that meet only the significance
+threshold. Default: "lightgoldenrod2"}
+
+\item{color_for_features_meeting_pvalue_and_foldchange_thresholds}{Color of the features that meet both the p-value
+and fold change thresholds. Default: "red"}
+
+\item{flip_vplot}{Set to TRUE to flip the fold change values so that the volcano plot looks like a comparison was
+B-A. Default: FALSE}
+
+\item{use_default_x_axis_limit}{Set to TRUE to use the default x-axis limit. Default: TRUE}
+
+\item{x_axis_limit}{Custom x-axis limit. Default: c(-5, 5)}
+
+\item{use_default_y_axis_limit}{Set to TRUE to use the default y-axis limit. Default: TRUE}
+
+\item{y_axis_limit}{Custom y-axis limit. Default: c(0, 10)}
+
+\item{point_size}{Size of the points in the plot. Default: 1}
+
+\item{add_deg_columns}{Add additional columns from the DEG analysis to the
+output dataset. Default: \verb{"FC", "logFC", "tstat", "pval", "adjpval"}}
+
+\item{graphics_device}{passed to \code{ggsave(device)}. Default: \code{grDevices::png}}
+
+\item{image_width}{output image width in pixels - only used if save_plots is TRUE}
+
+\item{image_height}{output image height in pixels - only used if save_plots is TRUE}
+
+\item{dpi}{dots-per-inch of the output image (see \code{ggsave()}) - only used if save_plots is TRUE}
+
+\item{use_default_grid_layout}{Set to TRUE to use the default grid layout. Default: TRUE}
+
+\item{number_of_rows_in_grid_layout}{Number of rows in the grid layout. Default: 1}
+
+\item{aspect_ratio}{Aspect ratio of the output image. Default: 4/3}
+
+\item{plot_filename}{Filename for the output plot. Default: "volcano_plot.png"}
+
+\item{print_plots}{Whether to print plots during analysis (Defaults to \code{FALSE}, overwritable using option 'moo_print_plots' or environment variable 'MOO_PRINT_PLOTS')}
+
+\item{save_plots}{Whether to save plots to files during analysis (Defaults to \code{TRUE}, overwritable using option 'moo_save_plots' or environment variable 'MOO_SAVE_PLOTS')}
+
+\item{plots_subdir}{subdirectory in \verb{figures/} where plots will be saved if \code{save_plots} is \code{TRUE}}
+}
+\description{
+Produces one volcano plot for each tested contrast in the input DEG table.
+It can be sorted by either fold change, t-statistic, or p-value. The returned dataset includes one row for each
+significant gene in each contrast, and contains columns from the DEG analysis of that contrast as well as columns
+useful to the Venn diagram template downstream.
+An S7 generic with methods for \code{multiOmicDataSet} and \code{data.frame}.
+}
+\examples{
+plot_volcano_summary(nidap_deg_analysis, print_plots = TRUE)
+
+}
+\keyword{plotters}
+\keyword{volcano}
diff --git a/code/MOSuite/man/print_or_save_plot.Rd b/code/MOSuite/man/print_or_save_plot.Rd
new file mode 100644
index 0000000..4d22ef1
--- /dev/null
+++ b/code/MOSuite/man/print_or_save_plot.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plots.R
+\name{print_or_save_plot}
+\alias{print_or_save_plot}
+\title{Print and/or save a ggplot}
+\usage{
+print_or_save_plot(
+ plot_obj,
+ filename,
+ print_plots = options::opt("print_plots"),
+ save_plots = options::opt("save_plots"),
+ plots_dir = options::opt("plots_dir"),
+ graphics_device = grDevices::png,
+ ...
+)
+}
+\arguments{
+\item{plot_obj}{plot object (e.g. ggplot, ComplexHeatmap...)}
+
+\item{filename}{name of the output file. will be joined with the \code{plots_dir} option.}
+
+\item{print_plots}{Whether to print plots during analysis (Defaults to \code{FALSE}, overwritable using option 'moo_print_plots' or environment variable 'MOO_PRINT_PLOTS')}
+
+\item{save_plots}{Whether to save plots to files during analysis (Defaults to \code{TRUE}, overwritable using option 'moo_save_plots' or environment variable 'MOO_SAVE_PLOTS')}
+
+\item{plots_dir}{Path where plots are saved when \code{moo_save_plots} is \code{TRUE} (Defaults to \code{"figures/"}, overwritable using option 'moo_plots_dir' or environment variable 'MOO_PLOTS_DIR')}
+
+\item{graphics_device}{Default: \code{grDevice::png()}. Only used if the plot is not a ggplot.}
+
+\item{...}{arguments forwarded to \code{ggplot2::ggsave()}}
+}
+\value{
+invisibly returns the path where the plot image was saved to the disk
+}
+\description{
+If \code{save_plots} is \code{TRUE}, the plot will be saved as an image to the path at
+\code{file.path(plots_dir, filename)}.
+If \code{plot_obj} is a ggplot, \code{ggplot2::ggsave()} is used to save the image.
+Otherwise, \code{graphics_device} is used (\code{grDevice::png()} by default).
+}
+\seealso{
+Other plotters:
+\code{\link[=plot_corr_heatmap]{plot_corr_heatmap()}},
+\code{\link[=plot_expr_heatmap]{plot_expr_heatmap()}},
+\code{\link[=plot_histogram]{plot_histogram()}},
+\code{\link[=plot_pca]{plot_pca()}},
+\code{\link[=plot_read_depth]{plot_read_depth()}}
+}
+\concept{plotters}
+\keyword{plotters}
diff --git a/code/MOSuite/man/read_multiOmicDataSet.Rd b/code/MOSuite/man/read_multiOmicDataSet.Rd
new file mode 100644
index 0000000..fdd0ef4
--- /dev/null
+++ b/code/MOSuite/man/read_multiOmicDataSet.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/0_mo-class.R
+\name{read_multiOmicDataSet}
+\alias{read_multiOmicDataSet}
+\title{Read a multiOmicDataSet from disk}
+\usage{
+read_multiOmicDataSet(filepath)
+}
+\arguments{
+\item{filepath}{Path to an RDS file produced by \code{\link[=write_multiOmicDataSet]{write_multiOmicDataSet()}}}
+}
+\value{
+\link{multiOmicDataSet}
+}
+\description{
+Read a multiOmicDataSet from disk
+}
diff --git a/code/MOSuite/man/reexports.Rd b/code/MOSuite/man/reexports.Rd
new file mode 100644
index 0000000..41acbe5
--- /dev/null
+++ b/code/MOSuite/man/reexports.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/reexports.R
+\docType{import}
+\name{reexports}
+\alias{reexports}
+\alias{:=}
+\alias{!!}
+\alias{.data}
+\title{walrus operator}
+\keyword{internal}
+\description{
+These objects are imported from other packages. Follow the links
+below to see their documentation.
+
+\describe{
+ \item{rlang}{\code{\link[rlang::=]{:=()}}, \code{\link[rlang:!!]{!!()}}, \code{\link[rlang:.data]{.data}}}
+}}
+
diff --git a/code/MOSuite/man/remove_low_count_genes.Rd b/code/MOSuite/man/remove_low_count_genes.Rd
new file mode 100644
index 0000000..6564ecd
--- /dev/null
+++ b/code/MOSuite/man/remove_low_count_genes.Rd
@@ -0,0 +1,52 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/filter.R
+\name{remove_low_count_genes}
+\alias{remove_low_count_genes}
+\title{Remove low-count genes}
+\usage{
+remove_low_count_genes(
+ counts_dat,
+ sample_metadata,
+ feature_id_colname,
+ group_colname,
+ use_cpm_counts_to_filter = TRUE,
+ use_group_based_filtering = FALSE,
+ minimum_count_value_to_be_considered_nonzero = 8,
+ minimum_number_of_samples_with_nonzero_counts_in_total = 7,
+ minimum_number_of_samples_with_nonzero_counts_in_a_group = 3
+)
+}
+\arguments{
+\item{feature_id_colname}{The column from the counts data containing the Feature IDs (Usually Gene or Protein ID).
+This is usually the first column of your input Counts Matrix. Only columns of Text type from your input Counts
+Matrix will be available to select for this parameter. (Default: \code{NULL} - first column in the counts matrix will be
+used.)}
+
+\item{group_colname}{The column from the sample metadata containing the sample group information. This is usually a
+column showing to which experimental treatments each sample belongs (e.g. WildType, Knockout, Tumor, Normal,
+Before, After, etc.).}
+
+\item{use_cpm_counts_to_filter}{If no transformation has been been performed on counts matrix (eg Raw Counts) set to
+TRUE. If TRUE counts will be transformed to CPM and filtered based on given criteria. If gene counts matrix has
+been transformed (eg log2, CPM, FPKM or some form of Normalization) set to FALSE. If FALSE no further
+transformation will be applied and features will be filtered as is. For RNAseq data RAW counts should be
+transformed to CPM in order to properly filter.}
+
+\item{use_group_based_filtering}{If TRUE, only keeps features (e.g. genes) that have at least a certain number of
+samples with nonzero CPM counts in at least one group}
+
+\item{minimum_count_value_to_be_considered_nonzero}{Minimum count value to be considered non-zero for a sample}
+
+\item{minimum_number_of_samples_with_nonzero_counts_in_total}{Minimum number of samples (total) with non-zero counts}
+
+\item{minimum_number_of_samples_with_nonzero_counts_in_a_group}{Only keeps genes that have at least this number of
+samples with nonzero CPM counts in at least one group}
+}
+\value{
+counts matrix with low-count genes removed
+}
+\description{
+TODO this function also transforms raw counts to CPM, but that should be a separate function before this step, before
+filter_counts function()
+}
+\keyword{internal}
diff --git a/code/MOSuite/man/rename_samples.Rd b/code/MOSuite/man/rename_samples.Rd
new file mode 100644
index 0000000..0eaeb46
--- /dev/null
+++ b/code/MOSuite/man/rename_samples.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rename.R
+\name{rename_samples}
+\alias{rename_samples}
+\title{Rename samples}
+\usage{
+rename_samples(dat, samples_to_rename_manually)
+}
+\arguments{
+\item{dat}{data frame}
+
+\item{samples_to_rename_manually}{TODO use sample metadata spreadsheet custom column. Need to document the format of
+this object.}
+}
+\value{
+data frame with samples renamed
+}
+\description{
+TODO this should happen right at the beginning of the template?
+}
+\details{
+TODO accept new names for samples in sample metadata spreadsheet
+}
+\keyword{internal}
diff --git a/code/MOSuite/man/run_deseq2.Rd b/code/MOSuite/man/run_deseq2.Rd
new file mode 100644
index 0000000..db9ff05
--- /dev/null
+++ b/code/MOSuite/man/run_deseq2.Rd
@@ -0,0 +1,52 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/deseq2.R
+\name{run_deseq2}
+\alias{run_deseq2}
+\title{Run DESeq2 on a multiOmicDataSet}
+\usage{
+run_deseq2(moo, design, ...)
+}
+\arguments{
+\item{moo}{multiOmicDataSet object}
+
+\item{design}{model formula for experimental design. Columns must exist in \code{meta_dat}.}
+
+\item{...}{remaining variables are forwarded to \code{DESeq2::DESeq()}.}
+}
+\value{
+multiOmicDataSet object with DESeq2 slot filled
+}
+\description{
+Run DESeq2 on a multiOmicDataSet
+}
+\examples{
+\dontrun{
+moo <- create_multiOmicDataSet_from_files(
+ system.file("extdata", "sample_metadata.tsv.gz",
+ package = "MOSuite"
+ ),
+ system.file("extdata",
+ "RSEM.genes.expected_count.all_samples.txt.gz",
+ package = "MOSuite"
+ )
+) |> filter_counts()
+moo <- run_deseq2(moo, ~condition)
+}
+}
+\seealso{
+Other moo methods:
+\code{\link[=batch_correct_counts]{batch_correct_counts()}},
+\code{\link[=clean_raw_counts]{clean_raw_counts()}},
+\code{\link[=diff_counts]{diff_counts()}},
+\code{\link[=filter_counts]{filter_counts()}},
+\code{\link[=filter_diff]{filter_diff()}},
+\code{\link[=normalize_counts]{normalize_counts()}},
+\code{\link[=plot_corr_heatmap]{plot_corr_heatmap()}},
+\code{\link[=plot_expr_heatmap]{plot_expr_heatmap()}},
+\code{\link[=plot_histogram]{plot_histogram()}},
+\code{\link[=plot_pca]{plot_pca()}},
+\code{\link[=plot_read_depth]{plot_read_depth()}},
+\code{\link[=set_color_pal]{set_color_pal()}}
+}
+\concept{moo methods}
+\keyword{internal}
diff --git a/code/MOSuite/man/separate_gene_meta_columns.Rd b/code/MOSuite/man/separate_gene_meta_columns.Rd
new file mode 100644
index 0000000..5734a85
--- /dev/null
+++ b/code/MOSuite/man/separate_gene_meta_columns.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/clean.R
+\name{separate_gene_meta_columns}
+\alias{separate_gene_meta_columns}
+\title{Separate gene metadata column}
+\usage{
+separate_gene_meta_columns(counts_dat, split_gene_name = TRUE)
+}
+\arguments{
+\item{counts_dat}{dataframe with raw counts data}
+
+\item{split_gene_name}{If \code{TRUE}, split the gene name column by any of these special characters: \verb{,|_-:}}
+}
+\value{
+dataframe with metadata separated
+}
+\description{
+Separate gene metadata column
+}
+\keyword{internal}
diff --git a/code/MOSuite/man/set_color_pal.Rd b/code/MOSuite/man/set_color_pal.Rd
new file mode 100644
index 0000000..c603f0e
--- /dev/null
+++ b/code/MOSuite/man/set_color_pal.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/colors.R
+\name{set_color_pal}
+\alias{set_color_pal}
+\title{Set color palette for a single group/column}
+\usage{
+set_color_pal(moo, colname, palette_fun = grDevices::palette.colors, ...)
+}
+\arguments{
+\item{moo}{\code{multiOmicDataSet} object (see \code{create_multiOmicDataSet_from_dataframes()})}
+
+\item{colname}{group column name to set the palette for}
+
+\item{palette_fun}{Function for selecting colors. Assumed to contain \code{n} for the number of colors. Default:
+\code{grDevices::palette.colors()}}
+
+\item{...}{additional arguments forwarded to \code{palette_fun}}
+}
+\value{
+\code{moo} with colors updated at \code{moo@analyses$colors$colname}
+}
+\description{
+This allows you to set custom palettes individually for groups in the dataset
+}
+\examples{
+moo <- create_multiOmicDataSet_from_dataframes(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ counts_dat = as.data.frame(nidap_raw_counts)
+)
+moo@analyses$colors$Group
+moo <- moo |> set_color_pal("Group", palette_fun = RColorBrewer::brewer.pal, name = "Set2")
+moo@analyses$colors$Group
+
+}
+\seealso{
+Other moo methods:
+\code{\link[=batch_correct_counts]{batch_correct_counts()}},
+\code{\link[=clean_raw_counts]{clean_raw_counts()}},
+\code{\link[=diff_counts]{diff_counts()}},
+\code{\link[=filter_counts]{filter_counts()}},
+\code{\link[=filter_diff]{filter_diff()}},
+\code{\link[=normalize_counts]{normalize_counts()}},
+\code{\link[=plot_corr_heatmap]{plot_corr_heatmap()}},
+\code{\link[=plot_expr_heatmap]{plot_expr_heatmap()}},
+\code{\link[=plot_histogram]{plot_histogram()}},
+\code{\link[=plot_pca]{plot_pca()}},
+\code{\link[=plot_read_depth]{plot_read_depth()}},
+\code{\link[=run_deseq2]{run_deseq2()}}
+}
+\concept{moo methods}
diff --git a/code/MOSuite/man/setup_capsule_environment.Rd b/code/MOSuite/man/setup_capsule_environment.Rd
new file mode 100644
index 0000000..96b4acb
--- /dev/null
+++ b/code/MOSuite/man/setup_capsule_environment.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{setup_capsule_environment}
+\alias{setup_capsule_environment}
+\title{Set up capsule environment and directories}
+\usage{
+setup_capsule_environment(base_results_dir = file.path("..", "results"))
+}
+\arguments{
+\item{base_results_dir}{base path to results directory (default: \code{../results})}
+}
+\value{
+invisibly returns a list with \code{results_dir} and \code{plots_dir} paths
+}
+\description{
+Initializes the results directory structure and logs installed R package versions.
+This is a common setup task used across all Code Ocean capsules.
+}
+\examples{
+\dontrun{
+setup_capsule_environment()
+}
+
+}
+\keyword{internal}
diff --git a/code/MOSuite/man/strip_ensembl_version.Rd b/code/MOSuite/man/strip_ensembl_version.Rd
new file mode 100644
index 0000000..4e5e568
--- /dev/null
+++ b/code/MOSuite/man/strip_ensembl_version.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/clean.R
+\name{strip_ensembl_version}
+\alias{strip_ensembl_version}
+\title{Remove version number from ENSEMBLE IDs}
+\usage{
+strip_ensembl_version(x)
+}
+\arguments{
+\item{x}{vector of IDs}
+}
+\value{
+IDs without version numbers
+}
+\description{
+Remove version number from ENSEMBLE IDs
+}
+\keyword{internal}
diff --git a/code/MOSuite/man/write_multiOmicDataSet.Rd b/code/MOSuite/man/write_multiOmicDataSet.Rd
new file mode 100644
index 0000000..83fd0dc
--- /dev/null
+++ b/code/MOSuite/man/write_multiOmicDataSet.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/0_mo-class.R
+\name{write_multiOmicDataSet}
+\alias{write_multiOmicDataSet}
+\title{Write a multiOmicDataSet to disk as an RDS file}
+\usage{
+write_multiOmicDataSet(moo, filepath = "moo.rds")
+}
+\arguments{
+\item{moo}{\link{multiOmicDataSet} object to serialize}
+
+\item{filepath}{Path to the RDS file to write (default: "moo.rds")}
+}
+\value{
+Invisibly returns \code{filepath}
+}
+\description{
+Write a multiOmicDataSet to disk as an RDS file
+}
diff --git a/code/MOSuite/man/write_multiOmicDataSet_properties.Rd b/code/MOSuite/man/write_multiOmicDataSet_properties.Rd
new file mode 100644
index 0000000..79a19e3
--- /dev/null
+++ b/code/MOSuite/man/write_multiOmicDataSet_properties.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/0_mo-class.R
+\name{write_multiOmicDataSet_properties}
+\alias{write_multiOmicDataSet_properties}
+\alias{write_multiOmicDataSet_properties,MOSuite::multiOmicDataSet-method}
+\title{Write multiOmicDataSet properties to disk as CSV files}
+\usage{
+write_multiOmicDataSet_properties(moo, output_dir = "moo")
+
+## S7 method for class
+write_multiOmicDataSet_properties(moo, output_dir = "moo")
+}
+\arguments{
+\item{moo}{\code{multiOmicDataSet} object to write properties from}
+
+\item{output_dir}{Directory where the properties will be saved (default: "moo")}
+}
+\value{
+Invisibly returns the \code{output_dir} where the files were saved
+}
+\description{
+Writes the properties of a multiOmicDataSet object to disk as separate files in output_dir.
+Properties that are data frames are saved as CSV files, while all other objects are saved as RDS files.
+}
diff --git a/code/MOSuite/tests/testthat.R b/code/MOSuite/tests/testthat.R
new file mode 100644
index 0000000..254e711
--- /dev/null
+++ b/code/MOSuite/tests/testthat.R
@@ -0,0 +1,14 @@
+# This file is part of the standard setup for testthat.
+# It is recommended that you do not modify it.
+#
+# Where should you do additional test configuration?
+# Learn more about the roles of various files in:
+# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
+# * https://testthat.r-lib.org/articles/special-files.html
+
+library(testthat)
+library(MOSuite)
+
+options(moo_save_plots = FALSE)
+
+test_check("MOSuite")
diff --git a/code/MOSuite/tests/testthat/_snaps/E2E.md b/code/MOSuite/tests/testthat/_snaps/E2E.md
new file mode 100644
index 0000000..b420659
--- /dev/null
+++ b/code/MOSuite/tests/testthat/_snaps/E2E.md
@@ -0,0 +1,106 @@
+# E2E workflow succeeds for RENEE data
+
+ Code
+ moo <- filter_diff(diff_counts(normalize_counts(filter_counts(clean_raw_counts(
+ create_multiOmicDataSet_from_files(sample_meta_filepath = metadata_tsv,
+ feature_counts_filepath = gene_counts_tsv)), group_colname = "condition",
+ label_colname = "sample_id", minimum_count_value_to_be_considered_nonzero = 1,
+ minimum_number_of_samples_with_nonzero_counts_in_total = 1,
+ minimum_number_of_samples_with_nonzero_counts_in_a_group = 1, ), group_colname = "condition",
+ label_colname = "sample_id"), covariates_colnames = "condition",
+ contrast_colname = "condition", contrasts = c("knockout-wildtype")),
+ significance_column = "adjpval", significance_cutoff = 0.05, change_column = "logFC",
+ change_cutoff = 1, filtering_mode = "any", include_estimates = c("FC", "logFC",
+ "tstat", "pval", "adjpval"), round_estimates = TRUE,
+ rounding_decimal_for_percent_cells = 0, contrast_filter = "none", contrasts = c(),
+ groups = c(), groups_filter = "none", label_font_size = 6, label_distance = 1,
+ y_axis_expansion = 0.08, fill_colors = c("steelblue1", "whitesmoke"),
+ pie_chart_in_3d = TRUE, bar_width = 0.4, draw_bar_border = TRUE, plot_type = "bar",
+ plot_titles_fontsize = 12)
+ Message
+ Rows: 58929 Columns: 6
+ -- Column specification --------------------------------------------------------
+ Delimiter: "\t"
+ chr (2): gene_id, GeneName
+ dbl (4): KO_S3, KO_S4, WT_S1, WT_S2
+
+ i Use `spec()` to retrieve the full column specification for this data.
+ i Specify the column types or set `show_col_types = FALSE` to quiet this message.
+ Rows: 4 Columns: 2
+ -- Column specification --------------------------------------------------------
+ Delimiter: "\t"
+ chr (2): sample_id, condition
+
+ i Use `spec()` to retrieve the full column specification for this data.
+ i Specify the column types or set `show_col_types = FALSE` to quiet this message.
+ * cleaning raw counts
+ Not able to identify multiple id's in gene_id
+ Columns that can be used to aggregate gene information gene_id
+ Aggregating the counts for the same ID in different chromosome locations.
+ Column used to Aggregate duplicate IDs: gene_id
+ Number of rows before Collapse: 58929
+ no duplicated IDs in gene_id
+ * filtering clean counts
+ Number of features after filtering: 291
+ colors_for_plots NULL
+ * normalizing filt counts
+ Total number of features included: 291
+ Sample columns: KO_S3, Sample columns: KO_S4, Sample columns: WT_S1, Sample columns: WT_S2
+ * differential counts
+ Setting first column of `counts` as gene annotation.
+ Total number of genes included: 291
+ * filtering differential features
+ Total number of genes selected with adjpval < 0.05 and | logFC | ≥ 1 is sum(selgenes)
+
+# E2E workflow succeeds for NIDAP data
+
+ Code
+ moo_nidap <- filter_diff(diff_counts(batch_correct_counts(normalize_counts(
+ filter_counts(clean_raw_counts(create_multiOmicDataSet_from_dataframes(
+ sample_metadata = as.data.frame(nidap_sample_metadata), counts_dat = as.data.frame(
+ nidap_raw_counts))), group_colname = "Group"), group_colname = "Group"),
+ covariates_colname = "Group", batch_colname = "Batch", label_colname = "Label"),
+ count_type = "filt", sub_count_type = NULL, sample_id_colname = "Sample",
+ feature_id_colname = "GeneName", covariates_colnames = c("Group", "Batch"),
+ contrast_colname = c("Group"), contrasts = c("B-A", "C-A", "B-C"),
+ input_in_log_counts = FALSE, return_mean_and_sd = TRUE,
+ voom_normalization_method = "quantile", ), significance_column = "adjpval",
+ significance_cutoff = 0.05, change_column = "logFC", change_cutoff = 1,
+ filtering_mode = "any", include_estimates = c("FC", "logFC", "tstat", "pval",
+ "adjpval"), round_estimates = TRUE, rounding_decimal_for_percent_cells = 0,
+ contrast_filter = "none", contrasts = c(), groups = c(), groups_filter = "none",
+ label_font_size = 6, label_distance = 1, y_axis_expansion = 0.08, fill_colors = c(
+ "steelblue1", "whitesmoke"), pie_chart_in_3d = TRUE, bar_width = 0.4,
+ draw_bar_border = TRUE, plot_type = "bar", plot_titles_fontsize = 12)
+ Message
+ * cleaning raw counts
+ Not able to identify multiple id's in GeneName
+ Columns that can be used to aggregate gene information GeneName
+ Aggregating the counts for the same ID in different chromosome locations.
+ Column used to Aggregate duplicate IDs: GeneName
+ Number of rows before Collapse: 43280
+ no duplicated IDs in GeneName
+ * filtering clean counts
+ Number of features after filtering: 7943
+ colors_for_plots NULL
+ * normalizing filt counts
+ Total number of features included: 7943
+ Sample columns: A1, Sample columns: A2, Sample columns: A3, Sample columns: B1, Sample columns: B2, Sample columns: B3, Sample columns: C1, Sample columns: C2, Sample columns: C3
+ * batch-correcting norm-voom counts
+ Found2batches
+ Adjusting for2covariate(s) or covariate level(s)
+ Standardizing Data across genes
+ Fitting L/S model and finding priors
+ Finding parametric adjustments
+ Adjusting the Data
+
+ The total number of features in output: 7943
+ Number of samples after batch correction: 10
+ * differential counts
+ Setting first column of `counts` as gene annotation.
+ Total number of genes included: 7942
+ Joining with `by = join_by(GeneName)`
+ Joining with `by = join_by(GeneName)`
+ * filtering differential features
+ Total number of genes selected with adjpval < 0.05 and | logFC | ≥ 1 is sum(selgenes)
+
diff --git a/code/MOSuite/tests/testthat/_snaps/cli.md b/code/MOSuite/tests/testthat/_snaps/cli.md
new file mode 100644
index 0000000..cc98210
--- /dev/null
+++ b/code/MOSuite/tests/testthat/_snaps/cli.md
@@ -0,0 +1,20 @@
+# mosuite cli
+
+ Code
+ system(command)
+
+# mosuite --help
+
+ Code
+ cli_exec("--help")
+
+---
+
+ Code
+ system(paste(system.file("exec", "mosuite", package = "MOSuite"), "--help"))
+
+---
+
+ Code
+ cli_exec("help")
+
diff --git a/code/MOSuite/tests/testthat/_snaps/plot_venn_diagram.md b/code/MOSuite/tests/testthat/_snaps/plot_venn_diagram.md
new file mode 100644
index 0000000..b6e1eb0
--- /dev/null
+++ b/code/MOSuite/tests/testthat/_snaps/plot_venn_diagram.md
@@ -0,0 +1,9 @@
+# plot_venn_diagram works with defaults
+
+ Code
+ p <- plot_venn_diagram(nidap_volcano_summary_dat, print_plots = FALSE,
+ save_plots = TRUE)
+ Message
+ All intersections: 1:7,c(1, 2, 3, 4, 5, 6, 7),c(163, 237, 518, 780, 225, 379, 766),c("Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes")
+ Intersections returned: 1:7,c(1, 2, 3, 4, 5, 6, 7),c(163, 237, 518, 780, 225, 379, 766)
+
diff --git a/code/MOSuite/tests/testthat/_snaps/plot_volcano_enhanced.md b/code/MOSuite/tests/testthat/_snaps/plot_volcano_enhanced.md
new file mode 100644
index 0000000..58c8804
--- /dev/null
+++ b/code/MOSuite/tests/testthat/_snaps/plot_volcano_enhanced.md
@@ -0,0 +1,15 @@
+# plot_volcano_enhanced works on nidap dataset
+
+ Code
+ df_volc_enh <- plot_volcano_enhanced(nidap_deg_analysis, save_plots = FALSE,
+ print_plots = FALSE)
+ Message
+ Genes in initial dataset: 7943
+
+ Max y: 4.60041859457819
+
+ Genes in initial dataset: 7943
+
+ Max y: 4.32577808863472
+
+
diff --git a/code/MOSuite/tests/testthat/_snaps/plot_volcano_summary.md b/code/MOSuite/tests/testthat/_snaps/plot_volcano_summary.md
new file mode 100644
index 0000000..7b248c1
--- /dev/null
+++ b/code/MOSuite/tests/testthat/_snaps/plot_volcano_summary.md
@@ -0,0 +1,19 @@
+# plot_volcano_summary works on nidap dataset
+
+ Code
+ df_volc_sum <- plot_volcano_summary(nidap_deg_analysis, save_plots = FALSE,
+ print_plots = FALSE)
+ Message
+ Preparing table for contrast: B-A
+ Fold change column: B-A_logFC
+ pval column: B-A_pval
+ Total number of features included in volcano plot: 7943
+ Preparing table for contrast: C-A
+ Fold change column: C-A_logFC
+ pval column: C-A_pval
+ Total number of features included in volcano plot: 7943
+ Preparing table for contrast: B-C
+ Fold change column: B-C_logFC
+ pval column: B-C_pval
+ Total number of features included in volcano plot: 7943
+
diff --git a/code/MOSuite/tests/testthat/data/moo.rds b/code/MOSuite/tests/testthat/data/moo.rds
new file mode 100644
index 0000000..4545d90
Binary files /dev/null and b/code/MOSuite/tests/testthat/data/moo.rds differ
diff --git a/code/MOSuite/tests/testthat/data/test_mosuite_cli.sh b/code/MOSuite/tests/testthat/data/test_mosuite_cli.sh
new file mode 100644
index 0000000..d592d74
--- /dev/null
+++ b/code/MOSuite/tests/testthat/data/test_mosuite_cli.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# set MOSuite options for plots
+export MOO_SAVE_PLOTS=TRUE
+export MOO_PLOTS_DIR=tests/testthat/data/figures
+mkdir -p $MOO_PLOTS_DIR
+
+# add mosuite executable to the path
+mosuite=$(R -s -e "cat(system.file('exec','mosuite', package='MOSuite'))")
+export PATH="$PATH:$(dirname $mosuite)"
+
+mosuite create_multiOmicDataSet_from_files --json=tests/testthat/data/create_multiOmicDataSet_from_files.json
+mosuite clean_raw_counts --json=tests/testthat/data/clean_raw_counts.json
+mosuite filter_counts --json=tests/testthat/data/filter_counts.json
+mosuite normalize_counts --json=tests/testthat/data/normalize_counts.json
+mosuite batch_correct_counts --json=tests/testthat/data/batch_correct_counts.json
+mosuite diff_counts --json=tests/testthat/data/diff_counts.json
+mosuite filter_diff --json=tests/testthat/data/filter_diff.json
diff --git a/code/MOSuite/tests/testthat/helper-functions.R b/code/MOSuite/tests/testthat/helper-functions.R
new file mode 100644
index 0000000..cb655a1
--- /dev/null
+++ b/code/MOSuite/tests/testthat/helper-functions.R
@@ -0,0 +1,59 @@
+equal_dfs <- function(x, y) {
+ return(all(
+ class(x) == class(y),
+ names(x) == names(y),
+ rownames(x) == rownames(y),
+ all.equal(x, y),
+ all.equal(lapply(x, class), lapply(y, class))
+ ))
+}
+
+# source https://stackoverflow.com/a/75232781/5787827
+compare_proxy.plotly <- function(x, path = "x") {
+ names(x$x$visdat) <- "proxy"
+ e <- environment(x$x$visdat$proxy)
+
+ # Maybe we should follow the recursion, but not now.
+ e$p <- NULL
+
+ e$id <- "proxy"
+
+ x$x$cur_data <- "proxy"
+ names(x$x$attrs) <- "proxy"
+
+ return(list(object = x, path = paste0("compare_proxy(", path, ")")))
+}
+
+run_function_cli <- function(func_name) {
+ json_path <- paste0(
+ func_name,
+ ".json"
+ )
+
+ return(cli_exec(c(
+ func_name,
+ paste0('--json="', json_path, '"')
+ )))
+}
+
+# source: https://github.com/r-lib/testthat/issues/664#issuecomment-340809997
+create_empty_dir <- function(x) {
+ unlink(x, recursive = TRUE, force = TRUE)
+ return(dir.create(x))
+}
+
+# source: https://github.com/r-lib/testthat/issues/664#issuecomment-340809997
+test_with_dir <- function(desc, ...) {
+ new <- tempfile()
+ create_empty_dir(new)
+ withr::with_dir(
+ # or local_dir()
+ new = new,
+ code = {
+ capture.output(
+ testthat::test_that(desc = desc, ...) # nolint: object_usage_linter
+ )
+ }
+ )
+ return(invisible())
+}
diff --git a/code/MOSuite/tests/testthat/test-0_mo-class.R b/code/MOSuite/tests/testthat/test-0_mo-class.R
new file mode 100644
index 0000000..2bd946d
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-0_mo-class.R
@@ -0,0 +1,451 @@
+test_that("constructing MOO works for RENEE data", {
+ moo <- create_multiOmicDataSet_from_files(
+ system.file("extdata", "sample_metadata.tsv.gz", package = "MOSuite"),
+ system.file(
+ "extdata",
+ "RSEM.genes.expected_count.all_samples.txt.gz",
+ package = "MOSuite"
+ ),
+ sample_id_colname = "sample_id",
+ feature_id_colname = "gene_id"
+ )
+ expect_equal(
+ moo@sample_meta,
+ structure(
+ list(
+ sample_id = c("KO_S3", "KO_S4", "WT_S1", "WT_S2"),
+ condition = c("knockout", "knockout", "wildtype", "wildtype")
+ ),
+ row.names = c(NA, -4L),
+ class = c("tbl_df", "tbl", "data.frame")
+ )
+ )
+ expect_equal(
+ moo@annotation |> head(),
+ structure(
+ list(
+ gene_id = c(
+ "ENSG00000121410.11",
+ "ENSG00000268895.5",
+ "ENSG00000148584.15",
+ "ENSG00000175899.14",
+ "ENSG00000245105.3",
+ "ENSG00000166535.20"
+ ),
+ GeneName = c("A1BG", "A1BG-AS1", "A1CF", "A2M", "A2M-AS1", "A2ML1")
+ ),
+ row.names = c(NA, -6L),
+ class = c("tbl_df", "tbl", "data.frame")
+ )
+ )
+ expect_equal(
+ moo@counts$raw |> head(),
+ structure(
+ list(
+ gene_id = c(
+ "ENSG00000121410.11",
+ "ENSG00000268895.5",
+ "ENSG00000148584.15",
+ "ENSG00000175899.14",
+ "ENSG00000245105.3",
+ "ENSG00000166535.20"
+ ),
+ KO_S3 = c(0, 0, 0, 0, 0, 0),
+ KO_S4 = c(0, 0, 0, 0, 0, 0),
+ WT_S1 = c(0, 0, 0, 0, 0, 0),
+ WT_S2 = c(0, 0, 0, 0, 0, 0)
+ ),
+ row.names = c(NA, -6L),
+ class = c("tbl_df", "tbl", "data.frame")
+ )
+ )
+})
+
+test_that("constructing MOO works from CSV files", {
+ moo <- create_multiOmicDataSet_from_files(
+ system.file(
+ "extdata",
+ "nidap",
+ "Sample_Metadata_Bulk_RNA-seq_Training_Dataset_CCBR.csv.gz",
+ package = "MOSuite"
+ ),
+ system.file("extdata", "nidap", "Raw_Counts.csv.gz", package = "MOSuite"),
+ delim = ","
+ )
+ expect_equal(
+ moo@sample_meta,
+ structure(
+ list(
+ Sample = c("A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3"),
+ Group = c("A", "A", "A", "B", "B", "B", "C", "C", "C"),
+ Replicate = c(1, 2, 3, 1, 2, 3, 1, 2, 3),
+ Batch = c(1, 2, 2, 1, 1, 2, 1, 2, 2),
+ Label = c("A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3")
+ ),
+ row.names = c(NA, -9L),
+ class = c("tbl_df", "tbl", "data.frame")
+ )
+ )
+ expect_equal(
+ moo@annotation |> head(),
+ structure(
+ list(
+ GeneName = c(
+ "RP23-271O17.1",
+ "Gm26206",
+ "Xkr4",
+ "RP23-317L18.1",
+ "RP23-317L18.4",
+ "RP23-317L18.3"
+ )
+ ),
+ row.names = c(NA, -6L),
+ class = c("tbl_df", "tbl", "data.frame")
+ )
+ )
+ expect_equal(
+ moo@counts$raw |> head(),
+ structure(
+ list(
+ GeneName = c(
+ "RP23-271O17.1",
+ "Gm26206",
+ "Xkr4",
+ "RP23-317L18.1",
+ "RP23-317L18.4",
+ "RP23-317L18.3"
+ ),
+ A1 = c(0, 0, 0, 0, 0, 0),
+ A2 = c(0, 0, 0, 0, 0, 0),
+ A3 = c(0, 0, 0, 0, 0, 0),
+ B1 = c(0, 0, 0, 0, 0, 0),
+ B2 = c(0, 0, 0, 0, 0, 0),
+ B3 = c(0, 0, 0, 0, 0, 0),
+ C1 = c(0, 0, 0, 0, 0, 0),
+ C2 = c(0, 0, 0, 0, 0, 0),
+ C3 = c(0, 0, 0, 0, 0, 0)
+ ),
+ row.names = c(NA, -6L),
+ class = c("tbl_df", "tbl", "data.frame")
+ )
+ )
+})
+
+test_that("annotation minimally contains feature id column", {
+ moo <- create_multiOmicDataSet_from_dataframes(
+ readr::read_tsv(
+ system.file("extdata", "sample_metadata.tsv.gz", package = "MOSuite")
+ ),
+ gene_counts |> glue_gene_symbols()
+ )
+ expect_equal(
+ moo@annotation |> head(),
+ structure(
+ list(
+ gene_id = structure(
+ c(
+ "ENSG00000121410.11|A1BG",
+ "ENSG00000268895.5|A1BG-AS1",
+ "ENSG00000148584.15|A1CF",
+ "ENSG00000175899.14|A2M",
+ "ENSG00000245105.3|A2M-AS1",
+ "ENSG00000166535.20|A2ML1"
+ ),
+ class = c("glue", "character")
+ )
+ ),
+ row.names = c(NA, -6L),
+ class = c("tbl_df", "tbl", "data.frame")
+ )
+ )
+})
+
+test_that("multiOmicDataSet from data frames detect problems", {
+ sample_meta <- data.frame(
+ sample_id = c("KO_S3", "KO_S4", "WT_S1", "WT_S2"),
+ condition = factor(
+ c("knockout", "knockout", "wildtype", "wildtype"),
+ levels = c("wildtype", "knockout")
+ )
+ )
+ expect_error(
+ create_multiOmicDataSet_from_dataframes(sample_meta, gene_counts[, 1:4]),
+ "Not all sample IDs in the sample metadata are in the count data"
+ )
+})
+
+test_that("extract_counts works", {
+ moo <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = as.data.frame(nidap_raw_counts),
+ "clean" = as.data.frame(nidap_clean_raw_counts),
+ "filt" = as.data.frame(nidap_filtered_counts),
+ "norm" = list("voom" = as.data.frame(nidap_norm_counts))
+ )
+ )
+ expect_equal(extract_counts(moo, "clean"), moo@counts$clean)
+ expect_equal(extract_counts(moo, "norm", "voom"), moo@counts$norm$voom)
+ expect_error(extract_counts(moo, "notacounttype"), "not in moo")
+ expect_error(
+ extract_counts(moo, "raw", "notasubtype"),
+ "does not contain subtypes"
+ )
+ expect_error(extract_counts(moo, "norm"), "contains subtypes")
+})
+
+
+test_that("write_multiOmicDataSet_properties works", {
+ moo_nidap <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = as.data.frame(nidap_raw_counts),
+ "clean" = as.data.frame(nidap_clean_raw_counts),
+ "filt" = as.data.frame(nidap_filtered_counts),
+ "norm" = list("voom" = as.data.frame(nidap_norm_counts))
+ )
+ ) |>
+ diff_counts(
+ count_type = "filt",
+ sub_count_type = NULL,
+ sample_id_colname = "Sample",
+ feature_id_colname = "Gene",
+ covariates_colnames = c("Group", "Batch"),
+ contrast_colname = c("Group"),
+ contrasts = c("B-A", "C-A", "B-C"),
+ voom_normalization_method = "quantile",
+ ) |>
+ filter_diff(
+ significance_column = "adjpval",
+ significance_cutoff = 0.05,
+ change_column = "logFC",
+ change_cutoff = 1,
+ filtering_mode = "any",
+ include_estimates = c("FC", "logFC", "tstat", "pval", "adjpval"),
+ round_estimates = TRUE,
+ rounding_decimal_for_percent_cells = 0,
+ contrast_filter = "none",
+ contrasts = c(),
+ groups = c(),
+ groups_filter = "none",
+ label_font_size = 6,
+ label_distance = 1,
+ y_axis_expansion = 0.08,
+ fill_colors = c("steelblue1", "whitesmoke"),
+ pie_chart_in_3d = TRUE,
+ bar_width = 0.4,
+ draw_bar_border = TRUE,
+ plot_type = "bar",
+ plot_titles_fontsize = 12
+ )
+ moo_nidap@analyses$foo <- "bar"
+
+ temp_dir <- tempfile(pattern = "moo-write-")
+ on.exit(unlink(temp_dir, recursive = TRUE), add = TRUE)
+
+ expect_equal(write_multiOmicDataSet_properties(moo_nidap, temp_dir), temp_dir)
+
+ expect_true(file.exists(file.path(temp_dir, "sample_metadata.csv")))
+ expect_true(file.exists(file.path(temp_dir, "feature_annotation.csv")))
+
+ expect_true(file.exists(file.path(temp_dir, "counts", "raw_counts.csv")))
+ expect_true(file.exists(file.path(temp_dir, "counts", "clean_counts.csv")))
+ expect_true(file.exists(file.path(temp_dir, "counts", "filt_counts.csv")))
+ expect_true(
+ file.exists(file.path(temp_dir, "counts", "norm", "voom_counts.csv"))
+ )
+
+ expect_true(file.exists(file.path(temp_dir, "analyses", "foo.rds")))
+ expect_true(file.exists(file.path(
+ temp_dir,
+ "analyses",
+ "diff",
+ "diff_B-A.csv"
+ )))
+ expect_true(file.exists(file.path(
+ temp_dir,
+ "analyses",
+ "diff",
+ "diff_C-A.csv"
+ )))
+ expect_true(file.exists(file.path(
+ temp_dir,
+ "analyses",
+ "diff",
+ "diff_B-C.csv"
+ )))
+ expect_true(file.exists(file.path(temp_dir, "analyses", "diff_filt.csv")))
+ expect_true(dir.exists(file.path(temp_dir, "analyses", "colors")))
+ expect_true(file.exists(file.path(
+ temp_dir,
+ "analyses",
+ "colors",
+ "colors_Sample.rds"
+ )))
+ expect_true(file.exists(file.path(
+ temp_dir,
+ "analyses",
+ "colors",
+ "colors_Batch.rds"
+ )))
+ expect_true(file.exists(file.path(
+ temp_dir,
+ "analyses",
+ "colors",
+ "colors_Group.rds"
+ )))
+ expect_true(file.exists(file.path(
+ temp_dir,
+ "analyses",
+ "colors",
+ "colors_Label.rds"
+ )))
+ expect_true(file.exists(file.path(
+ temp_dir,
+ "analyses",
+ "colors",
+ "colors_Replicate.rds"
+ )))
+})
+
+test_that("write_multiOmicDataSet and read_multiOmicDataSet work", {
+ # Create a simple moo object
+ moo <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(GeneName = unique(nidap_raw_counts$GeneName)),
+ counts_lst = list(
+ "raw" = as.data.frame(nidap_raw_counts),
+ "clean" = as.data.frame(nidap_clean_raw_counts)
+ )
+ )
+
+ # Write to temp file
+ temp_file <- tempfile(pattern = "moo-", fileext = ".rds")
+ on.exit(unlink(temp_file), add = TRUE)
+
+ # Test write returns filepath invisibly
+ expect_equal(write_multiOmicDataSet(moo, temp_file), temp_file)
+ expect_true(file.exists(temp_file))
+
+ # Test read
+ moo_read <- read_multiOmicDataSet(temp_file)
+ expect_true(S7::S7_inherits(moo_read, multiOmicDataSet))
+
+ # Verify all properties match
+ expect_equal(moo_read@sample_meta, moo@sample_meta)
+ expect_equal(moo_read@annotation, moo@annotation)
+ expect_equal(moo_read@counts, moo@counts)
+ expect_equal(names(moo_read@analyses), names(moo@analyses))
+})
+
+test_that("write_multiOmicDataSet validates input", {
+ expect_error(
+ write_multiOmicDataSet("not a moo"),
+ "moo must be a multiOmicDataSet"
+ )
+ expect_error(
+ write_multiOmicDataSet(list(sample_meta = data.frame())),
+ "moo must be a multiOmicDataSet"
+ )
+})
+
+test_that("read_multiOmicDataSet validates input", {
+ temp_file <- tempfile(fileext = ".rds")
+ on.exit(unlink(temp_file), add = TRUE)
+
+ # Write a non-moo object
+ readr::write_rds(list(a = 1, b = 2), temp_file)
+
+ expect_error(
+ read_multiOmicDataSet(temp_file),
+ "RDS does not contain a multiOmicDataSet"
+ )
+})
+
+test_that("write and read preserves complex moo with analyses", {
+ moo_complex <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = as.data.frame(nidap_raw_counts),
+ "clean" = as.data.frame(nidap_clean_raw_counts),
+ "filt" = as.data.frame(nidap_filtered_counts),
+ "norm" = list("voom" = as.data.frame(nidap_norm_counts))
+ )
+ )
+
+ temp_file <- tempfile(pattern = "moo-complex-", fileext = ".rds")
+ on.exit(unlink(temp_file), add = TRUE)
+
+ write_multiOmicDataSet(moo_complex, temp_file)
+ moo_restored <- read_multiOmicDataSet(temp_file)
+
+ expect_equal(moo_restored@sample_meta, moo_complex@sample_meta)
+ expect_equal(moo_restored@annotation, moo_complex@annotation)
+ expect_equal(moo_restored@counts$raw, moo_complex@counts$raw)
+ expect_equal(moo_restored@counts$clean, moo_complex@counts$clean)
+ expect_equal(moo_restored@counts$filt, moo_complex@counts$filt)
+ expect_equal(moo_restored@counts$norm$voom, moo_complex@counts$norm$voom)
+ expect_equal(
+ names(moo_restored@analyses$colors),
+ names(moo_complex@analyses$colors)
+ )
+})
+
+test_that("validator returns character vector for invalid objects", {
+ # Test validator with invalid count type
+ expect_error(
+ multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = as.data.frame(nidap_raw_counts),
+ "invalid_type" = as.data.frame(nidap_clean_raw_counts)
+ )
+ ),
+ "@counts can only contain these names"
+ )
+
+ # Test validator with missing raw counts
+ expect_error(
+ multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "clean" = as.data.frame(nidap_clean_raw_counts)
+ )
+ ),
+ "@counts must contain at least 'raw' counts"
+ )
+
+ # Test validator with mismatched sample IDs
+ mismatched_counts <- as.data.frame(nidap_raw_counts)
+ colnames(mismatched_counts)[2] <- "WRONG_SAMPLE_ID"
+ expect_error(
+ multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = mismatched_counts
+ )
+ ),
+ "@sample_meta"
+ )
+})
+
+test_that("validator returns NULL for valid objects", {
+ # Create a valid moo object
+ moo <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = as.data.frame(nidap_raw_counts),
+ "clean" = as.data.frame(nidap_clean_raw_counts)
+ )
+ )
+ # If the validator returned errors, the object wouldn't have been created
+ # So we just check that the object exists and is the correct type
+ expect_true(S7::S7_inherits(moo, multiOmicDataSet))
+})
diff --git a/code/MOSuite/tests/testthat/test-E2E.R b/code/MOSuite/tests/testthat/test-E2E.R
new file mode 100644
index 0000000..d3080e5
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-E2E.R
@@ -0,0 +1,113 @@
+test_that("E2E workflow succeeds for RENEE data", {
+ options(moo_print_plots = FALSE, moo_save_plots = FALSE)
+ gene_counts_tsv <- system.file(
+ "extdata",
+ "RSEM.genes.expected_count.all_samples.txt.gz",
+ package = "MOSuite"
+ )
+ metadata_tsv <- system.file(
+ "extdata",
+ "sample_metadata.tsv.gz",
+ package = "MOSuite"
+ )
+
+ expect_snapshot(
+ moo <- create_multiOmicDataSet_from_files(
+ sample_meta_filepath = metadata_tsv,
+ feature_counts_filepath = gene_counts_tsv
+ ) |>
+ clean_raw_counts() |>
+ filter_counts(
+ group_colname = "condition",
+ label_colname = "sample_id",
+ minimum_count_value_to_be_considered_nonzero = 1,
+ minimum_number_of_samples_with_nonzero_counts_in_total = 1,
+ minimum_number_of_samples_with_nonzero_counts_in_a_group = 1,
+ ) |>
+ normalize_counts(
+ group_colname = "condition",
+ label_colname = "sample_id"
+ ) |>
+ diff_counts(
+ covariates_colnames = "condition",
+ contrast_colname = "condition",
+ contrasts = c("knockout-wildtype")
+ ) |>
+ filter_diff(
+ significance_column = "adjpval",
+ significance_cutoff = 0.05,
+ change_column = "logFC",
+ change_cutoff = 1,
+ filtering_mode = "any",
+ include_estimates = c("FC", "logFC", "tstat", "pval", "adjpval"),
+ round_estimates = TRUE,
+ rounding_decimal_for_percent_cells = 0,
+ contrast_filter = "none",
+ contrasts = c(),
+ groups = c(),
+ groups_filter = "none",
+ label_font_size = 6,
+ label_distance = 1,
+ y_axis_expansion = 0.08,
+ fill_colors = c("steelblue1", "whitesmoke"),
+ pie_chart_in_3d = TRUE,
+ bar_width = 0.4,
+ draw_bar_border = TRUE,
+ plot_type = "bar",
+ plot_titles_fontsize = 12
+ )
+ )
+})
+
+test_that("E2E workflow succeeds for NIDAP data", {
+ options(moo_print_plots = FALSE, moo_save_plots = FALSE)
+ expect_snapshot(
+ moo_nidap <- create_multiOmicDataSet_from_dataframes(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ counts_dat = as.data.frame(nidap_raw_counts)
+ ) |>
+ clean_raw_counts() |>
+ filter_counts(group_colname = "Group") |>
+ normalize_counts(group_colname = "Group") |>
+ batch_correct_counts(
+ covariates_colname = "Group",
+ batch_colname = "Batch",
+ label_colname = "Label"
+ ) |>
+ diff_counts(
+ count_type = "filt",
+ sub_count_type = NULL,
+ sample_id_colname = "Sample",
+ feature_id_colname = "GeneName",
+ covariates_colnames = c("Group", "Batch"),
+ contrast_colname = c("Group"),
+ contrasts = c("B-A", "C-A", "B-C"),
+ input_in_log_counts = FALSE,
+ return_mean_and_sd = TRUE,
+ voom_normalization_method = "quantile",
+ ) |>
+ filter_diff(
+ significance_column = "adjpval",
+ significance_cutoff = 0.05,
+ change_column = "logFC",
+ change_cutoff = 1,
+ filtering_mode = "any",
+ include_estimates = c("FC", "logFC", "tstat", "pval", "adjpval"),
+ round_estimates = TRUE,
+ rounding_decimal_for_percent_cells = 0,
+ contrast_filter = "none",
+ contrasts = c(),
+ groups = c(),
+ groups_filter = "none",
+ label_font_size = 6,
+ label_distance = 1,
+ y_axis_expansion = 0.08,
+ fill_colors = c("steelblue1", "whitesmoke"),
+ pie_chart_in_3d = TRUE,
+ bar_width = 0.4,
+ draw_bar_border = TRUE,
+ plot_type = "bar",
+ plot_titles_fontsize = 12
+ )
+ )
+})
diff --git a/code/MOSuite/tests/testthat/test-batch-correction.R b/code/MOSuite/tests/testthat/test-batch-correction.R
new file mode 100644
index 0000000..850b004
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-batch-correction.R
@@ -0,0 +1,64 @@
+test_that("batch_correction works for NIDAP", {
+ moo <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = as.data.frame(nidap_raw_counts),
+ "clean" = as.data.frame(nidap_clean_raw_counts),
+ "filt" = as.data.frame(nidap_filtered_counts),
+ "norm" = list("voom" = as.data.frame(nidap_norm_counts))
+ )
+ ) |>
+ batch_correct_counts(
+ count_type = "norm",
+ sub_count_type = "voom",
+ covariates_colnames = "Group",
+ batch_colname = "Batch",
+ label_colname = "Label",
+ print_plots = TRUE
+ )
+ # TODO: getting different results than nidap_batch_corrected_counts
+ expect_true(all.equal(
+ moo@counts[["batch"]] |>
+ dplyr::arrange(desc(Gene)),
+ as.data.frame(nidap_batch_corrected_counts_2) |>
+ dplyr::arrange(desc(Gene))
+ ))
+})
+
+test_that("batch_correction warnings & errors", {
+ moo <- create_multiOmicDataSet_from_dataframes(
+ readr::read_tsv(
+ system.file("extdata", "sample_metadata.tsv.gz", package = "MOSuite")
+ ) |>
+ dplyr::mutate(batch = 1),
+ gene_counts
+ ) |>
+ clean_raw_counts() |>
+ filter_counts(
+ group_colname = "condition",
+ label_colname = "sample_id",
+ minimum_count_value_to_be_considered_nonzero = 1,
+ minimum_number_of_samples_with_nonzero_counts_in_total = 1,
+ minimum_number_of_samples_with_nonzero_counts_in_a_group = 1,
+ print_plots = FALSE
+ ) |>
+ normalize_counts(group_colname = "condition", label_colname = "sample_id")
+
+ expect_warning(
+ moo |>
+ batch_correct_counts(
+ covariates_colnames = "condition",
+ batch_colname = "batch"
+ ),
+ "Batch column 'batch' contains only 1 unique value"
+ )
+ expect_error(
+ moo |>
+ batch_correct_counts(
+ covariates_colnames = "batch",
+ batch_colname = "batch"
+ ),
+ "Batch column 'batch' cannot be included in covariates."
+ )
+})
diff --git a/code/MOSuite/tests/testthat/test-clean.R b/code/MOSuite/tests/testthat/test-clean.R
new file mode 100644
index 0000000..475c239
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-clean.R
@@ -0,0 +1,106 @@
+test_that("clean_raw_counts works for NIDAP data", {
+ moo_nidap <- create_multiOmicDataSet_from_dataframes(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ counts_dat = as.data.frame(nidap_raw_counts)
+ ) |>
+ clean_raw_counts(print_plots = TRUE)
+
+ actual <- moo_nidap@counts[["clean"]] |>
+ dplyr::rename(Gene = GeneName) |>
+ as.data.frame()
+
+ expected <- as.data.frame(nidap_clean_raw_counts)
+
+ cmp <- all.equal(actual, expected, check.attributes = FALSE)
+ expect_true(isTRUE(cmp), info = paste(cmp, collapse = "\n"))
+})
+
+test_that("clean_raw_counts works for RENEE data", {
+ moo <- create_multiOmicDataSet_from_dataframes(
+ readr::read_tsv(
+ system.file("extdata", "sample_metadata.tsv.gz", package = "MOSuite")
+ ),
+ gene_counts
+ ) |>
+ clean_raw_counts()
+ expect_equal(
+ head(moo@counts$clean),
+ structure(
+ list(
+ gene_id = c(
+ "ENSG00000121410.11",
+ "ENSG00000268895.5",
+ "ENSG00000148584.15",
+ "ENSG00000175899.14",
+ "ENSG00000245105.3",
+ "ENSG00000166535.20"
+ ),
+ KO_S3 = c(0, 0, 0, 0, 0, 0),
+ KO_S4 = c(0, 0, 0, 0, 0, 0),
+ WT_S1 = c(0, 0, 0, 0, 0, 0),
+ WT_S2 = c(0, 0, 0, 0, 0, 0)
+ ),
+ row.names = c(NA, 6L),
+ class = "data.frame"
+ )
+ )
+ expect_equal(
+ tail(moo@counts$clean),
+ structure(
+ list(
+ gene_id = c(
+ "ENSG00000232242.2",
+ "ENSG00000162378.13",
+ "ENSG00000159840.16",
+ "ENSG00000274572.1",
+ "ENSG00000074755.15",
+ "ENSG00000272920.1"
+ ),
+ KO_S3 = c(0, 0, 0, 0, 0, 0),
+ KO_S4 = c(0, 0, 0, 0, 0, 0),
+ WT_S1 = c(0, 0, 0, 0, 0, 0),
+ WT_S2 = c(0, 0, 0, 0, 0, 0)
+ ),
+ row.names = 58924:58929,
+ class = "data.frame"
+ )
+ )
+})
+
+test_that("aggregate_duplicate_gene_names returns collapsed dfout", {
+ counts_dat <- data.frame(
+ gene_id = c("A", "A", "B"),
+ sample1 = c(1, 2, 3),
+ sample2 = c(4, 5, 6),
+ stringsAsFactors = FALSE,
+ check.names = FALSE
+ )
+
+ # Case 1: aggregation enabled
+ out <- MOSuite:::aggregate_duplicate_gene_names(
+ counts_dat = counts_dat,
+ gene_name_column_to_use_for_collapsing_duplicates = "gene_id",
+ aggregate_rows_with_duplicate_gene_names = TRUE,
+ split_gene_name = FALSE
+ )
+
+ expect_equal(nrow(out), 2)
+ expect_equal(sum(duplicated(out$gene_id)), 0)
+
+ a_row <- out[out$gene_id == "A", , drop = FALSE]
+ expect_equal(a_row$sample1, 3)
+ expect_equal(a_row$sample2, 9)
+
+ # Case 2: aggregation disabled
+ out_noagg <- MOSuite:::aggregate_duplicate_gene_names(
+ counts_dat = counts_dat,
+ gene_name_column_to_use_for_collapsing_duplicates = "gene_id",
+ aggregate_rows_with_duplicate_gene_names = FALSE,
+ split_gene_name = FALSE
+ )
+
+ expect_equal(nrow(out_noagg), 3)
+ expect_equal(sum(duplicated(out_noagg$gene_id)), 1)
+ expect_equal(out_noagg$sample1, counts_dat$sample1)
+ expect_equal(out_noagg$sample2, counts_dat$sample2)
+})
diff --git a/code/MOSuite/tests/testthat/test-cli.R b/code/MOSuite/tests/testthat/test-cli.R
new file mode 100644
index 0000000..8d580fb
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-cli.R
@@ -0,0 +1,152 @@
+write_example_json <- function() {
+ j <- list(
+ feature_counts_filepath = system.file(
+ "extdata",
+ "RSEM.genes.expected_count.all_samples.txt.gz",
+ package = "MOSuite"
+ ),
+ sample_meta_filepath = system.file(
+ "extdata",
+ "sample_metadata.tsv.gz",
+ package = "MOSuite"
+ ),
+ moo_output_rds = "moo.rds"
+ )
+ return(jsonlite::write_json(j, "inst/extdata/example.json"))
+}
+
+test_that("mosuite cli", {
+ command <- paste0(
+ system.file("exec", "mosuite", package = "MOSuite"),
+ " create_multiOmicDataSet_from_files --json=",
+ system.file("extdata", "example.json", package = "MOSuite")
+ )
+ expect_snapshot(system(command))
+})
+
+test_that("cli_exec parses args correctly", {
+ expect_equal(cli_exec("do_math"), 3)
+ expect_equal(cli_exec(c("do_math", "--subtract", "--no-add")), -1)
+ expect_equal(cli_exec(c("do_math", "left=2", "right=3")), 5)
+})
+
+test_that("cli_exec --json --debug", {
+ expect_equal(
+ deparse(cli_exec(
+ c(
+ "create_multiOmicDataSet_from_files",
+ paste0(
+ '--json="',
+ system.file("extdata", "example.json", package = "MOSuite"),
+ '"'
+ ),
+ "--debug"
+ )
+ )),
+ c(
+ paste0(
+ "MOSuite::create_multiOmicDataSet_from_files(",
+ "feature_counts_filepath = \"inst/extdata/RSEM.genes.expected_count.all_samples.txt.gz\", "
+ ),
+ " sample_meta_filepath = \"inst/extdata/sample_metadata.tsv.gz\")"
+ )
+ )
+ expect_error(
+ cli_exec(c(
+ "filter_counts",
+ paste0(
+ '--json="',
+ system.file("extdata", "example.json", package = "MOSuite"),
+ '"'
+ ),
+ "--debug"
+ )),
+ "moo_input_rds must be included"
+ )
+})
+
+test_that("mosuite --help", {
+ expect_snapshot(cli_exec("--help"))
+ expect_snapshot(system(paste(
+ system.file("exec", "mosuite", package = "MOSuite"),
+ "--help"
+ )))
+ expect_snapshot(cli_exec("help"))
+ expect_true(inherits(
+ cli_exec(c(
+ "filter_counts",
+ "--help"
+ )),
+ "help_files_with_topic"
+ ))
+ expect_error(cli_exec("not_a_function"), "not a known function")
+})
+
+test_that("cli_unknown suggests closest matching function", {
+ # Test with a typo that has a close match
+ result <- cli_unknown("filter_count", getNamespaceExports("MOSuite"))
+ expect_match(result, "filter_count is not a known function")
+ expect_match(result, "Did you mean 'filter_counts'")
+
+ # Test with another typo
+ result <- cli_unknown("batch_correct_count", getNamespaceExports("MOSuite"))
+ expect_match(result, "batch_correct_count is not a known function")
+ expect_match(result, "Did you mean 'batch_correct_counts'")
+
+ # Test with completely unrelated name (no suggestions)
+ result <- cli_unknown("xyz123", getNamespaceExports("MOSuite"))
+ expect_match(result, "xyz123 is not a known function")
+ expect_false(grepl("Did you mean", result))
+})
+
+test_that("mosuite cli E2E", {
+ new <- tempfile()
+ create_empty_dir(new)
+ # note: file paths in json files assume all files are in the current workdir
+ withr::with_dir(new = new, code = {
+ file.copy(
+ system.file("extdata", "nidap", "Raw_Counts.csv.gz", package = "MOSuite"),
+ "./"
+ )
+ file.copy(
+ system.file(
+ "extdata",
+ "nidap",
+ "Sample_Metadata_Bulk_RNA-seq_Training_Dataset_CCBR.csv.gz",
+ package = "MOSuite"
+ ),
+ "./"
+ )
+ json_paths <- system.file(
+ "extdata",
+ "json_args",
+ "common",
+ package = "MOSuite"
+ )
+ Sys.glob(glue::glue("{json_paths}/*.json")) |>
+ lapply(function(x) {
+ return(file.copy(x, "./"))
+ })
+
+ run_function_cli("create_multiOmicDataSet_from_files")
+ run_function_cli("clean_raw_counts")
+ run_function_cli("filter_counts")
+ run_function_cli("normalize_counts")
+ run_function_cli("batch_correct_counts")
+ run_function_cli("diff_counts")
+ run_function_cli("filter_diff")
+ run_function_cli("write_multiOmicDataSet_properties")
+ run_function_cli("plot_expr_heatmap")
+ # run_function_cli("plot_pca_2d")
+ # run_function_cli("plot_pca_3d")
+ # run_function_cli("plot_volcano_enhanced")
+ # run_function_cli("plot_volcano_summary")
+ # run_function_cli("plot_venn_diagram")
+
+ expect_true(file.exists(file.path("moo", "sample_metadata.csv")))
+ expect_true(file.exists("moo_diff_filter.rds"))
+ moo <- readr::read_rds("moo_diff_filter.rds")
+ expect_equal(names(moo@counts), c("raw", "clean", "filt", "norm", "batch"))
+ expect_equal(names(moo@analyses), c("colors", "diff", "diff_filt"))
+ })
+})
diff --git a/code/MOSuite/tests/testthat/test-colors.R b/code/MOSuite/tests/testthat/test-colors.R
new file mode 100644
index 0000000..3aea8b4
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-colors.R
@@ -0,0 +1,173 @@
+test_that("get_random_colors works", {
+ set.seed(10)
+ expect_equal(
+ get_random_colors(5),
+ c("#B85CD0", "#B4E16D", "#DC967D", "#A6DCC5", "#B5AAD3")
+ )
+ expect_equal(get_random_colors(3), c("#B3C4C7", "#B7D579", "#C56BC8"))
+ expect_error(get_random_colors(0), "num_colors must be at least 1")
+})
+
+test_that("get_colors_lst works on nidap_sample_metadata", {
+ expect_equal(
+ get_colors_lst(nidap_sample_metadata),
+ list(
+ Sample = c(
+ A1 = "#000000",
+ A2 = "#E69F00",
+ A3 = "#56B4E9",
+ B1 = "#009E73",
+ B2 = "#F0E442",
+ B3 = "#0072B2",
+ C1 = "#D55E00",
+ C2 = "#CC79A7",
+ C3 = "#999999"
+ ),
+ Group = c(
+ A = "#000000",
+ B = "#E69F00",
+ C = "#56B4E9"
+ ),
+ Replicate = c(
+ `1` = "#000000",
+ `2` = "#E69F00",
+ `3` = "#56B4E9"
+ ),
+ Batch = c(`1` = "#000000", `2` = "#E69F00"),
+ Label = c(
+ A1 = "#000000",
+ A2 = "#E69F00",
+ A3 = "#56B4E9",
+ B1 = "#009E73",
+ B2 = "#F0E442",
+ B3 = "#0072B2",
+ C1 = "#D55E00",
+ C2 = "#CC79A7",
+ C3 = "#999999"
+ )
+ )
+ )
+})
+test_that("get_colors_lst handles alternative palette functions", {
+ sample_meta <- system.file(
+ "extdata",
+ "sample_metadata.tsv.gz",
+ package = "MOSuite"
+ ) |>
+ readr::read_tsv()
+ expect_message(
+ expect_warning(
+ get_colors_lst(
+ sample_meta,
+ palette_fun = RColorBrewer::brewer.pal,
+ name = "Set3"
+ ),
+ "minimal value for n is 3"
+ ),
+ "Warning raised in "
+ )
+})
+test_that("get_colors_vctr falls back to random colors when n exceeds palette max", {
+ # Okabe-Ito palette has a maximum of 9 colors. When n > 9, the function
+ # should fall back to get_random_colors() and emit a message.
+ dat_many_cats <- data.frame(
+ group = paste0("cat", seq_len(12))
+ )
+ expect_no_warning(
+ expect_message(
+ result <- get_colors_vctr(dat_many_cats, "group"),
+ "exceeds the palette maximum"
+ )
+ )
+ expect_length(result, 12)
+ expect_named(result, paste0("cat", seq_len(12)))
+})
+
+test_that("set_color_pal overrides the color palette", {
+ moo <- create_multiOmicDataSet_from_dataframes(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ counts_dat = as.data.frame(nidap_raw_counts)
+ )
+ expect_equal(
+ moo@analyses$colors,
+ list(
+ Sample = c(
+ A1 = "#000000",
+ A2 = "#E69F00",
+ A3 = "#56B4E9",
+ B1 = "#009E73",
+ B2 = "#F0E442",
+ B3 = "#0072B2",
+ C1 = "#D55E00",
+ C2 = "#CC79A7",
+ C3 = "#999999"
+ ),
+ Group = c(
+ A = "#000000",
+ B = "#E69F00",
+ C = "#56B4E9"
+ ),
+ Replicate = c(
+ `1` = "#000000",
+ `2` = "#E69F00",
+ `3` = "#56B4E9"
+ ),
+ Batch = c(`1` = "#000000", `2` = "#E69F00"),
+ Label = c(
+ A1 = "#000000",
+ A2 = "#E69F00",
+ A3 = "#56B4E9",
+ B1 = "#009E73",
+ B2 = "#F0E442",
+ B3 = "#0072B2",
+ C1 = "#D55E00",
+ C2 = "#CC79A7",
+ C3 = "#999999"
+ )
+ )
+ )
+ moo2 <- moo |>
+ set_color_pal(
+ colname = "Group",
+ palette_fun = RColorBrewer::brewer.pal,
+ name = "Set2"
+ )
+ expect_equal(
+ moo2@analyses$colors,
+ list(
+ Sample = c(
+ A1 = "#000000",
+ A2 = "#E69F00",
+ A3 = "#56B4E9",
+ B1 = "#009E73",
+ B2 = "#F0E442",
+ B3 = "#0072B2",
+ C1 = "#D55E00",
+ C2 = "#CC79A7",
+ C3 = "#999999"
+ ),
+ Group = c(
+ A = "#66C2A5",
+ B = "#FC8D62",
+ C = "#8DA0CB"
+ ),
+ Replicate = c(
+ `1` = "#000000",
+ `2` = "#E69F00",
+ `3` = "#56B4E9"
+ ),
+ Batch = c(`1` = "#000000", `2` = "#E69F00"),
+ Label = c(
+ A1 = "#000000",
+ A2 = "#E69F00",
+ A3 = "#56B4E9",
+ B1 = "#009E73",
+ B2 = "#F0E442",
+ B3 = "#0072B2",
+ C1 = "#D55E00",
+ C2 = "#CC79A7",
+ C3 = "#999999"
+ )
+ )
+ )
+})
diff --git a/code/MOSuite/tests/testthat/test-counts.R b/code/MOSuite/tests/testthat/test-counts.R
new file mode 100644
index 0000000..8633706
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-counts.R
@@ -0,0 +1,106 @@
+test_that("counts_dat_to_matrix works", {
+ expect_equal(
+ gene_counts |>
+ dplyr::select(-GeneName) |>
+ head() |>
+ counts_dat_to_matrix(),
+ structure(
+ c(
+ 0L,
+ 0L,
+ 0L,
+ 0L,
+ 0L,
+ 0L,
+ 0L,
+ 0L,
+ 0L,
+ 0L,
+ 0L,
+ 0L,
+ 0L,
+ 0L,
+ 0L,
+ 0L,
+ 0L,
+ 0L,
+ 0L,
+ 0L,
+ 0L,
+ 0L,
+ 0L,
+ 0L
+ ),
+ dim = c(6L, 4L),
+ dimnames = list(
+ c(
+ "ENSG00000121410.11",
+ "ENSG00000268895.5",
+ "ENSG00000148584.15",
+ "ENSG00000175899.14",
+ "ENSG00000245105.3",
+ "ENSG00000166535.20"
+ ),
+ c("KO_S3", "KO_S4", "WT_S1", "WT_S2")
+ )
+ )
+ )
+})
+
+test_that("calc_cpm works on RENEE data", {
+ sample_meta <- data.frame(
+ sample_id = c("KO_S3", "KO_S4", "WT_S1", "WT_S2"),
+ condition = factor(
+ c("knockout", "knockout", "wildtype", "wildtype"),
+ levels = c("wildtype", "knockout")
+ )
+ )
+ moo <- create_multiOmicDataSet_from_dataframes(
+ sample_meta,
+ gene_counts |> dplyr::select(-GeneName)
+ )
+ moo <- moo |> calc_cpm()
+ cpm_edger <- gene_counts |>
+ dplyr::select(-GeneName) |>
+ counts_dat_to_matrix() |>
+ edgeR::cpm() |>
+ as.data.frame() |>
+ tibble::rownames_to_column("gene_id")
+ expect_equal(moo@counts$cpm, cpm_edger)
+})
+
+test_that("calc_cpm_df works on NIDAP data", {
+ df <- nidap_clean_raw_counts |> as.data.frame()
+ trans.df <- df
+ trans.df[, -1] <- edgeR::cpm(as.matrix(df[, -1]))
+
+ expect_equal(
+ calc_cpm_df(df, feature_id_colname = "Gene"),
+ trans.df,
+ ignore_attr = TRUE
+ )
+})
+test_that("calc_cpm_df preserves rownames", {
+ df <- nidap_clean_raw_counts |>
+ as.data.frame() |>
+ tail()
+ trans.df <- df
+ trans.df[, -1] <- edgeR::cpm(as.matrix(df[, -1]))
+
+ expect_equal(
+ calc_cpm_df(df, feature_id_colname = "Gene"),
+ trans.df,
+ ignore_attr = TRUE
+ )
+})
+
+test_that("calc_cpm_df preserves non-integer character rownames", {
+ df <- nidap_clean_raw_counts |> as.data.frame()
+ rownames(df) <- paste0("row_", seq_len(nrow(df)))
+ trans.df <- df
+ trans.df[, -1] <- edgeR::cpm(as.matrix(df[, -1]))
+
+ result <- calc_cpm_df(df, feature_id_colname = "Gene")
+ expect_equal(rownames(result), rownames(trans.df))
+ expect_equal(result, trans.df, ignore_attr = TRUE)
+})
diff --git a/code/MOSuite/tests/testthat/test-deseq2.R b/code/MOSuite/tests/testthat/test-deseq2.R
new file mode 100644
index 0000000..5d62d7f
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-deseq2.R
@@ -0,0 +1,104 @@
+set.seed(20231228)
+moo <- create_multiOmicDataSet_from_files(
+ sample_meta_filepath = system.file(
+ "extdata",
+ "sample_metadata.tsv.gz",
+ package = "MOSuite"
+ ),
+ feature_counts_filepath = system.file(
+ "extdata",
+ "RSEM.genes.expected_count.all_samples.txt.gz",
+ package = "MOSuite"
+ )
+) |>
+ suppressMessages()
+moo@sample_meta <- moo@sample_meta |>
+ dplyr::mutate(
+ condition = factor(condition, levels = c("wildtype", "knockout"))
+ )
+
+test_that("run_deseq2 works", {
+ expect_error(
+ run_deseq2(moo, design = ~condition),
+ "moo must contain filtered counts"
+ )
+
+ min_count <- 10
+ genes_above_threshold <- moo@counts$raw |>
+ tidyr::pivot_longer(
+ !tidyselect::any_of(c("gene_id", "GeneName")),
+ names_to = "sample_id",
+ values_to = "count"
+ ) |>
+ dplyr::group_by(gene_id) |>
+ dplyr::summarize(count_sum = sum(count)) |>
+ dplyr::filter(count_sum >= min_count) |>
+ dplyr::pull(gene_id)
+ moo@counts$filt <- moo@counts$raw |>
+ dplyr::filter(gene_id %in% (genes_above_threshold))
+ moo <- moo |>
+ run_deseq2(
+ moo,
+ design = ~condition,
+ fitType = "local",
+ feature_id_colname = "gene_id"
+ ) |>
+ suppressMessages()
+ dds <- moo@analyses$deseq2_ds
+
+ # check colData
+ expect_equal(
+ dds@colData |> as.data.frame(),
+ structure(
+ list(
+ sample_id = c("KO_S3", "KO_S4", "WT_S1", "WT_S2"),
+ condition = structure(
+ c(2L, 2L, 1L, 1L),
+ levels = c("wildtype", "knockout"),
+ class = "factor"
+ ),
+ sizeFactor = c(
+ 0.739974960000608,
+ 0.717118872451827,
+ 1.34164078649987,
+ 1.69303431346171
+ )
+ ),
+ class = "data.frame",
+ row.names = c("KO_S3", "KO_S4", "WT_S1", "WT_S2")
+ )
+ )
+
+ # check some of the counts
+ expect_equal(
+ dds@assays@data@listData |>
+ as.data.frame() |>
+ dplyr::filter(counts.KO_S3 > 15),
+ structure(
+ list(
+ counts.KO_S3 = c(25L, 16L, 19L),
+ counts.KO_S4 = c(22L, 10L, 26L),
+ counts.WT_S1 = c(74L, 0L, 10L),
+ counts.WT_S2 = c(104L, 0L, 8L),
+ mu.1 = c(23.8682703018296, 13.1709993621292, 22.8847421174049),
+ mu.2 = c(23.131035523431, 12.7641781441173, 22.1778862132993),
+ mu.3 = c(78.660598086526, 0.163744933328453, 8.02129153499216),
+ mu.4 = c(99.2628526338571, 0.20663190443383, 10.1221742389411),
+ H.1 = c(0.507689382133648, 0.502052019764842, 0.503779946658291),
+ H.2 = c(0.492310572696009, 0.497947673454118, 0.496219957488281),
+ H.3 = c(0.446124908477968, 0.499997692127505, 0.459429993613616),
+ H.4 = c(0.553875066333655, 0.499997692127505, 0.540569676700414),
+ cooks.1 = c(0.0157542703049353, 0.0289351707384892, 0.0400561954057841),
+ cooks.2 = c(0.0151418002975461, 0.0286548510838955, 0.0393806789741918),
+ cooks.3 = c(0.0212216674791077, 0.130797161405866, 0.058560061396526),
+ cooks.4 = c(0.026938300726748, 0.156792088506772, 0.0711568961752663)
+ ),
+ class = "data.frame",
+ row.names = c(
+ "ENSG00000185658.13",
+ "ENSG00000233922.2",
+ "ENSG00000157601.14"
+ )
+ )
+ )
+})
diff --git a/code/MOSuite/tests/testthat/test-differential.R b/code/MOSuite/tests/testthat/test-differential.R
new file mode 100644
index 0000000..c2aedf1
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-differential.R
@@ -0,0 +1,496 @@
+moo_nidap <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = as.data.frame(nidap_raw_counts),
+ "clean" = as.data.frame(nidap_clean_raw_counts),
+ "filt" = as.data.frame(nidap_filtered_counts),
+ "norm" = list("voom" = as.data.frame(nidap_norm_counts))
+ )
+)
+
+test_that("differential analysis works for NIDAP", {
+ options(moo_print_plots = FALSE)
+ deg_moo <- moo_nidap |>
+ diff_counts(
+ count_type = "filt",
+ sub_count_type = NULL,
+ sample_id_colname = "Sample",
+ feature_id_colname = "Gene",
+ covariates_colnames = c("Group", "Batch"),
+ contrast_colname = c("Group"),
+ contrasts = c("B-A", "C-A", "B-C"),
+ voom_normalization_method = "quantile",
+ )
+
+ deg_moo_wide <- deg_moo@analyses$diff |>
+ join_dfs_wide() |>
+ dplyr::arrange(Gene)
+ deg_moo_wide <- deg_moo_wide |>
+ dplyr::select(colnames(deg_moo_wide) |> sort())
+
+ nidap_wide <- nidap_deg_analysis_2 |>
+ join_dfs_wide() |>
+ dplyr::arrange(Gene)
+ nidap_wide <- nidap_wide |>
+ dplyr::select(colnames(nidap_wide) |> sort())
+
+ expect_equal(
+ deg_moo_wide,
+ nidap_wide,
+ tolerance = 0.01
+ )
+})
+
+test_that("diff_counts works for RENEE on macOS", {
+ skip_on_os("linux") # these expected values only work for macOS
+ options(moo_print_plots = FALSE)
+ moo_renee <- create_multiOmicDataSet_from_dataframes(
+ readr::read_tsv(
+ system.file("extdata", "sample_metadata.tsv.gz", package = "MOSuite")
+ ),
+ counts_dat = gene_counts
+ ) |>
+ clean_raw_counts() |>
+ filter_counts(
+ group_colname = "condition",
+ label_colname = "sample_id",
+ minimum_count_value_to_be_considered_nonzero = 1,
+ minimum_number_of_samples_with_nonzero_counts_in_total = 1,
+ minimum_number_of_samples_with_nonzero_counts_in_a_group = 1
+ ) |>
+ normalize_counts(group_colname = "condition", label_colname = "sample_id")
+ moo_renee <- moo_renee |>
+ diff_counts(
+ count_type = "norm",
+ sub_count_type = "voom",
+ sample_id_colname = NULL,
+ feature_id_colname = NULL,
+ covariates_colnames = c("condition"),
+ contrast_colname = c("condition"),
+ # , 'condition2'), # TODO does not currently work for more than one contrast column
+ contrasts = c("knockout-wildtype"),
+ voom_normalization_method = "TMM",
+ return_mean_and_sd = TRUE,
+ input_in_log_counts = TRUE
+ )
+ actual <- moo_renee@analyses$diff[[1]] |> head()
+ expected <- structure(
+ list(
+ gene_id = c(
+ "ENSG00000160179.18",
+ "ENSG00000258017.1",
+ "ENSG00000282393.1",
+ "ENSG00000286104.1",
+ "ENSG00000274422.1",
+ "ENSG00000154734.15"
+ ),
+ knockout_mean = c(
+ 10.9805713961628,
+ 9.00423753343925,
+ 9.00423753343925,
+ 9.00423753343925,
+ 9.00423753343925,
+ 8.60833480887895
+ ),
+ knockout_sd = c(
+ 2.1542262015539,
+ 0.640731950886978,
+ 0.479050054020298,
+ 0.640731950886978,
+ 0.479050054020298,
+ 0.0808409484333391
+ ),
+ wildtype_mean = c(
+ 12.3499548758012,
+ 8.87501967069015,
+ 8.87501967069015,
+ 8.87501967069015,
+ 8.87501967069015,
+ 14.6328231986934
+ ),
+ wildtype_sd = c(
+ 0.082485020673847,
+ 0.00393703924789543,
+ 0.00393703924789543,
+ 0.00393703924789543,
+ 0.00393703924789543,
+ 0.00393703924789669
+ ),
+ FC = c(
+ -2.54684822818721,
+ 1.11617198002536,
+ 1.07719571726849,
+ 1.11617198002536,
+ 1.07719571726849,
+ -65.0581764060579
+ ),
+ logFC = c(
+ -1.34871298907199,
+ 0.158559335064906,
+ 0.107280399074217,
+ 0.158559335064906,
+ 0.107280399074217,
+ -6.02365847879717
+ ),
+ tstat = c(
+ -1.27583132251236,
+ 0.432232276101188,
+ 0.344942879338297,
+ 0.432232276101188,
+ 0.344942879338297,
+ -32.9652734167629
+ ),
+ pval = c(
+ 0.273465120057361,
+ 0.688646721521334,
+ 0.748130864876644,
+ 0.688646721521334,
+ 0.748130864876644,
+ 7.17809796209428e-06
+ ),
+ adjpval = c(
+ 0.506868470934344,
+ 0.79745817464873,
+ 0.79745817464873,
+ 0.79745817464873,
+ 0.79745817464873,
+ 0.00102459282397239
+ )
+ ),
+ row.names = c(NA, 6L),
+ class = "data.frame"
+ )
+ expect_equal(actual, expected)
+})
+
+test_that("diff_counts behaves consistently across platforms", {
+ # these expectations do not test exact numerical values that may be inconsistent
+ # across BLAS/LAPACK implementations, but rather test structure and reasonable
+ # value ranges.
+ options(moo_print_plots = FALSE)
+ moo_renee <- create_multiOmicDataSet_from_dataframes(
+ readr::read_tsv(
+ system.file("extdata", "sample_metadata.tsv.gz", package = "MOSuite")
+ ),
+ counts_dat = gene_counts
+ ) |>
+ clean_raw_counts() |>
+ filter_counts(
+ group_colname = "condition",
+ label_colname = "sample_id",
+ minimum_count_value_to_be_considered_nonzero = 1,
+ minimum_number_of_samples_with_nonzero_counts_in_total = 1,
+ minimum_number_of_samples_with_nonzero_counts_in_a_group = 1
+ ) |>
+ normalize_counts(group_colname = "condition", label_colname = "sample_id")
+ moo_renee <- moo_renee |>
+ diff_counts(
+ count_type = "norm",
+ sub_count_type = "voom",
+ sample_id_colname = NULL,
+ feature_id_colname = NULL,
+ covariates_colnames = c("condition"),
+ contrast_colname = c("condition"),
+ # , 'condition2'), # TODO does not currently work for more than one contrast column
+ contrasts = c("knockout-wildtype"),
+ voom_normalization_method = "TMM",
+ return_mean_and_sd = TRUE,
+ input_in_log_counts = TRUE
+ )
+
+ # Test structure and behavior instead of exact numerical values
+ # (exact values vary across BLAS/LAPACK implementations)
+ result <- moo_renee@analyses$diff[[1]]
+
+ # Check that result is a data frame
+ expect_s3_class(result, "data.frame")
+
+ # Check expected columns are present
+ expected_cols <- c(
+ "gene_id",
+ "knockout_mean",
+ "knockout_sd",
+ "wildtype_mean",
+ "wildtype_sd",
+ "FC",
+ "logFC",
+ "tstat",
+ "pval",
+ "adjpval"
+ )
+ expect_true(all(expected_cols %in% names(result)))
+
+ # Check data types
+ expect_type(result$gene_id, "character")
+ expect_type(result$knockout_mean, "double")
+ expect_type(result$knockout_sd, "double")
+ expect_type(result$wildtype_mean, "double")
+ expect_type(result$wildtype_sd, "double")
+ expect_type(result$FC, "double")
+ expect_type(result$logFC, "double")
+ expect_type(result$tstat, "double")
+ expect_type(result$pval, "double")
+ expect_type(result$adjpval, "double")
+
+ # Check reasonable value ranges
+ expect_true(all(result$pval >= 0 & result$pval <= 1))
+ expect_true(all(result$adjpval >= 0 & result$adjpval <= 1))
+ expect_true(all(result$knockout_sd >= 0))
+ expect_true(all(result$wildtype_sd >= 0))
+ expect_true(all(is.finite(result$knockout_mean)))
+ expect_true(all(is.finite(result$wildtype_mean)))
+
+ # Check that specific genes are present in expected order
+ expect_equal(
+ head(result$gene_id, 6),
+ c(
+ "ENSG00000160179.18",
+ "ENSG00000258017.1",
+ "ENSG00000282393.1",
+ "ENSG00000286104.1",
+ "ENSG00000274422.1",
+ "ENSG00000154734.15"
+ )
+ )
+
+ # Check that the most significant gene (row 6) has small p-value
+ expect_true(result$pval[6] < 0.001)
+ expect_true(result$adjpval[6] < 0.01)
+})
+
+test_that("diff_counts errors", {
+ expect_error(
+ moo_nidap |> diff_counts(count_type = "DoesNotExist"),
+ "count_type DoesNotExist not in"
+ )
+ expect_error(
+ moo_nidap |>
+ diff_counts(count_type = "raw", sub_count_type = "DoesNotExist"),
+ "raw counts is not a named list"
+ )
+ expect_error(
+ moo_nidap |>
+ diff_counts(count_type = "norm", sub_count_type = "DoesNotExist"),
+ "sub_count_type DoesNotExist is not in"
+ )
+})
+
+test_that("filter_diff works for NIDAP on macOS", {
+ skip_on_os("linux")
+ options(moo_print_plots = FALSE)
+ moo <- moo_nidap |>
+ diff_counts(
+ count_type = "filt",
+ sub_count_type = NULL,
+ sample_id_colname = "Sample",
+ feature_id_colname = "Gene",
+ covariates_colnames = c("Group", "Batch"),
+ contrast_colname = c("Group"),
+ contrasts = c("B-A", "C-A", "B-C"),
+ voom_normalization_method = "quantile",
+ ) |>
+ filter_diff(
+ significance_column = "adjpval",
+ significance_cutoff = 0.05,
+ change_column = "logFC",
+ change_cutoff = 1,
+ filtering_mode = "any",
+ include_estimates = c("FC", "logFC", "tstat", "pval", "adjpval"),
+ round_estimates = TRUE,
+ rounding_decimal_for_percent_cells = 0,
+ contrast_filter = "none",
+ contrasts = c(),
+ groups = c(),
+ groups_filter = "none",
+ label_font_size = 6,
+ label_distance = 1,
+ y_axis_expansion = 0.08,
+ fill_colors = c("steelblue1", "whitesmoke"),
+ pie_chart_in_3d = TRUE,
+ bar_width = 0.4,
+ draw_bar_border = TRUE,
+ plot_type = "bar",
+ plot_titles_fontsize = 12
+ )
+ expect_equal(moo@analyses$diff_filt, nidap_deg_gene_list)
+})
+
+test_that("filter_diff works for NIDAP on linux", {
+ skip_on_os("mac")
+ options(moo_print_plots = FALSE)
+ moo <- moo_nidap |>
+ diff_counts(
+ count_type = "filt",
+ sub_count_type = NULL,
+ sample_id_colname = "Sample",
+ feature_id_colname = "Gene",
+ covariates_colnames = c("Group", "Batch"),
+ contrast_colname = c("Group"),
+ contrasts = c("B-A", "C-A", "B-C"),
+ voom_normalization_method = "quantile",
+ ) |>
+ filter_diff(
+ significance_column = "adjpval",
+ significance_cutoff = 0.05,
+ change_column = "logFC",
+ change_cutoff = 1,
+ filtering_mode = "any",
+ include_estimates = c("FC", "logFC", "tstat", "pval", "adjpval"),
+ round_estimates = TRUE,
+ rounding_decimal_for_percent_cells = 0,
+ contrast_filter = "none",
+ contrasts = c(),
+ groups = c(),
+ groups_filter = "none",
+ label_font_size = 6,
+ label_distance = 1,
+ y_axis_expansion = 0.08,
+ fill_colors = c("steelblue1", "whitesmoke"),
+ pie_chart_in_3d = TRUE,
+ bar_width = 0.4,
+ draw_bar_border = TRUE,
+ plot_type = "bar",
+ plot_titles_fontsize = 12
+ )
+ expected_head <- structure(
+ list(
+ Gene = c(
+ "1110034G24Rik",
+ "3110082I17Rik",
+ "4632428N05Rik",
+ "4833439L19Rik",
+ "4930523C07Rik",
+ "5430427O19Rik"
+ ),
+ `B-A_FC` = c(21.7, -1.73, 2.43, -1.38, -2.3, -2.22),
+ `B-A_logFC` = c(4.44, -0.789, 1.28, -0.46, -1.2, -1.15),
+ `B-A_tstat` = c(3.2, -1.35, 2.76, -1.18, -1.62, -2.46),
+ `B-A_pval` = c(0.00782, 0.203, 0.0177, 0.26, 0.133, 0.0307),
+ `B-A_adjpval` = c(0.21, 0.71, 0.303, 0.758, 0.617, 0.377),
+ `C-A_FC` = c(36.6, -21.9, 4.66, -3.59, 4.5, -4.49),
+ `C-A_logFC` = c(5.2, -4.46, 2.22, -1.84, 2.17, -2.17),
+ `C-A_tstat` = c(4.15, -3.8, 5.25, -3.76, 4.5, -3.68),
+ `C-A_pval` = c(0.00141, 0.00265, 0.000222, 0.00281, 0.000767, 0.00327),
+ `C-A_adjpval` = c(0.027, 0.0383, 0.00929, 0.0395, 0.0191, 0.0432),
+ `B-C_FC` = c(-1.69, 12.7, -1.92, 2.61, -10.3, 2.02),
+ `B-C_logFC` = c(-0.758, 3.67, -0.941, 1.38, -3.37, 1.01),
+ `B-C_tstat` = c(-0.838, 2.93, -3.15, 2.63, -5.04, 1.53),
+ `B-C_pval` = c(0.419, 0.0129, 0.00859, 0.0222, 0.000311, 0.153),
+ `B-C_adjpval` = c(0.707, 0.144, 0.124, 0.186, 0.0224, 0.442)
+ ),
+ row.names = c(NA, 6L),
+ class = "data.frame"
+ )
+ expected_tail <- structure(
+ list(
+ Gene = c("Zfand6", "Zfp35", "Zfp422", "Zfp706", "Zfp945", "Zhx1"),
+ `B-A_FC` = c(1.22, 1.15, -1.43, -1.92, 10.5, -1.28),
+ `B-A_logFC` = c(0.282, 0.198, -0.515, -0.938, 3.39, -0.356),
+ `B-A_tstat` = c(0.808, 0.725, -1.92, -5.49, 2.94, -1.33),
+ `B-A_pval` = c(0.435, 0.483, 0.0802, 0.00015, 0.0126, 0.209),
+ `B-A_adjpval` = c(0.859, 0.871, 0.529, 0.0247, 0.258, 0.712),
+ `C-A_FC` = c(2.19, -2.35, -2.39, -2.89, 21.5, 1.6),
+ `C-A_logFC` = c(1.13, -1.23, -1.26, -1.53, 4.43, 0.68),
+ `C-A_tstat` = c(3.61, -4.08, -4.61, -8.9, 4.26, 2.9),
+ `C-A_pval` = c(0.00372, 0.00161, 0.000638, 1.47e-06, 0.00117, 0.0137),
+ `C-A_adjpval` = c(0.0462, 0.0295, 0.0176, 0.000377, 0.0241, 0.0971),
+ `B-C_FC` = c(-1.8, 2.69, 1.67, 1.51, -2.06, -2.05),
+ `B-C_logFC` = c(-0.845, 1.43, 0.744, 0.594, -1.04, -1.04),
+ `B-C_tstat` = c(-2.76, 4.59, 2.56, 3.16, -1.24, -4.23),
+ `B-C_pval` = c(0.0175, 0.000656, 0.0253, 0.00845, 0.239, 0.00123),
+ `B-C_adjpval` = c(0.165, 0.0329, 0.199, 0.123, 0.542, 0.0455)
+ ),
+ row.names = 630:635,
+ class = "data.frame"
+ )
+ # Use tolerance for numerical precision across different systems/BLAS implementations
+ expect_equal(head(moo@analyses$diff_filt), expected_head, tolerance = 0.02)
+ expect_equal(tail(moo@analyses$diff_filt), expected_tail, tolerance = 0.02)
+})
+
+test_that("filter_diff rejects invalid filtering_mode", {
+ options(moo_print_plots = FALSE)
+ moo <- moo_nidap |>
+ diff_counts(
+ count_type = "filt",
+ sub_count_type = NULL,
+ sample_id_colname = "Sample",
+ feature_id_colname = "Gene",
+ covariates_colnames = c("Group", "Batch"),
+ contrast_colname = c("Group"),
+ contrasts = c("B-A", "C-A", "B-C"),
+ voom_normalization_method = "quantile"
+ )
+
+ expect_error(
+ moo |> filter_diff(filtering_mode = "invalid"),
+ "filtering_mode not recognized"
+ )
+})
+
+test_that("filter_diff accepts valid filtering_mode values", {
+ options(moo_print_plots = FALSE)
+ moo <- moo_nidap |>
+ diff_counts(
+ count_type = "filt",
+ sub_count_type = NULL,
+ sample_id_colname = "Sample",
+ feature_id_colname = "Gene",
+ covariates_colnames = c("Group", "Batch"),
+ contrast_colname = c("Group"),
+ contrasts = c("B-A", "C-A", "B-C"),
+ voom_normalization_method = "quantile"
+ )
+
+ expect_no_error(
+ moo |> filter_diff(filtering_mode = "any")
+ )
+
+ expect_no_error(
+ moo |> filter_diff(filtering_mode = "all")
+ )
+})
+
+test_that("filter_diff rejects invalid plot_type", {
+ options(moo_print_plots = FALSE)
+ moo <- moo_nidap |>
+ diff_counts(
+ count_type = "filt",
+ sub_count_type = NULL,
+ sample_id_colname = "Sample",
+ feature_id_colname = "Gene",
+ covariates_colnames = c("Group", "Batch"),
+ contrast_colname = c("Group"),
+ contrasts = c("B-A", "C-A", "B-C"),
+ voom_normalization_method = "quantile"
+ )
+
+ expect_error(
+ moo |> filter_diff(plot_type = "invalid"),
+ "plot_type not recognized"
+ )
+})
+
+test_that("filter_diff accepts valid plot_type values", {
+ options(moo_print_plots = FALSE)
+ moo <- moo_nidap |>
+ diff_counts(
+ count_type = "filt",
+ sub_count_type = NULL,
+ sample_id_colname = "Sample",
+ feature_id_colname = "Gene",
+ covariates_colnames = c("Group", "Batch"),
+ contrast_colname = c("Group"),
+ contrasts = c("B-A", "C-A", "B-C"),
+ voom_normalization_method = "quantile"
+ )
+
+ expect_no_error(
+ moo |> filter_diff(plot_type = "bar")
+ )
+
+ expect_no_error(
+ moo |> filter_diff(plot_type = "pie")
+ )
+})
diff --git a/code/MOSuite/tests/testthat/test-filter.R b/code/MOSuite/tests/testthat/test-filter.R
new file mode 100644
index 0000000..aabfc78
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-filter.R
@@ -0,0 +1,253 @@
+test_that("filter_counts reproduces NIDAP results", {
+ set.seed(10)
+ moo <- create_multiOmicDataSet_from_dataframes(
+ as.data.frame(nidap_sample_metadata),
+ as.data.frame(nidap_clean_raw_counts),
+ sample_id_colname = "Sample",
+ feature_id_colname = "Gene"
+ ) |>
+ calc_cpm(feature_id_colname = "Gene") |>
+ filter_counts(
+ sample_id_colname = "Sample",
+ feature_id_colname = "Gene",
+ count_type = "raw",
+ print_plots = TRUE
+ )
+ rds_counts_filt <- moo@counts$filt |>
+ dplyr::arrange(desc(Gene))
+ nidap_counts_filt <- as.data.frame(nidap_filtered_counts) |>
+ dplyr::arrange(desc(Gene))
+
+ expect_true(equal_dfs(rds_counts_filt, nidap_counts_filt))
+})
+
+# TODO get filter_counts() to work on tibbles too, not only dataframes
+
+test_that("filter_counts works on RENEE dataset", {
+ moo <- create_multiOmicDataSet_from_dataframes(
+ readr::read_tsv(
+ system.file("extdata", "sample_metadata.tsv.gz", package = "MOSuite")
+ ),
+ gene_counts |> glue_gene_symbols()
+ )
+ rds2 <- moo |>
+ filter_counts(
+ feature_id_colname = "gene_id",
+ sample_id_colname = "sample_id",
+ group_colname = "condition",
+ label_colname = "sample_id",
+ samples_to_include = c("KO_S3", "KO_S4", "WT_S1", "WT_S2"),
+ minimum_count_value_to_be_considered_nonzero = 1,
+ minimum_number_of_samples_with_nonzero_counts_in_total = 1,
+ minimum_number_of_samples_with_nonzero_counts_in_a_group = 1,
+ print_plots = TRUE,
+ count_type = "raw"
+ )
+ expect_equal(dim(rds2@counts$filt), c(291, 5))
+ expect_equal(
+ rds2@counts$filt |> dplyr::arrange(gene_id) |> head(),
+ structure(
+ list(
+ gene_id = c(
+ "ENSG00000072803.17|FBXW11",
+ "ENSG00000083845.9|RPS5",
+ "ENSG00000107371.13|EXOSC3",
+ "ENSG00000111639.8|MRPL51",
+ "ENSG00000111640.15|GAPDH",
+ "ENSG00000111786.9|SRSF9"
+ ),
+ KO_S3 = c(2, 1, 1, 0, 0, 0),
+ KO_S4 = c(0, 0, 1, 1, 1, 1),
+ WT_S1 = c(0, 0, 0, 0, 0, 0),
+ WT_S2 = c(0, 0, 0, 0, 0, 0)
+ ),
+ row.names = c(NA, 6L),
+ class = "data.frame"
+ )
+ )
+ expect_equal(
+ rds2@counts$filt |> dplyr::arrange(gene_id) |> tail(),
+ structure(
+ list(
+ gene_id = c(
+ "ENSG00000281903.2|LINC02246",
+ "ENSG00000282393.1|AC016588.2",
+ "ENSG00000283886.2|BX664615.2",
+ "ENSG00000285413.1|AP001056.2",
+ "ENSG00000286018.1|AF129075.3",
+ "ENSG00000286104.1|AC016629.3"
+ ),
+ KO_S3 = c(0.85, 0, 1, 3, 2, 1),
+ KO_S4 = c(0, 1, 0, 1, 0, 0),
+ WT_S1 = c(0, 0, 0, 0, 0, 0),
+ WT_S2 = c(0.71, 0, 0, 0, 0, 0)
+ ),
+ row.names = 286:291,
+ class = "data.frame"
+ )
+ )
+})
+
+test_that("remove_low_count_genes works", {
+ df <- data.frame(
+ Gene = c(
+ "mt-Nd5_43275",
+ "mt-Nd6_43276",
+ "mt-Te_43277",
+ "mt-Cytb_43278",
+ "mt-Tt_43279",
+ "mt-Tp_43280"
+ ),
+ A1 = c(6155, 858, 0, 20542, 0, 12),
+ A2 = c(10823, 1420, 1, 29677, 9, 16),
+ A3 = c(9482, 1167, 2, 31730, 0, 13),
+ B1 = c(6162, 1181, 0, 28293, 0, 15),
+ B2 = c(8002, 845, 1, 25617, 7, 19),
+ B3 = c(7225, 1198, 3, 30370, 3, 26),
+ C1 = c(4141, 515, 0, 21310, 0, 32),
+ C2 = c(9058, 1147, 4, 30108, 0, 33),
+ C3 = c(8481, 1124, 2, 30893, 2, 50),
+ row.names = seq(5, 10)
+ )
+ sample_meta <- structure(
+ list(
+ Sample = c("A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3"),
+ Group = c("A", "A", "A", "B", "B", "B", "C", "C", "C"),
+ Replicate = c(1, 2, 3, 1, 2, 3, 1, 2, 3),
+ Batch = c(1, 2, 2, 1, 1, 2, 1, 2, 2),
+ Label = c("A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3")
+ ),
+ row.names = c("A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3"),
+ class = "data.frame"
+ )
+
+ # test default params
+ expect_equal(
+ remove_low_count_genes(
+ counts_dat = df,
+ sample_metadata = sample_meta,
+ feature_id_colname = "Gene",
+ group_colname = "Group",
+ use_cpm_counts_to_filter = TRUE,
+ use_group_based_filtering = FALSE,
+ minimum_count_value_to_be_considered_nonzero = 8,
+ minimum_number_of_samples_with_nonzero_counts_in_total = 7,
+ minimum_number_of_samples_with_nonzero_counts_in_a_group = 3
+ ),
+ structure(
+ list(
+ Gene = c(
+ "mt-Nd5_43275",
+ "mt-Nd6_43276",
+ "mt-Cytb_43278",
+ "mt-Tp_43280"
+ ),
+ A1 = c(
+ 223274.204664998,
+ 31124.1702035042,
+ 745166.322051728,
+ 435.303079769289
+ ),
+ A2 = c(
+ 258022.219043532,
+ 33853.0491584418,
+ 707504.88723597,
+ 381.442807419063
+ ),
+ A3 = c(
+ 223663.725998962,
+ 27527.4803038166,
+ 748454.970042931,
+ 306.647167051941
+ ),
+ B1 = c(
+ 172842.276513983,
+ 33126.7005133096,
+ 793610.277411573,
+ 420.74556113433
+ ),
+ B2 = c(
+ 232002.551390218,
+ 24499.1447044156,
+ 742715.490997652,
+ 550.868342466151
+ ),
+ B3 = c(
+ 186091.435930457,
+ 30856.406954282,
+ 782227.94591114,
+ 669.671603348358
+ ),
+ C1 = c(
+ 159281.483191015,
+ 19809.2160935457,
+ 819678.436802831,
+ 1230.86391260866
+ ),
+ C2 = c(
+ 224485.749690211,
+ 28426.2701363073,
+ 746171.003717472,
+ 817.843866171004
+ ),
+ C3 = c(
+ 209138.883408956,
+ 27717.4985204182,
+ 761811.994476228,
+ 1232.98480962715
+ )
+ ),
+ row.names = 1:4,
+ class = "data.frame"
+ )
+ )
+})
+
+test_that("remove_low_count_genes works with group-based filtering (no grouped tibble crash)", {
+ df <- data.frame(
+ Gene = c(
+ "mt-Nd5_43275",
+ "mt-Nd6_43276",
+ "mt-Te_43277",
+ "mt-Cytb_43278",
+ "mt-Tt_43279",
+ "mt-Tp_43280"
+ ),
+ A1 = c(6155, 858, 0, 20542, 0, 12),
+ A2 = c(10823, 1420, 1, 29677, 9, 16),
+ A3 = c(9482, 1167, 2, 31730, 0, 13),
+ B1 = c(6162, 1181, 0, 28293, 0, 15),
+ B2 = c(8002, 845, 1, 25617, 7, 19),
+ B3 = c(7225, 1198, 3, 30370, 3, 26),
+ C1 = c(4141, 515, 0, 21310, 0, 32),
+ C2 = c(9058, 1147, 4, 30108, 0, 33),
+ C3 = c(8481, 1124, 2, 30893, 2, 50),
+ row.names = seq(5, 10)
+ )
+ sample_meta <- structure(
+ list(
+ Sample = c("A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3"),
+ Group = c("A", "A", "A", "B", "B", "B", "C", "C", "C"),
+ Replicate = c(1, 2, 3, 1, 2, 3, 1, 2, 3),
+ Batch = c(1, 2, 2, 1, 1, 2, 1, 2, 2),
+ Label = c("A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3")
+ ),
+ row.names = c("A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3"),
+ class = "data.frame"
+ )
+
+ result <- remove_low_count_genes(
+ counts_dat = df,
+ sample_metadata = sample_meta,
+ feature_id_colname = "Gene",
+ group_colname = "Group",
+ use_cpm_counts_to_filter = TRUE,
+ use_group_based_filtering = TRUE,
+ minimum_count_value_to_be_considered_nonzero = 8,
+ minimum_number_of_samples_with_nonzero_counts_in_total = 7,
+ minimum_number_of_samples_with_nonzero_counts_in_a_group = 3
+ )
+ expect_s3_class(result, "data.frame")
+ expect_true("Gene" %in% colnames(result))
+ expect_true(nrow(result) > 0)
+})
diff --git a/code/MOSuite/tests/testthat/test-normalize.R b/code/MOSuite/tests/testthat/test-normalize.R
new file mode 100644
index 0000000..87318d4
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-normalize.R
@@ -0,0 +1,139 @@
+test_that("normalize works for NIDAP", {
+ moo <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = as.data.frame(nidap_raw_counts),
+ "clean" = as.data.frame(nidap_clean_raw_counts),
+ "filt" = as.data.frame(nidap_filtered_counts)
+ )
+ ) |>
+ normalize_counts(
+ group_colname = "Group",
+ label_colname = "Label",
+ print_plots = TRUE
+ )
+ expect_true(equal_dfs(
+ moo@counts[["norm"]][["voom"]] |>
+ dplyr::arrange(desc(Gene)),
+ as.data.frame(nidap_norm_counts) |>
+ dplyr::arrange(desc(Gene))
+ ))
+})
+
+test_that("normalize works for RENEE", {
+ moo <- create_multiOmicDataSet_from_dataframes(
+ readr::read_tsv(
+ system.file("extdata", "sample_metadata.tsv.gz", package = "MOSuite")
+ ),
+ gene_counts
+ ) |>
+ clean_raw_counts() |>
+ filter_counts(
+ group_colname = "condition",
+ label_colname = "sample_id",
+ minimum_count_value_to_be_considered_nonzero = 1,
+ minimum_number_of_samples_with_nonzero_counts_in_total = 1,
+ minimum_number_of_samples_with_nonzero_counts_in_a_group = 1,
+ print_plots = FALSE
+ ) |>
+ normalize_counts(group_colname = "condition", label_colname = "sample_id")
+ expect_equal(
+ head(moo@counts$norm$voom),
+ structure(
+ list(
+ gene_id = c(
+ "ENSG00000215458.8",
+ "ENSG00000160179.18",
+ "ENSG00000258017.1",
+ "ENSG00000282393.1",
+ "ENSG00000286104.1",
+ "ENSG00000274422.1"
+ ),
+ KO_S3 = c(
+ 11.0751960068561,
+ 9.6086338540783,
+ 9.6086338540783,
+ 8.81615260371772,
+ 9.6086338540783,
+ 8.81615260371772
+ ),
+ KO_S4 = c(
+ 12.3480907442867,
+ 12.7703165561761,
+ 8.81615260371772,
+ 9.6086338540783,
+ 8.81615260371772,
+ 9.6086338540783
+ ),
+ WT_S1 = c(
+ 8.81615260371772,
+ 12.3480907442867,
+ 8.81615260371772,
+ 8.81615260371772,
+ 8.81615260371772,
+ 8.81615260371772
+ ),
+ WT_S2 = c(
+ 10.0048744792586,
+ 12.2369960496953,
+ 8.81615260371772,
+ 8.81615260371772,
+ 8.81615260371772,
+ 8.81615260371772
+ )
+ ),
+ row.names = c(NA, 6L),
+ class = "data.frame"
+ )
+ )
+ expect_equal(
+ tail(moo@counts$norm$voom),
+ structure(
+ list(
+ gene_id = c(
+ "ENSG00000157538.14",
+ "ENSG00000160193.11",
+ "ENSG00000182093.15",
+ "ENSG00000182362.14",
+ "ENSG00000173276.14",
+ "ENSG00000237232.7"
+ ),
+ KO_S3 = c(
+ 12.3480907442867,
+ 9.6086338540783,
+ 11.8597009422769,
+ 11.0751960068561,
+ 11.8597009422769,
+ 8.81615260371772
+ ),
+ KO_S4 = c(
+ 12.7703165561761,
+ 9.6086338540783,
+ 9.6086338540783,
+ 8.81615260371772,
+ 12.7703165561761,
+ 9.6086338540783
+ ),
+ WT_S1 = c(
+ 12.2426956580003,
+ 10.5853565029804,
+ 11.7865266999202,
+ 8.81615260371772,
+ 11.7865266999202,
+ 8.81615260371772
+ ),
+ WT_S2 = c(
+ 12.4720029602325,
+ 10.9249210977479,
+ 11.4357186116131,
+ 8.81615260371772,
+ 12.2369960496953,
+ 8.81615260371772
+ )
+ ),
+ row.names = 286:291,
+ class = "data.frame"
+ )
+ )
+})
diff --git a/code/MOSuite/tests/testthat/test-plot_heatmap.R b/code/MOSuite/tests/testthat/test-plot_heatmap.R
new file mode 100644
index 0000000..ba9fb1a
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-plot_heatmap.R
@@ -0,0 +1,263 @@
+colors_vec <- c(
+ "#5954d6",
+ "#e1562c",
+ "#b80058",
+ "#00c6f8",
+ "#d163e6",
+ "#00a76c",
+ "#ff9287",
+ "#008cf9",
+ "#006e00",
+ "#796880",
+ "#FFA500",
+ "#878500"
+)
+test_that("correlation heatmap works", {
+ p <- plot_corr_heatmap(
+ nidap_filtered_counts |>
+ dplyr::select(tidyselect::all_of(
+ c("A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3")
+ )) |>
+ as.data.frame(),
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ sample_id_colname = "Sample",
+ label_colname = "Label",
+ group_colname = "Group",
+ color_values = colors_vec
+ )
+ expect_s4_class(p, "Heatmap")
+ expect_equal(
+ p@matrix,
+ structure(
+ c(
+ 0,
+ 0.0349436686640265,
+ 0.0346707459478316,
+ 0.136436292332774,
+ 0.145504435322238,
+ 0.165715414451367,
+ 0.266892740192968,
+ 0.310669867608233,
+ 0.281044606820525,
+ 0.0349436686640265,
+ 0,
+ 0.026894587617103,
+ 0.139740412452416,
+ 0.144024768162842,
+ 0.156995761981556,
+ 0.256516858290949,
+ 0.289312631501573,
+ 0.275306551391499,
+ 0.0346707459478316,
+ 0.026894587617103,
+ 0,
+ 0.113462670330904,
+ 0.132707949438905,
+ 0.146137196349944,
+ 0.2518982836951,
+ 0.307893394909586,
+ 0.277982555134354,
+ 0.136436292332774,
+ 0.139740412452416,
+ 0.113462670330904,
+ 0,
+ 0.0467104077868874,
+ 0.0778256905442303,
+ 0.18935488124329,
+ 0.238494284141649,
+ 0.209007629325352,
+ 0.145504435322238,
+ 0.144024768162842,
+ 0.132707949438905,
+ 0.0467104077868874,
+ 0,
+ 0.0532124359450156,
+ 0.140242067145314,
+ 0.179723372754429,
+ 0.15251602311055,
+ 0.165715414451367,
+ 0.156995761981556,
+ 0.146137196349944,
+ 0.0778256905442303,
+ 0.0532124359450156,
+ 0,
+ 0.141067943113981,
+ 0.160160263560895,
+ 0.14605974755951,
+ 0.266892740192968,
+ 0.256516858290949,
+ 0.2518982836951,
+ 0.18935488124329,
+ 0.140242067145314,
+ 0.141067943113981,
+ 0,
+ 0.104501003317621,
+ 0.0500950924722408,
+ 0.310669867608233,
+ 0.289312631501573,
+ 0.307893394909586,
+ 0.238494284141649,
+ 0.179723372754429,
+ 0.160160263560895,
+ 0.104501003317621,
+ 0,
+ 0.0899444885709063,
+ 0.281044606820525,
+ 0.275306551391499,
+ 0.277982555134354,
+ 0.209007629325352,
+ 0.15251602311055,
+ 0.14605974755951,
+ 0.0500950924722408,
+ 0.0899444885709063,
+ 0
+ ),
+ dim = c(9L, 9L),
+ dimnames = list(
+ c("A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3"),
+ c("A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3")
+ )
+ )
+ )
+})
+
+test_that("plot_corr_heatmap method dispatch works", {
+ moo <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = as.data.frame(nidap_raw_counts),
+ "clean" = as.data.frame(nidap_clean_raw_counts),
+ "filt" = as.data.frame(nidap_filtered_counts),
+ "norm" = list("voom" = as.data.frame(nidap_norm_counts))
+ )
+ )
+ expect_equal(
+ plot_corr_heatmap(moo, "filt")@matrix,
+ plot_corr_heatmap(
+ as.data.frame(nidap_filtered_counts),
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ feature_id_colname = "Gene"
+ )@matrix
+ )
+})
+
+# TODO get heatmap working on tibbles also
+# test_that("heatmap works", {
+# corHM <- plot_corr_heatmap(
+# counts_dat = nidap_filtered_counts |>
+# dplyr::select(tidyselect::all_of(c("A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3"))),
+# sample_metadata = nidap_sample_metadata,
+# sample_id_colname = "Sample",
+# label_colname = "Label",
+# group_column = "Group",
+# color_values = colors_vec
+# )
+# })
+
+test_that("plot_expr_heatmap works", {
+ moo <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = as.data.frame(nidap_raw_counts),
+ "clean" = as.data.frame(nidap_clean_raw_counts),
+ "filt" = as.data.frame(nidap_filtered_counts),
+ "norm" = list("voom" = as.data.frame(nidap_norm_counts))
+ )
+ )
+ expect_message(
+ {
+ set.seed(20250226)
+ p_moo <- plot_expr_heatmap(
+ moo,
+ count_type = "norm",
+ sub_count_type = "voom",
+ feature_id_colname = "Gene"
+ )
+ },
+ "total number of genes in heatmap",
+ fixed = FALSE
+ )
+ expect_equal(
+ head(p_moo@matrix),
+ structure(
+ c(
+ -0.469646298150098,
+ -0.999122775178692,
+ -1.63914985230916,
+ -1.11065487632401,
+ 1.06508157716121,
+ 0.129357631742822,
+ -1.6308124461643,
+ -0.700059853143015,
+ -1.5189464375787,
+ -0.224167735591629,
+ 1.06508157716121,
+ -1.56168918461362,
+ -1.6308124461643,
+ -1.89235880186714,
+ -0.541702967261003,
+ -2.0463972593436,
+ 1.06508157716121,
+ -1.79694425364586,
+ 0.329750044812768,
+ 0.0301570520160167,
+ 0.167894484181845,
+ -0.0423239634814564,
+ -0.120254965774652,
+ 0.199342022111398,
+ 0.642860459417498,
+ 0.367952795408648,
+ 0.373030023826938,
+ 0.474515861027971,
+ -0.0390725679377215,
+ 0.117730807477454,
+ 0.728033358831939,
+ 0.385380968911897,
+ 0.599797184786654,
+ 0.453174107982841,
+ -0.104093868102577,
+ 0.461536877004385,
+ 0.623852071863989,
+ 0.835927627565177,
+ 0.888284230577201,
+ 0.725967683142828,
+ -1.99037275585028,
+ 0.831065997888202,
+ 0.825036578059704,
+ 1.0208559858833,
+ 0.853098375022516,
+ 0.922704248230578,
+ -0.758155232754779,
+ 0.657242141825761,
+ 0.581738677492799,
+ 0.951267000403811,
+ 0.817694958753715,
+ 0.847181934356472,
+ -0.183295341063619,
+ 0.962357960209459
+ ),
+ dim = c(6L, 9L),
+ dimnames = list(
+ c("Il2rb", "Rora", "Tcrg-C1", "Pdcd1", "Dntt", "Eya2"),
+ c("A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3")
+ )
+ )
+ )
+
+ expect_message(
+ {
+ set.seed(20250226)
+ p_dat <- plot_expr_heatmap(
+ as.data.frame(nidap_norm_counts),
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ feature_id_colname = "Gene"
+ )
+ },
+ "total number of genes in heatmap",
+ fixed = FALSE
+ )
+
+ expect_equal(p_moo@matrix, p_dat@matrix)
+})
diff --git a/code/MOSuite/tests/testthat/test-plot_histogram.R b/code/MOSuite/tests/testthat/test-plot_histogram.R
new file mode 100644
index 0000000..598289d
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-plot_histogram.R
@@ -0,0 +1,603 @@
+log_counts <- structure(
+ c(
+ 4.68203758078952,
+ 4.85622577980595,
+ 4.78525385564269,
+ 4.49631900610197,
+ 4.06045359796882,
+ 4.23984554358195,
+ 4.5945977606547,
+ 4.64452454872198,
+ 5.01435137189661,
+ 4.97202092328668,
+ 4.63530665635318,
+ 4.2685504137767,
+ 4.55963475259408,
+ 4.48551456875509,
+ 4.8871583679813,
+ 4.80572893488115,
+ 4.62485692334614,
+ 3.28500783834613,
+ 4.27910086185478,
+ 4.44991091582054,
+ 5.1543875974913,
+ 5.04097648388479,
+ 3.93341992672261,
+ 3.60201340344135,
+ 4.38819757498796,
+ 4.52837375303351,
+ 5.04374399504142,
+ 5.16774070067169,
+ 4.88762227733767,
+ 3.4002608223214,
+ 4.86037842060906,
+ 5.24497940734463,
+ 4.91946017962122,
+ 5.04632856830695,
+ 4.96825190546972,
+ 4.23362208987531,
+ 4.11790170891073,
+ 4.05303076635639,
+ 4.92868500786216,
+ 4.94726383191083,
+ 3.80832003906478,
+ 4.59490650795107,
+ 4.85738793032812,
+ 4.40655427188147,
+ 4.22382743940071,
+ 4.58770826515446,
+ 4.96057466313368,
+ 4.99824283163569,
+ 4.5437556841171,
+ 4.97124300205374,
+ 4.93192370480255,
+ 4.13835014251084,
+ 5.0939015418123,
+ 5.03285949305828
+ ),
+ dim = c(6L, 9L),
+ dimnames = list(
+ c(
+ "Mrpl15_32",
+ "Lypla1_34",
+ "Tcea1_36",
+ "Atp6v1h_44",
+ "Rb1cc1_54",
+ "Pcmtd1_68"
+ ),
+ c("A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3")
+ )
+)
+sample_meta <- structure(
+ list(
+ Sample = c("A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3"),
+ Group = c("A", "A", "A", "B", "B", "B", "C", "C", "C"),
+ Replicate = c(1, 2, 3, 1, 2, 3, 1, 2, 3),
+ Batch = c(1, 2, 2, 1, 1, 2, 1, 2, 2),
+ Label = c("A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3")
+ ),
+ row.names = c("A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3"),
+ class = "data.frame"
+)
+test_that("plot_histogram works with rownames", {
+ p <- plot_histogram(
+ log_counts |> as.data.frame() |> tibble::rownames_to_column("Gene"),
+ sample_meta,
+ sample_id_colname = "Sample",
+ feature_id_colname = "Gene",
+ group_colname = "Group",
+ label_colname = "Label",
+ color_values = c(
+ indigo = "#5954d6",
+ carrot = "#e1562c",
+ lipstick = "#b80058",
+ turquoise = "#00c6f8",
+ lavender = "#d163e6",
+ jade = "#00a76c",
+ coral = "#ff9287",
+ azure = "#008cf9",
+ green = "#006e00",
+ rum = "#796880",
+ orange = "#FFA500",
+ olive = "#878500"
+ ),
+ color_by_group = FALSE,
+ set_min_max_for_x_axis = FALSE,
+ minimum_for_x_axis = -1,
+ maximum_for_x_axis = 1,
+ legend_position = "top",
+ legend_font_size = 10,
+ number_of_legend_columns = 6
+ )
+
+ expect_s3_class(p$layers[[1]], "ggproto")
+ expect_s3_class(p$layers[[1]]$geom, "GeomArea")
+ expect_equal(
+ p$data,
+ structure(
+ list(
+ Gene = c(
+ "Mrpl15_32",
+ "Mrpl15_32",
+ "Mrpl15_32",
+ "Mrpl15_32",
+ "Mrpl15_32",
+ "Mrpl15_32",
+ "Mrpl15_32",
+ "Mrpl15_32",
+ "Mrpl15_32",
+ "Lypla1_34",
+ "Lypla1_34",
+ "Lypla1_34",
+ "Lypla1_34",
+ "Lypla1_34",
+ "Lypla1_34",
+ "Lypla1_34",
+ "Lypla1_34",
+ "Lypla1_34",
+ "Tcea1_36",
+ "Tcea1_36",
+ "Tcea1_36",
+ "Tcea1_36",
+ "Tcea1_36",
+ "Tcea1_36",
+ "Tcea1_36",
+ "Tcea1_36",
+ "Tcea1_36",
+ "Atp6v1h_44",
+ "Atp6v1h_44",
+ "Atp6v1h_44",
+ "Atp6v1h_44",
+ "Atp6v1h_44",
+ "Atp6v1h_44",
+ "Atp6v1h_44",
+ "Atp6v1h_44",
+ "Atp6v1h_44",
+ "Rb1cc1_54",
+ "Rb1cc1_54",
+ "Rb1cc1_54",
+ "Rb1cc1_54",
+ "Rb1cc1_54",
+ "Rb1cc1_54",
+ "Rb1cc1_54",
+ "Rb1cc1_54",
+ "Rb1cc1_54",
+ "Pcmtd1_68",
+ "Pcmtd1_68",
+ "Pcmtd1_68",
+ "Pcmtd1_68",
+ "Pcmtd1_68",
+ "Pcmtd1_68",
+ "Pcmtd1_68",
+ "Pcmtd1_68",
+ "Pcmtd1_68"
+ ),
+ Sample = c(
+ "A1",
+ "A2",
+ "A3",
+ "B1",
+ "B2",
+ "B3",
+ "C1",
+ "C2",
+ "C3",
+ "A1",
+ "A2",
+ "A3",
+ "B1",
+ "B2",
+ "B3",
+ "C1",
+ "C2",
+ "C3",
+ "A1",
+ "A2",
+ "A3",
+ "B1",
+ "B2",
+ "B3",
+ "C1",
+ "C2",
+ "C3",
+ "A1",
+ "A2",
+ "A3",
+ "B1",
+ "B2",
+ "B3",
+ "C1",
+ "C2",
+ "C3",
+ "A1",
+ "A2",
+ "A3",
+ "B1",
+ "B2",
+ "B3",
+ "C1",
+ "C2",
+ "C3",
+ "A1",
+ "A2",
+ "A3",
+ "B1",
+ "B2",
+ "B3",
+ "C1",
+ "C2",
+ "C3"
+ ),
+ count = c(
+ 4.68203758078952,
+ 4.5945977606547,
+ 4.55963475259408,
+ 4.27910086185478,
+ 4.38819757498796,
+ 4.86037842060906,
+ 4.11790170891073,
+ 4.85738793032812,
+ 4.5437556841171,
+ 4.85622577980595,
+ 4.64452454872198,
+ 4.48551456875509,
+ 4.44991091582054,
+ 4.52837375303351,
+ 5.24497940734463,
+ 4.05303076635639,
+ 4.40655427188147,
+ 4.97124300205374,
+ 4.78525385564269,
+ 5.01435137189661,
+ 4.8871583679813,
+ 5.1543875974913,
+ 5.04374399504142,
+ 4.91946017962122,
+ 4.92868500786216,
+ 4.22382743940071,
+ 4.93192370480255,
+ 4.49631900610197,
+ 4.97202092328668,
+ 4.80572893488115,
+ 5.04097648388479,
+ 5.16774070067169,
+ 5.04632856830695,
+ 4.94726383191083,
+ 4.58770826515446,
+ 4.13835014251084,
+ 4.06045359796882,
+ 4.63530665635318,
+ 4.62485692334614,
+ 3.93341992672261,
+ 4.88762227733767,
+ 4.96825190546972,
+ 3.80832003906478,
+ 4.96057466313368,
+ 5.0939015418123,
+ 4.23984554358195,
+ 4.2685504137767,
+ 3.28500783834613,
+ 3.60201340344135,
+ 3.4002608223214,
+ 4.23362208987531,
+ 4.59490650795107,
+ 4.99824283163569,
+ 5.03285949305828
+ ),
+ Group = c(
+ "A",
+ "A",
+ "A",
+ "B",
+ "B",
+ "B",
+ "C",
+ "C",
+ "C",
+ "A",
+ "A",
+ "A",
+ "B",
+ "B",
+ "B",
+ "C",
+ "C",
+ "C",
+ "A",
+ "A",
+ "A",
+ "B",
+ "B",
+ "B",
+ "C",
+ "C",
+ "C",
+ "A",
+ "A",
+ "A",
+ "B",
+ "B",
+ "B",
+ "C",
+ "C",
+ "C",
+ "A",
+ "A",
+ "A",
+ "B",
+ "B",
+ "B",
+ "C",
+ "C",
+ "C",
+ "A",
+ "A",
+ "A",
+ "B",
+ "B",
+ "B",
+ "C",
+ "C",
+ "C"
+ ),
+ Replicate = c(
+ 1,
+ 2,
+ 3,
+ 1,
+ 2,
+ 3,
+ 1,
+ 2,
+ 3,
+ 1,
+ 2,
+ 3,
+ 1,
+ 2,
+ 3,
+ 1,
+ 2,
+ 3,
+ 1,
+ 2,
+ 3,
+ 1,
+ 2,
+ 3,
+ 1,
+ 2,
+ 3,
+ 1,
+ 2,
+ 3,
+ 1,
+ 2,
+ 3,
+ 1,
+ 2,
+ 3,
+ 1,
+ 2,
+ 3,
+ 1,
+ 2,
+ 3,
+ 1,
+ 2,
+ 3,
+ 1,
+ 2,
+ 3,
+ 1,
+ 2,
+ 3,
+ 1,
+ 2,
+ 3
+ ),
+ Batch = c(
+ 1,
+ 2,
+ 2,
+ 1,
+ 1,
+ 2,
+ 1,
+ 2,
+ 2,
+ 1,
+ 2,
+ 2,
+ 1,
+ 1,
+ 2,
+ 1,
+ 2,
+ 2,
+ 1,
+ 2,
+ 2,
+ 1,
+ 1,
+ 2,
+ 1,
+ 2,
+ 2,
+ 1,
+ 2,
+ 2,
+ 1,
+ 1,
+ 2,
+ 1,
+ 2,
+ 2,
+ 1,
+ 2,
+ 2,
+ 1,
+ 1,
+ 2,
+ 1,
+ 2,
+ 2,
+ 1,
+ 2,
+ 2,
+ 1,
+ 1,
+ 2,
+ 1,
+ 2,
+ 2
+ ),
+ Label = c(
+ "A1",
+ "A2",
+ "A3",
+ "B1",
+ "B2",
+ "B3",
+ "C1",
+ "C2",
+ "C3",
+ "A1",
+ "A2",
+ "A3",
+ "B1",
+ "B2",
+ "B3",
+ "C1",
+ "C2",
+ "C3",
+ "A1",
+ "A2",
+ "A3",
+ "B1",
+ "B2",
+ "B3",
+ "C1",
+ "C2",
+ "C3",
+ "A1",
+ "A2",
+ "A3",
+ "B1",
+ "B2",
+ "B3",
+ "C1",
+ "C2",
+ "C3",
+ "A1",
+ "A2",
+ "A3",
+ "B1",
+ "B2",
+ "B3",
+ "C1",
+ "C2",
+ "C3",
+ "A1",
+ "A2",
+ "A3",
+ "B1",
+ "B2",
+ "B3",
+ "C1",
+ "C2",
+ "C3"
+ )
+ ),
+ row.names = c(NA, -54L),
+ class = c("tbl_df", "tbl", "data.frame")
+ )
+ )
+})
+
+test_that("plot_histogram works with tibbles", {
+ p <- plot_histogram(
+ nidap_filtered_counts,
+ sample_metadata = nidap_sample_metadata,
+ sample_id_colname = "Sample",
+ feature_id_colname = "Gene",
+ group_colname = "Group",
+ label_colname = "Label",
+ color_values = c(
+ "#5954d6",
+ "#e1562c",
+ "#b80058",
+ "#00c6f8",
+ "#d163e6",
+ "#00a76c",
+ "#ff9287",
+ "#008cf9",
+ "#006e00",
+ "#796880",
+ "#FFA500",
+ "#878500"
+ ),
+ color_by_group = FALSE,
+ set_min_max_for_x_axis = FALSE,
+ minimum_for_x_axis = -1,
+ maximum_for_x_axis = 1,
+ legend_position = "top",
+ legend_font_size = 10,
+ number_of_legend_columns = 6
+ )
+ expect_s3_class(p$layers[[1]], "ggproto")
+ expect_s3_class(p$layers[[1]]$geom, "GeomArea")
+ expect_equal(
+ head(p$data),
+ structure(
+ list(
+ Gene = c(
+ "0610007P14Rik",
+ "0610007P14Rik",
+ "0610007P14Rik",
+ "0610007P14Rik",
+ "0610007P14Rik",
+ "0610007P14Rik"
+ ),
+ Sample = c("A1", "A2", "A3", "B1", "B2", "B3"),
+ count = c(1049, 950, 934, 1068, 1140, 947),
+ Group = c("A", "A", "A", "B", "B", "B"),
+ Replicate = c(1, 2, 3, 1, 2, 3),
+ Batch = c(1, 2, 2, 1, 1, 2),
+ Label = c("A1", "A2", "A3", "B1", "B2", "B3")
+ ),
+ row.names = c(NA, -6L),
+ class = c("tbl_df", "tbl", "data.frame")
+ )
+ )
+})
+
+test_that("plot_histogram result is the same for MOO or dataframe", {
+ moo <- multiOmicDataSet(
+ sample_metadata = nidap_sample_metadata,
+ anno_dat = data.frame(),
+ counts_lst = list("raw" = nidap_raw_counts)
+ )
+ expect_equal(
+ plot_histogram(moo, count_type = "raw"),
+ plot_histogram(nidap_raw_counts, sample_meta = nidap_sample_metadata)
+ )
+})
+
+test_that("plot_histogram accepts print_plots and save_plots via moo dispatch without error", {
+ moo <- multiOmicDataSet(
+ sample_metadata = nidap_sample_metadata,
+ anno_dat = data.frame(),
+ counts_lst = list("raw" = nidap_raw_counts)
+ )
+ expect_no_error(
+ plot_histogram(
+ moo,
+ count_type = "raw",
+ group_colname = "Group",
+ color_by_group = TRUE,
+ print_plots = FALSE,
+ save_plots = FALSE
+ )
+ )
+})
diff --git a/code/MOSuite/tests/testthat/test-plot_pca.R b/code/MOSuite/tests/testthat/test-plot_pca.R
new file mode 100644
index 0000000..db003f3
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-plot_pca.R
@@ -0,0 +1,347 @@
+test_that("calc_pca works", {
+ pca_dat <- calc_pca(nidap_clean_raw_counts, nidap_sample_metadata) |>
+ dplyr::filter(PC %in% c(1, 2))
+ expect_equal(
+ pca_dat,
+ structure(
+ list(
+ Sample = c(
+ "A1",
+ "A1",
+ "A2",
+ "A2",
+ "A3",
+ "A3",
+ "B1",
+ "B1",
+ "B2",
+ "B2",
+ "B3",
+ "B3",
+ "C1",
+ "C1",
+ "C2",
+ "C2",
+ "C3",
+ "C3"
+ ),
+ PC = c(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2),
+ value = c(
+ -40.6241668816455,
+ 25.2297268619146,
+ -56.2133160433603,
+ 6.13385771612248,
+ -69.1070711020441,
+ -21.8952345106934,
+ -36.1660251215743,
+ 7.80504297978752,
+ -25.865255255388,
+ -11.2138080494717,
+ -9.6232450176941,
+ 9.32724696042314,
+ 74.3345576680281,
+ -86.7286802229905,
+ 85.0442226808989,
+ 117.992340543509,
+ 78.2202990727852,
+ -46.6504922786012
+ ),
+ std.dev = c(
+ 61.7780383925471,
+ 55.9548424792563,
+ 61.7780383925471,
+ 55.9548424792563,
+ 61.7780383925471,
+ 55.9548424792563,
+ 61.7780383925471,
+ 55.9548424792563,
+ 61.7780383925471,
+ 55.9548424792563,
+ 61.7780383925471,
+ 55.9548424792563,
+ 61.7780383925471,
+ 55.9548424792563,
+ 61.7780383925471,
+ 55.9548424792563,
+ 61.7780383925471,
+ 55.9548424792563
+ ),
+ percent = c(
+ 21.219,
+ 17.408,
+ 21.219,
+ 17.408,
+ 21.219,
+ 17.408,
+ 21.219,
+ 17.408,
+ 21.219,
+ 17.408,
+ 21.219,
+ 17.408,
+ 21.219,
+ 17.408,
+ 21.219,
+ 17.408,
+ 21.219,
+ 17.408
+ ),
+ cumulative = c(
+ 0.21219,
+ 0.38627,
+ 0.21219,
+ 0.38627,
+ 0.21219,
+ 0.38627,
+ 0.21219,
+ 0.38627,
+ 0.21219,
+ 0.38627,
+ 0.21219,
+ 0.38627,
+ 0.21219,
+ 0.38627,
+ 0.21219,
+ 0.38627,
+ 0.21219,
+ 0.38627
+ ),
+ Group = c(
+ "A",
+ "A",
+ "A",
+ "A",
+ "A",
+ "A",
+ "B",
+ "B",
+ "B",
+ "B",
+ "B",
+ "B",
+ "C",
+ "C",
+ "C",
+ "C",
+ "C",
+ "C"
+ ),
+ Replicate = c(1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3),
+ Batch = c(1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 2),
+ Label = c(
+ "A1",
+ "A1",
+ "A2",
+ "A2",
+ "A3",
+ "A3",
+ "B1",
+ "B1",
+ "B2",
+ "B2",
+ "B3",
+ "B3",
+ "C1",
+ "C1",
+ "C2",
+ "C2",
+ "C3",
+ "C3"
+ )
+ ),
+ class = c("tbl_df", "tbl", "data.frame"),
+ row.names = c(NA, -18L)
+ )
+ )
+})
+
+test_that("plot_pca layers are expected", {
+ p <- plot_pca(
+ moo_counts = nidap_filtered_counts,
+ sample_metadata = nidap_sample_metadata,
+ principal_components = c(1, 2),
+ samples_to_rename = NULL,
+ group_colname = "Group",
+ label_colname = "Label",
+ color_values = c(
+ "#5954d6",
+ "#e1562c",
+ "#b80058",
+ "#00c6f8",
+ "#d163e6",
+ "#00a76c",
+ "#ff9287",
+ "#008cf9",
+ "#006e00",
+ "#796880",
+ "#FFA500",
+ "#878500"
+ ),
+ legend_position = "top",
+ point_size = 1,
+ add_label = TRUE,
+ label_font_size = 3,
+ label_offset_y_ = 2,
+ label_offset_x_ = 2
+ )
+
+ expect_s3_class(p$layers[[1]], "ggproto")
+ expect_s3_class(p$layers[[1]]$geom, "GeomPoint")
+})
+
+
+test_that("2D & 3D PCA method dispatch works", {
+ moo <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = as.data.frame(nidap_raw_counts),
+ "filt" = as.data.frame(nidap_filtered_counts)
+ )
+ )
+ expect_equal(
+ plot_pca(
+ moo,
+ count_type = "filt",
+ principal_components = c(1, 2)
+ ),
+ plot_pca(
+ moo@counts$filt,
+ moo@sample_meta,
+ principal_components = c(1, 2)
+ )
+ )
+
+ # 3D PCA
+ p1 <- plot_pca(moo, count_type = "filt", principal_components = c(1, 2, 3))
+ p2 <- plot_pca(
+ moo@counts$filt,
+ moo@sample_meta,
+ principal_components = c(1, 2, 3)
+ )
+ # see compare_proxy.plotly
+ # expect_equal(p1, p2)
+})
+
+test_that("plot_pca_3d returns plotly object and has correct structure", {
+ moo <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = as.data.frame(nidap_raw_counts),
+ "filt" = as.data.frame(nidap_filtered_counts)
+ )
+ )
+
+ # Test with multiOmicDataSet
+ fig_moo <- plot_pca_3d(
+ moo,
+ count_type = "filt",
+ principal_components = c(1, 2, 3),
+ group_colname = "Group",
+ label_colname = "Label",
+ save_plots = FALSE,
+ print_plots = FALSE
+ )
+
+ expect_s3_class(fig_moo, "plotly")
+ expect_type(fig_moo$x, "list")
+
+ # Test with data.frame
+ fig_df <- plot_pca_3d(
+ moo@counts$filt,
+ sample_metadata = moo@sample_meta,
+ principal_components = c(1, 2, 3),
+ group_colname = "Group",
+ save_plots = FALSE,
+ print_plots = FALSE
+ )
+
+ expect_s3_class(fig_df, "plotly")
+ expect_type(fig_df$x, "list")
+})
+
+test_that("plot_pca_3d validates principal_components length", {
+ expect_error(
+ plot_pca_3d(
+ nidap_filtered_counts,
+ sample_metadata = nidap_sample_metadata,
+ principal_components = c(1, 2),
+ save_plots = FALSE,
+ print_plots = FALSE
+ ),
+ "principal_components must contain 3 values"
+ )
+
+ expect_error(
+ plot_pca_3d(
+ nidap_filtered_counts,
+ sample_metadata = nidap_sample_metadata,
+ principal_components = c(1, 2, 3, 4),
+ save_plots = FALSE,
+ print_plots = FALSE
+ ),
+ "principal_components must contain 3 values"
+ )
+})
+
+test_that("plot_pca_2d works on multiOmicDataSet object", {
+ moo <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = as.data.frame(nidap_raw_counts),
+ "filt" = as.data.frame(nidap_filtered_counts)
+ )
+ )
+
+ # Test with multiOmicDataSet
+ p_moo <- plot_pca_2d(
+ moo,
+ count_type = "filt",
+ principal_components = c(1, 2),
+ group_colname = "Group",
+ label_colname = "Label",
+ save_plots = FALSE,
+ print_plots = FALSE
+ )
+
+ expect_s3_class(p_moo, "ggplot")
+ # Should have geom_point and geom_text_repel layers
+ expect_gte(length(p_moo$layers), 2)
+ expect_s3_class(p_moo$layers[[1]]$geom, "GeomPoint")
+})
+
+test_that("plot_pca_2d works with and without labels", {
+ moo <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = as.data.frame(nidap_raw_counts),
+ "filt" = as.data.frame(nidap_filtered_counts)
+ )
+ )
+
+ # With labels
+ p_with_labels <- plot_pca_2d(
+ moo,
+ count_type = "filt",
+ principal_components = c(1, 2),
+ add_label = TRUE,
+ save_plots = FALSE,
+ print_plots = FALSE
+ )
+
+ # Without labels
+ p_without_labels <- plot_pca_2d(
+ moo,
+ count_type = "filt",
+ principal_components = c(1, 2),
+ add_label = FALSE,
+ save_plots = FALSE,
+ print_plots = FALSE
+ )
+
+ # With labels should have more layers (geom_text_repel)
+ expect_gt(length(p_with_labels$layers), length(p_without_labels$layers))
+})
diff --git a/code/MOSuite/tests/testthat/test-plot_read_depth.R b/code/MOSuite/tests/testthat/test-plot_read_depth.R
new file mode 100644
index 0000000..df2f4e1
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-plot_read_depth.R
@@ -0,0 +1,30 @@
+test_that("plot_read_depth works on moo & dataframes", {
+ moo <- multiOmicDataSet(
+ sample_metadata = nidap_sample_metadata,
+ anno_dat = data.frame(),
+ counts_lst = list("raw" = nidap_raw_counts)
+ )
+ expect_equal(
+ plot_read_depth(moo, "raw"),
+ plot_read_depth(nidap_raw_counts)
+ )
+})
+
+test_that("plot_read_depth accepts extra args via moo dispatch without error", {
+ moo <- multiOmicDataSet(
+ sample_metadata = nidap_sample_metadata,
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = nidap_raw_counts,
+ "clean" = nidap_clean_raw_counts
+ )
+ )
+ expect_no_error(
+ plot_read_depth(
+ moo,
+ count_type = "clean",
+ sample_id_colname = "Sample",
+ feature_id_colname = "Gene"
+ )
+ )
+})
diff --git a/code/MOSuite/tests/testthat/test-plot_venn_diagram.R b/code/MOSuite/tests/testthat/test-plot_venn_diagram.R
new file mode 100644
index 0000000..9c63715
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-plot_venn_diagram.R
@@ -0,0 +1,56 @@
+test_that("plot_venn_diagram works with defaults", {
+ expect_snapshot(
+ p <- plot_venn_diagram(
+ nidap_volcano_summary_dat,
+ print_plots = FALSE,
+ save_plots = TRUE
+ )
+ )
+ expect_equal(
+ plot_venn_diagram(nidap_volcano_summary_dat),
+ as.data.frame(nidap_venn_diagram_dat)
+ )
+})
+test_that("plot_venn_diagram raises condition for empty df", {
+ expect_error(
+ plot_venn_diagram(structure(
+ list(
+ GeneName = character(0),
+ Contrast = character(0),
+ FC = numeric(0),
+ logFC = numeric(0),
+ tstat = numeric(0),
+ pval = numeric(0),
+ adjpval = numeric(0)
+ ),
+ class = "data.frame",
+ row.names = integer(0)
+ )),
+ "Dataframe is empty"
+ )
+})
+
+test_that("intersection matrix assignment avoids recursive evaluation error", {
+ # This test demonstrates the fix for the recursive default argument reference error
+ # The error occurred with this pattern:
+ # Intersection <- sapply(colnames(Intersection), function(x) Intersection[, x])
+ # The fix uses a temporary variable:
+ # intersection_matrix <- Intersection;
+ # Intersection <- sapply(colnames(intersection_matrix), ...)
+
+ # Call plot_venn_diagram directly to ensure the fix works in practice
+ expect_no_error({
+ result <- plot_venn_diagram(
+ nidap_volcano_summary_dat,
+ print_plots = FALSE,
+ save_plots = FALSE
+ )
+ })
+
+ # Verify the result has the expected structure
+ expect_s3_class(result, "data.frame")
+ expect_true("Gene" %in% colnames(result))
+ expect_true("Intersection" %in% colnames(result))
+ expect_true("Id" %in% colnames(result))
+ expect_true("Size" %in% colnames(result))
+})
diff --git a/code/MOSuite/tests/testthat/test-plot_volcano.R b/code/MOSuite/tests/testthat/test-plot_volcano.R
new file mode 100644
index 0000000..c44bf93
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-plot_volcano.R
@@ -0,0 +1,16 @@
+# moo_nidap <- multiOmicDataSet(
+# sample_metadata = as.data.frame(nidap_sample_metadata),
+# anno_dat = data.frame(),
+# counts_lst = list(
+# "raw" = as.data.frame(nidap_raw_counts),
+# "clean" = as.data.frame(nidap_clean_raw_counts),
+# "filt" = as.data.frame(nidap_filtered_counts),
+# "norm" = list("voom" = as.data.frame(nidap_norm_counts))
+# )
+# )
+# moo_nidap@analyses$diff <- nidap_deg_analysis_2
+#
+# test_that("volcano plots work on MOO", {
+# volc_sum <- plot_volcano_summary(moo_nidap)
+# volc_enh <- plot_volcano_enhanced(moo_nidap)
+# })
diff --git a/code/MOSuite/tests/testthat/test-plot_volcano_enhanced.R b/code/MOSuite/tests/testthat/test-plot_volcano_enhanced.R
new file mode 100644
index 0000000..6f351ca
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-plot_volcano_enhanced.R
@@ -0,0 +1,58 @@
+test_that("plot_volcano_enhanced works on nidap dataset", {
+ expect_snapshot(
+ df_volc_enh <- plot_volcano_enhanced(
+ nidap_deg_analysis,
+ save_plots = FALSE,
+ print_plots = FALSE
+ )
+ )
+})
+
+test_that("plot_volcano_enhanced returns a data frame", {
+ result <- plot_volcano_enhanced(
+ nidap_deg_analysis,
+ save_plots = FALSE,
+ print_plots = FALSE
+ )
+
+ expect_s3_class(result, "data.frame")
+ expect_true(ncol(result) > 0)
+ expect_true(nrow(result) > 0)
+})
+
+test_that("plot_volcano_enhanced respects num_features_to_label", {
+ result <- plot_volcano_enhanced(
+ nidap_deg_analysis,
+ num_features_to_label = 10,
+ save_plots = FALSE,
+ print_plots = FALSE
+ )
+
+ expect_s3_class(result, "data.frame")
+})
+
+test_that("plot_volcano_enhanced works with multiOmicDataSet", {
+ # Create a multiOmicDataSet with differential analysis results
+ moo <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = as.data.frame(nidap_raw_counts),
+ "filt" = as.data.frame(nidap_filtered_counts)
+ ),
+ analyses_lst = list(
+ diff = nidap_deg_analysis_2
+ )
+ )
+
+ # Test that it returns a data frame
+ result <- plot_volcano_enhanced(
+ moo,
+ save_plots = FALSE,
+ print_plots = FALSE
+ )
+
+ expect_s3_class(result, "data.frame")
+ expect_true(ncol(result) > 0)
+ expect_true(nrow(result) > 0)
+})
diff --git a/code/MOSuite/tests/testthat/test-plot_volcano_summary.R b/code/MOSuite/tests/testthat/test-plot_volcano_summary.R
new file mode 100644
index 0000000..bfaa429
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-plot_volcano_summary.R
@@ -0,0 +1,146 @@
+test_that("plot_volcano_summary works on nidap dataset", {
+ expect_snapshot(
+ df_volc_sum <- plot_volcano_summary(
+ nidap_deg_analysis,
+ save_plots = FALSE,
+ print_plots = FALSE
+ )
+ )
+ expect_equal(
+ head(df_volc_sum),
+ structure(
+ list(
+ Gene = c("Dntt", "Tmsb4x", "Flt3", "Tspan13", "Tapt1", "Itgb7"),
+ Contrast = c("B-A", "B-A", "B-A", "B-A", "B-A", "B-A"),
+ FC = c(
+ -42.7465863415622,
+ 3.85002020608143,
+ -7.71439441748029,
+ -7.03849783123801,
+ -5.29181569343323,
+ 8.87382341151917
+ ),
+ logFC = c(
+ -5.41773730869316,
+ 1.94486601753143,
+ -2.94755290920186,
+ -2.81526755916543,
+ -2.40376281543362,
+ 3.14955584391085
+ ),
+ tstat = c(
+ -15.6879749543426,
+ 12.9102607749226,
+ -11.3808403447749,
+ -11.0312744854072,
+ -10.6584674633331,
+ 10.5614738819538
+ ),
+ pval = c(
+ 3.15934346857821e-09,
+ 2.76055502226637e-08,
+ 1.09340538530663e-07,
+ 1.53110956271563e-07,
+ 2.21459280934843e-07,
+ 2.44206995658642e-07
+ ),
+ adjpval = c(
+ 2.50946651709167e-05,
+ 0.000109635442709309,
+ 0.000289497299183018,
+ 0.000304040081416256,
+ 0.000323289361086099,
+ 0.000323289361086099
+ )
+ ),
+ row.names = c("B-A.1", "B-A.2", "B-A.3", "B-A.4", "B-A.5", "B-A.6"),
+ class = "data.frame"
+ )
+ )
+ expect_equal(
+ tail(df_volc_sum),
+ structure(
+ list(
+ Gene = c("Tecpr1", "Lap3", "Zfp952", "Tsr3", "Nbas", "Slc50a1"),
+ Contrast = c("B-C", "B-C", "B-C", "B-C", "B-C", "B-C"),
+ FC = c(
+ -17.6925615963148,
+ 2.57712293045075,
+ -10.2589472087027,
+ -3.22520189762021,
+ 4.43444692871868,
+ -2.36807519790042
+ ),
+ logFC = c(
+ -4.14507103690983,
+ 1.36576135633674,
+ -3.35881078151314,
+ -1.68938947606405,
+ 2.1487541805238,
+ -1.24371489427577
+ ),
+ tstat = c(
+ -2.19458425130448,
+ 2.1944817392618,
+ -2.19287280238278,
+ -2.19094226223025,
+ 2.19079653013039,
+ -2.18921321212647
+ ),
+ pval = c(
+ 0.0491166107800282,
+ 0.0491255772255026,
+ 0.0492665099590459,
+ 0.0494361189306691,
+ 0.0494489447669875,
+ 0.0495884954830029
+ ),
+ adjpval = c(
+ 0.265806852794392,
+ 0.265806852794392,
+ 0.266387943229885,
+ 0.26682946214958,
+ 0.26682946214958,
+ 0.26740082798472
+ )
+ ),
+ row.names = c(
+ "B-C.957",
+ "B-C.958",
+ "B-C.959",
+ "B-C.960",
+ "B-C.961",
+ "B-C.962"
+ ),
+ class = "data.frame"
+ )
+ )
+})
+
+test_that("plot_volcano_summary works with multiOmicDataSet", {
+ # Create a multiOmicDataSet with differential analysis results
+ moo <- multiOmicDataSet(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ anno_dat = data.frame(),
+ counts_lst = list(
+ "raw" = as.data.frame(nidap_raw_counts),
+ "filt" = as.data.frame(nidap_filtered_counts)
+ ),
+ analyses_lst = list(
+ diff = nidap_deg_analysis_2
+ )
+ )
+
+ # Test that it returns a data frame
+ result <- plot_volcano_summary(
+ moo,
+ save_plots = FALSE,
+ print_plots = FALSE
+ )
+
+ expect_s3_class(result, "data.frame")
+ expect_true(ncol(result) > 0)
+ expect_true(nrow(result) > 0)
+ expect_true("Gene" %in% colnames(result))
+ expect_true("Contrast" %in% colnames(result))
+})
diff --git a/code/MOSuite/tests/testthat/test-plots.R b/code/MOSuite/tests/testthat/test-plots.R
new file mode 100644
index 0000000..43ada4f
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-plots.R
@@ -0,0 +1,51 @@
+set.seed(20250225)
+test_that("save_or_print_plot works for ComplexHeatmap", {
+ p <- plot_corr_heatmap(
+ nidap_filtered_counts |>
+ as.data.frame(),
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ sample_id_colname = "Sample",
+ feature_id_colname = "Gene",
+ label_colname = "Label",
+ group_colname = "Group",
+ color_values = c(
+ "#5954d6",
+ "#e1562c",
+ "#b80058",
+ "#00c6f8",
+ "#d163e6",
+ "#00a76c",
+ "#ff9287",
+ "#008cf9",
+ "#006e00",
+ "#796880",
+ "#FFA500",
+ "#878500"
+ )
+ )
+ skip()
+ expect_snapshot_file(
+ print_or_save_plot(
+ p,
+ filename = "heatmap.png",
+ print_plots = FALSE,
+ save_plots = TRUE,
+ plots_dir = "."
+ ),
+ "heatmap.png"
+ )
+})
+test_that("save_or_print_plot works for ggplot", {
+ p <- plot_read_depth(nidap_clean_raw_counts)
+ skip()
+ expect_snapshot_file(
+ print_or_save_plot(
+ p,
+ filename = "read_depth.png",
+ print_plots = FALSE,
+ save_plots = TRUE,
+ plots_dir = "."
+ ),
+ "read_depth.png"
+ )
+})
diff --git a/code/MOSuite/tests/testthat/test-rename.R b/code/MOSuite/tests/testthat/test-rename.R
new file mode 100644
index 0000000..977e9f8
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-rename.R
@@ -0,0 +1,61 @@
+pca_df <- structure(
+ list(
+ PC1 = c(
+ -30.8218758482181,
+ -30.4702492624378,
+ -33.2153357604014,
+ -29.4300387598409,
+ -17.3928202952984,
+ -15.0796711447318,
+ 12.5428903958151,
+ 125.002561238028,
+ 18.8645394370853
+ ),
+ PC2 = c(
+ 21.782786691387,
+ 26.7296573707873,
+ 22.1341837691228,
+ 16.1535492176278,
+ 0.595950972197422,
+ 2.94923156300579,
+ -62.2311789028092,
+ 38.2727901160822,
+ -66.3869707974014
+ ),
+ group = c("A", "A", "A", "B", "B", "B", "C", "C", "C"),
+ sample = c("A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3"),
+ xdata = c(
+ -30.8218758482181,
+ -30.4702492624378,
+ -33.2153357604014,
+ -29.4300387598409,
+ -17.3928202952984,
+ -15.0796711447318,
+ 12.5428903958151,
+ 125.002561238028,
+ 18.8645394370853
+ ),
+ ydata = c(
+ 21.782786691387,
+ 26.7296573707873,
+ 22.1341837691228,
+ 16.1535492176278,
+ 0.595950972197422,
+ 2.94923156300579,
+ -62.2311789028092,
+ 38.2727901160822,
+ -66.3869707974014
+ )
+ ),
+ row.names = c("A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3"),
+ class = "data.frame"
+)
+
+test_that("rename_samples does nothing with empty vector", {
+ expect_equal(rename_samples(pca_df, c("")), pca_df)
+ expect_equal(rename_samples(pca_df, ""), pca_df)
+ expect_equal(rename_samples(pca_df, NULL), pca_df)
+ expect_equal(rename_samples(pca_df, numeric(0)), pca_df)
+})
+
+# TODO need to know expected format of samples_to_rename_manually
diff --git a/code/MOSuite/tests/testthat/test-utils.R b/code/MOSuite/tests/testthat/test-utils.R
new file mode 100644
index 0000000..7540a91
--- /dev/null
+++ b/code/MOSuite/tests/testthat/test-utils.R
@@ -0,0 +1,313 @@
+test_that("abort_packages_not_installed works", {
+ expect_no_error(abort_packages_not_installed("base"))
+ expect_error(
+ abort_packages_not_installed("not-a-package-name"),
+ "The following package\\(s\\) are required but are not installed"
+ )
+})
+test_that("check_packages_installed works", {
+ expect_equal(
+ check_packages_installed("base"),
+ c(base = TRUE)
+ )
+ expect_equal(
+ check_packages_installed("not-a-package-name"),
+ c(`not-a-package-name` = FALSE)
+ )
+})
+
+# Tests for parse_optional_vector
+test_that("parse_optional_vector handles normal input", {
+ result <- parse_optional_vector("a, b, c")
+ expect_equal(result, c("a", "b", "c"))
+})
+
+test_that("parse_optional_vector trims whitespace", {
+ result <- parse_optional_vector(" a , b , c ")
+ expect_equal(result, c("a", "b", "c"))
+})
+
+test_that("parse_optional_vector returns NULL for empty string", {
+ result <- parse_optional_vector("")
+ expect_null(result)
+})
+
+test_that("parse_optional_vector returns NULL for NULL input", {
+ result <- parse_optional_vector(NULL)
+ expect_null(result)
+})
+
+test_that("parse_optional_vector returns NULL for zero-length vector", {
+ result <- parse_optional_vector(character(0))
+ expect_null(result)
+})
+
+test_that("parse_optional_vector handles single value", {
+ result <- parse_optional_vector("single")
+ expect_equal(result, "single")
+})
+
+test_that("parse_optional_vector handles numeric-like strings", {
+ result <- parse_optional_vector("1, 2, 3")
+ expect_equal(result, c("1", "2", "3"))
+})
+
+# Tests for parse_vector_with_default
+test_that("parse_vector_with_default parses normal input", {
+ result <- parse_vector_with_default("a, b, c", "default")
+ expect_equal(result, c("a", "b", "c"))
+})
+
+test_that("parse_vector_with_default returns default for empty string", {
+ result <- parse_vector_with_default("", "default")
+ expect_equal(result, "default")
+})
+
+test_that("parse_vector_with_default returns default for NULL", {
+ result <- parse_vector_with_default(NULL, "default")
+ expect_equal(result, "default")
+})
+
+test_that("parse_vector_with_default returns default for zero-length vector", {
+ result <- parse_vector_with_default(character(0), "default")
+ expect_equal(result, "default")
+})
+
+test_that("parse_vector_with_default handles vector defaults", {
+ default_vec <- c("x", "y", "z")
+ result <- parse_vector_with_default("", default_vec)
+ expect_equal(result, default_vec)
+})
+
+test_that("parse_vector_with_default handles numeric defaults", {
+ result <- parse_vector_with_default("", 42)
+ expect_equal(result, 42)
+})
+
+# Tests for parse_samples_to_rename
+test_that("parse_samples_to_rename parses single pair", {
+ result <- parse_samples_to_rename("old:new")
+ expect_equal(result, list(old = "new"))
+})
+
+test_that("parse_samples_to_rename parses multiple pairs", {
+ result <- parse_samples_to_rename("sample1:S1,sample2:S2,sample3:S3")
+ expect_equal(result, list(sample1 = "S1", sample2 = "S2", sample3 = "S3"))
+})
+
+test_that("parse_samples_to_rename handles many sample pairs", {
+ result <- parse_samples_to_rename(
+ "ctrl_1:Control_Rep1,ctrl_2:Control_Rep2,treat_1:Treatment_Rep1,treat_2:Treatment_Rep2,treat_3:Treatment_Rep3"
+ )
+ expected <- list(
+ ctrl_1 = "Control_Rep1",
+ ctrl_2 = "Control_Rep2",
+ treat_1 = "Treatment_Rep1",
+ treat_2 = "Treatment_Rep2",
+ treat_3 = "Treatment_Rep3"
+ )
+ expect_equal(result, expected)
+})
+
+test_that("parse_samples_to_rename trims whitespace", {
+ result <- parse_samples_to_rename(" old : new , old2 : new2 ")
+ expect_equal(result, list(old = "new", old2 = "new2"))
+})
+
+test_that("parse_samples_to_rename returns NULL for empty string", {
+ result <- parse_samples_to_rename("")
+ expect_null(result)
+})
+
+test_that("parse_samples_to_rename returns NULL for NULL input", {
+ result <- parse_samples_to_rename(NULL)
+ expect_null(result)
+})
+
+test_that("parse_samples_to_rename returns NULL for zero-length vector", {
+ result <- parse_samples_to_rename(character(0))
+ expect_null(result)
+})
+
+test_that("parse_samples_to_rename ignores malformed pairs", {
+ result <- parse_samples_to_rename("valid:pair,invalid_no_colon")
+ expect_equal(result, list(valid = "pair"))
+})
+
+test_that("parse_samples_to_rename returns NULL if all pairs malformed", {
+ result <- parse_samples_to_rename("malformed,also_malformed")
+ expect_null(result)
+})
+
+test_that("parse_samples_to_rename handles colons in values", {
+ result <- parse_samples_to_rename("old:http://new")
+ # Only uses pairs with exactly 2 parts (one colon), so this is ignored
+ expect_null(result)
+})
+
+test_that("parse_samples_to_rename silently ignores pairs with colons in names", {
+ # When a column name contains colon, pair is silently skipped
+ result <- parse_samples_to_rename("old:with:colons:new,sample1:S1")
+ # Only sample1:S1 is valid, the other is ignored
+ expect_equal(result, list(sample1 = "S1"))
+})
+
+# Tests for setup_capsule_environment
+test_that("setup_capsule_environment creates correct directory paths", {
+ tmpdir <- tempdir()
+
+ result <- setup_capsule_environment(base_results_dir = tmpdir)
+
+ expect_equal(result$results_dir, tmpdir)
+ expect_equal(result$plots_dir, file.path(tmpdir, "figures"))
+})
+
+test_that("setup_capsule_environment sets options", {
+ tmpdir <- tempdir()
+
+ setup_capsule_environment(base_results_dir = tmpdir)
+
+ expect_equal(getOption("moo_plots_dir"), file.path(tmpdir, "figures"))
+ expect_equal(getOption("moo_save_plots"), TRUE)
+})
+
+test_that("setup_capsule_environment creates r-packages.csv", {
+ tmpdir <- tempfile()
+ dir.create(tmpdir)
+ on.exit(unlink(tmpdir, recursive = TRUE))
+
+ setup_capsule_environment(base_results_dir = tmpdir)
+
+ csv_path <- file.path(tmpdir, "r-packages.csv")
+ expect_true(file.exists(csv_path))
+
+ # Check it's a valid CSV
+ csv_content <- readr::read_csv(csv_path, show_col_types = FALSE)
+ expect_gt(nrow(csv_content), 0)
+})
+
+test_that("setup_capsule_environment returns invisibly", {
+ tmpdir <- tempdir()
+
+ result <- setup_capsule_environment(base_results_dir = tmpdir)
+
+ expect_type(result, "list")
+ expect_named(result, c("results_dir", "plots_dir"))
+})
+
+# Tests for load_moo_from_data_dir
+test_that("load_moo_from_data_dir stops when no .rds files found", {
+ tmpdir <- tempfile()
+ dir.create(tmpdir)
+ on.exit(unlink(tmpdir, recursive = TRUE))
+
+ expect_error(
+ load_moo_from_data_dir(data_dir = tmpdir),
+ "No files matching regex"
+ )
+})
+
+test_that("load_moo_from_data_dir loads valid MOO object", {
+ tmpdir <- tempfile()
+ dir.create(tmpdir)
+ on.exit(unlink(tmpdir, recursive = TRUE))
+
+ # Create a mock multiOmicDataSet object
+ moo <- structure(
+ list(data = "test"),
+ class = c("multiOmicDataSet", "MOSuite::multiOmicDataSet")
+ )
+
+ rds_file <- file.path(tmpdir, "test.rds")
+ readr::write_rds(moo, rds_file)
+
+ result <- load_moo_from_data_dir(data_dir = tmpdir)
+
+ expect_s3_class(result, "multiOmicDataSet")
+ expect_equal(result$data, "test")
+})
+
+test_that("load_moo_from_data_dir stops for invalid class", {
+ tmpdir <- tempfile()
+ dir.create(tmpdir)
+ on.exit(unlink(tmpdir, recursive = TRUE))
+
+ # Create an invalid object (not a multiOmicDataSet)
+ invalid_obj <- list(data = "test")
+
+ rds_file <- file.path(tmpdir, "test.rds")
+ readr::write_rds(invalid_obj, rds_file)
+
+ expect_error(
+ load_moo_from_data_dir(data_dir = tmpdir),
+ "The input is not a multiOmicDataSet"
+ )
+})
+
+test_that("load_moo_from_data_dir finds file recursively", {
+ tmpdir <- tempfile()
+ dir.create(tmpdir)
+ subdir <- file.path(tmpdir, "subdir")
+ dir.create(subdir)
+ on.exit(unlink(tmpdir, recursive = TRUE))
+
+ # Create a mock multiOmicDataSet in subdirectory
+ moo <- structure(
+ list(data = "test"),
+ class = c("multiOmicDataSet", "MOSuite::multiOmicDataSet")
+ )
+
+ rds_file <- file.path(subdir, "test.rds")
+ readr::write_rds(moo, rds_file)
+
+ result <- load_moo_from_data_dir(data_dir = tmpdir)
+
+ expect_s3_class(result, "multiOmicDataSet")
+})
+
+test_that("load_moo_from_data_dir uses first matching file", {
+ tmpdir <- tempfile()
+ dir.create(tmpdir)
+ on.exit(unlink(tmpdir, recursive = TRUE))
+
+ # Create two mock multiOmicDataSet objects
+ moo1 <- structure(
+ list(data = "first"),
+ class = c("multiOmicDataSet", "MOSuite::multiOmicDataSet")
+ )
+ moo2 <- structure(
+ list(data = "second"),
+ class = c("multiOmicDataSet", "MOSuite::multiOmicDataSet")
+ )
+
+ rds_file1 <- file.path(tmpdir, "a_test.rds")
+ rds_file2 <- file.path(tmpdir, "z_test.rds")
+ readr::write_rds(moo1, rds_file1)
+ readr::write_rds(moo2, rds_file2)
+
+ result <- load_moo_from_data_dir(data_dir = tmpdir)
+
+ # Should load one of them (order may vary, so just check it's valid)
+ expect_s3_class(result, "multiOmicDataSet")
+ expect_true(result$data %in% c("first", "second"))
+})
+
+test_that("load_moo_from_data_dir prints message", {
+ tmpdir <- tempfile()
+ dir.create(tmpdir)
+ on.exit(unlink(tmpdir, recursive = TRUE))
+
+ # Create a mock multiOmicDataSet object
+ moo <- structure(
+ list(data = "test"),
+ class = c("multiOmicDataSet", "MOSuite::multiOmicDataSet")
+ )
+
+ rds_file <- file.path(tmpdir, "test.rds")
+ readr::write_rds(moo, rds_file)
+
+ expect_message(
+ load_moo_from_data_dir(data_dir = tmpdir),
+ "Reading multiOmicDataSet from"
+ )
+})
diff --git a/code/MOSuite/vignettes/.gitignore b/code/MOSuite/vignettes/.gitignore
new file mode 100644
index 0000000..02c2733
--- /dev/null
+++ b/code/MOSuite/vignettes/.gitignore
@@ -0,0 +1,5 @@
+*.html
+*.R
+*.json
+moo.rds
+/figures
diff --git a/code/MOSuite/vignettes/cli.Rmd b/code/MOSuite/vignettes/cli.Rmd
new file mode 100644
index 0000000..5de79f6
--- /dev/null
+++ b/code/MOSuite/vignettes/cli.Rmd
@@ -0,0 +1,218 @@
+---
+title: "Calling MOSuite from the CLI"
+output: rmarkdown::html_vignette
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+ collapse = TRUE,
+ comment = "#>",
+ warning = FALSE
+)
+```
+
+
+> ⚠️ **Most users do not need to use the CLI.**
+> We recommend using MOSuite within R scripts, R Markdown, or Quarto documents
+> for the vast majority of use-cases, as shown in the
+> [**introductory vignette**](https://ccbr.github.io/MOSuite/articles/intro.html).
+> The CLI is provided for a very specialized situation where MOSuite is run in
+> an environment that cannot use R scripts natively.
+
+MOSuite includes an executable file called `mosuite`.
+Any user-facing function in the MOSuite R package can be called with
+`mosuite [function]` from the unix CLI.
+Function arguments are passed in via a JSON file.
+In addition to arguments used by the function,
+the JSON file can contain the following keys:
+
+ - `moo_input_rds` - file path to an existing MultiOmicsDataset object in RDS format. This is required if the MOSuite function has `moo` as an argument (most user-facing functions do).
+ - `moo_output_rds` - file path to write the result to.
+
+## Usage
+
+Run `mosuite --help` in a unix shell to see the full CLI usage:
+
+```{r help, echo=FALSE, results='asis'}
+cat("```sh")
+MOSuite:::cli_usage(con = stdout())
+cat("```")
+```
+
+## Installing the MOSuite CLI
+
+### Docker Container
+
+We provide a docker container with the MOSuite R package and CLI installed as of
+v0.2.0 and later.
+
+
+Running this container with docker or singularity is the recommend way to run
+MOSuite in pipelines and HPC environments.
+
+```sh
+singularity exec docker://nciccbr/mosuite:v0.2.0 bash mosuite --help
+singularity exec docker://nciccbr/mosuite:v0.2.0 R -s -e \
+ 'cat("MOSuite version:", installed.packages()["MOSuite",][["Version"]])'
+```
+
+### Installation on a personal computer
+
+After installing the R package, you can use `system.file()` to locate the
+`mosuite` executable file with R:
+
+```{r install}
+# remotes::install_github("CCBR/MOSuite", dependencies = TRUE)
+system.file("exec", "mosuite", package = "MOSuite")
+```
+
+You should add this executable to your `PATH` environment variable.
+
+```sh
+export PATH="$PATH:/path/to/exec/mosuite"
+```
+
+If you're using the [MOSuite docker container](#docker-container),
+it is already included in the path.
+
+## Example end-to-end script
+
+You can create a shell script to run the full MOSuite pipeline.
+This script assumes you have a directory `json_args/` with JSON files to set
+each function's arguments.
+
+```{r script_e2e, echo=FALSE, results='asis'}
+cat("```bash\n")
+cat(
+ readr::read_lines(system.file(
+ "extdata",
+ "example_script.sh",
+ package = "MOSuite"
+ )),
+ sep = "\n"
+)
+cat("\n```")
+```
+
+The example script and accompanying JSON files are included in the package data.
+You can copy them to your working directory with R:
+
+```{r example_data_paths, eval=FALSE}
+# copy the example script
+file.copy(
+ system.file("extdata", "example_script.sh", package = "MOSuite"),
+ to = "./"
+)
+# copy the JSON files
+file.copy(
+ system.file("extdata", "json_args", package = "MOSuite"),
+ to = "./",
+ recursive = TRUE
+)
+# copy the raw counts & sample metadata
+file.copy(
+ system.file("extdata", "nidap", "Raw_Counts.csv.gz", package = "MOSuite"),
+ to = "./"
+)
+file.copy(
+ system.file(
+ "extdata",
+ "nidap",
+ "Sample_Metadata_Bulk_RNA-seq_Training_Dataset_CCBR.csv.gz",
+ package = "MOSuite"
+ ),
+ to = "./"
+)
+```
+
+Then run the script from the CLI:
+
+```bash
+bash ./example_script.sh
+```
+
+The final multiOmicDataSet will be in `moo.rds` and figures from each step will
+be in `./figures/`.
+
+## Writing JSON files
+
+Create a JSON file with arguments for `create_multiOmicDataSet_from_files()`.
+You can use R code as below or write it by hand.
+
+```{r create_json}
+j <- list(
+ feature_counts_filepath = system.file(
+ "extdata",
+ "RSEM.genes.expected_count.all_samples.txt.gz",
+ package = "MOSuite"
+ ),
+ sample_meta_filepath = system.file(
+ "extdata",
+ "sample_metadata.tsv.gz",
+ package = "MOSuite"
+ ),
+ moo_output_rds = "moo.rds"
+)
+jsonlite::write_json(j, "args_1.json")
+```
+
+In a unix shell, call `create_multiOmicDataSet_from_files()` and specify the path to the JSON file:
+
+```{bash create_moo, eval=FALSE}
+mosuite create_multiOmicDataSet_from_files --json=args_1.json
+```
+
+This is equivalent to running the following R code:
+
+```{r create_moo_R}
+library(MOSuite)
+moo <- create_multiOmicDataSet_from_files(
+ feature_counts_filepath = system.file(
+ "extdata",
+ "RSEM.genes.expected_count.all_samples.txt.gz",
+ package = "MOSuite"
+ ),
+ sample_meta_filepath = system.file(
+ "extdata",
+ "sample_metadata.tsv.gz",
+ package = "MOSuite"
+ )
+)
+readr::write_rds(moo, "moo.rds")
+```
+
+You can use the `moo` object you just created as input to other MOSuite functions.
+
+Create a JSON file of arguments for `clean_raw_counts()` with R (or write it by hand):
+
+```{r create_json_filter}
+j <- list(
+ moo_input_rds = "moo.rds",
+ moo_output_rds = "moo.rds",
+ save_plots = TRUE
+)
+jsonlite::write_json(j, "args_2.json")
+```
+
+Then run `clean_raw_counts()`:
+
+```{bash clean_raw_counts, eval=FALSE}
+mosuite clean_raw_counts --json=args_2.json
+```
+
+Results are saved to `moo.rds`.
+Overwriting the same `moo` file is recommended to save disk space, as the
+multiOmicDataset object saves intermediate results within its data structure.
+
+## Template JSON files
+
+JSON file templates with default arguments for the main functions are bundled with the package.
+You can copy them to your current directory like so:
+
+```{r json_template}
+file.copy(
+ system.file("extdata", "json_args", "defaults", package = "MOSuite"),
+ to = "./",
+ recursive = TRUE
+)
+```
diff --git a/code/MOSuite/vignettes/intro.Rmd b/code/MOSuite/vignettes/intro.Rmd
new file mode 100644
index 0000000..88783c7
--- /dev/null
+++ b/code/MOSuite/vignettes/intro.Rmd
@@ -0,0 +1,60 @@
+---
+title: "Introduction to MultiOmicsSuite"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{intro}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+options(rmarkdown.html_vignette.check_title = FALSE)
+knitr::opts_chunk$set(
+ collapse = TRUE,
+ comment = "#>"
+)
+```
+
+```{r setup}
+library(MOSuite)
+library(dplyr)
+```
+
+```{r nidap_data}
+options(moo_print_plots = TRUE)
+
+moo_nidap <- create_multiOmicDataSet_from_dataframes(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ counts_dat = as.data.frame(nidap_raw_counts)
+) |>
+ clean_raw_counts() |>
+ filter_counts(group_colname = "Group") |>
+ normalize_counts(group_colname = "Group") |>
+ batch_correct_counts(
+ covariates_colname = "Group",
+ batch_colname = "Batch",
+ label_colname = "Label"
+ ) |>
+ diff_counts(
+ count_type = "filt",
+ covariates_colnames = c("Group", "Batch"),
+ contrast_colname = c("Group"),
+ contrasts = c("B-A", "C-A", "B-C"),
+ input_in_log_counts = FALSE,
+ return_mean_and_sd = FALSE,
+ voom_normalization_method = "quantile",
+ ) |>
+ filter_diff()
+
+moo_nidap@analyses$diff |>
+ join_dfs_wide() |>
+ head()
+
+moo_nidap@analyses$diff_filt |> head()
+```
+
+## The multiOmicDataSet object structure
+
+```{r str_moo}
+str(moo_nidap)
+```
diff --git a/code/MOSuite/vignettes/memory.Rmd b/code/MOSuite/vignettes/memory.Rmd
new file mode 100644
index 0000000..6d69fd0
--- /dev/null
+++ b/code/MOSuite/vignettes/memory.Rmd
@@ -0,0 +1,143 @@
+---
+title: "Memory Usage"
+output: rmarkdown::html_vignette
+self_contained: true
+vignette: >
+ %\VignetteIndexEntry{memory}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+options(rmarkdown.html_vignette.check_title = FALSE)
+knitr::opts_chunk$set(
+ collapse = TRUE,
+ comment = "#>",
+ echo = FALSE,
+ # eval = FALSE,
+ message = FALSE,
+ warning = FALSE
+)
+options(rmarkdown.html_vignette.check_title = FALSE)
+```
+
+```{r setup}
+library(dplyr)
+library(ggplot2)
+library(glue)
+library(readr)
+library(MOSuite)
+```
+
+Dataset from:
+
+```{r data}
+counts <- read_tsv(system.file(
+ "extdata", "LIHC_HTseqCounts.txt.gz",
+ package = "MOSuite"
+)) |>
+ rename(gene_id = Gene)
+metadat <- read_tsv(system.file(
+ "extdata", "LIHC_PatientData.txt.gz",
+ package = "MOSuite"
+)) |>
+ select(-sample_id) |>
+ rename(sample_id = barcode)
+
+run_mosuite <- function(metadat, counts) {
+ return(
+ create_multiOmicDataSet_from_dataframes(metadat, counts) |>
+ clean_raw_counts() |>
+ calc_cpm() |>
+ filter_counts(
+ group_colname = "treatments_radiation_treatment_type",
+ label_colname = "sample"
+ )
+ )
+}
+moo <- run_mosuite(metadat, counts)
+```
+
+
+```{r subset}
+subset_mem <- lapply(c(10, 50, 100, 200, nrow(moo@sample_meta)), function(nsamples) {
+ samples_subset <- moo@sample_meta |> slice_sample(n = nsamples)
+ counts_subset <- moo@counts$raw |>
+ dplyr::select(gene_id, tidyselect::all_of(samples_subset |> dplyr::pull(sample_id)))
+
+ moo_subset <- run_mosuite(samples_subset, counts_subset)
+ return(tibble(
+ n_samples = nsamples,
+ object_size = lobstr::obj_size(moo_subset)
+ ))
+}) |>
+ bind_rows()
+```
+
+```{r single_cell}
+to_bytes <- function(x, unit, base = 1024) {
+ bytes_units <- list(
+ K = 1,
+ M = 2,
+ G = 3,
+ T = 4
+ )
+ return(x * (base^bytes_units[[unit]]))
+}
+sc_dat <- tibble::tribble(
+ ~"project", ~"n_cells", ~"n_samples", ~"n_genes", ~"object_size_GB",
+ "CCBR1329/CCBR1243", 64642, 10, 26359, 9.35,
+ "CCBR1297", 9991, 2, 20989, 1.78,
+ "CCBR1035", 170789, 19, 32100, 31.54,
+ "CCBR1203", 208169, 35, 30858, 37.38
+) |>
+ mutate(
+ object_size = to_bytes(object_size_GB, "G", base = 1000),
+ dataset_type = "single-cell (Seurat)"
+ )
+```
+
+
+```{r plot_memory}
+# what is the resource spec of default NIDAP allocation?
+palette_name <- "Set2"
+subset_mem |>
+ ggplot(aes(n_samples, object_size, color = "")) +
+ geom_point() +
+ geom_line(linewidth = 0.3) +
+ scale_y_continuous(labels = scales::label_bytes(units = "GB", accuracy = 0.1)) +
+ scale_color_manual(values = RColorBrewer::brewer.pal(3, palette_name)[[1]]) +
+ labs(
+ title = "Memory usage of S7 object",
+ x = "Number of samples",
+ y = "Memory usage",
+ caption = glue(
+ "The object contains sample metadata and count data as raw, CPM-transformed, and filtered counts.\n",
+ "Each dataset has the same number of genes: ",
+ format(length(moo@counts$raw$gene_id), big.mark = ",")
+ )
+ ) +
+ theme_bw() +
+ theme(legend.position = "none")
+```
+
+```{r plot_memory_comp}
+dat_comp <- bind_rows(
+ subset_mem |> mutate(dataset_type = "bulk (S7)", object_size = as.double(object_size)),
+ sc_dat
+)
+dat_comp |>
+ ggplot(aes(n_samples, object_size, colour = dataset_type)) +
+ geom_point() +
+ geom_line(linewidth = 0.3) +
+ scale_y_continuous(labels = scales::label_bytes(units = "GB", accuracy = 0.1)) +
+ scale_color_brewer(palette = palette_name) +
+ guides(colour = guide_legend(
+ title = "",
+ position = "top",
+ reverse = TRUE,
+ theme = theme(legend.text.position = "top")
+ )) +
+ labs(title = "Memory usage of bulk & single-cell RNA-seq objects", x = "Number of samples", y = "Memory usage") +
+ theme_bw()
+```
diff --git a/code/MOSuite/vignettes/renee.Rmd b/code/MOSuite/vignettes/renee.Rmd
new file mode 100644
index 0000000..f7804f3
--- /dev/null
+++ b/code/MOSuite/vignettes/renee.Rmd
@@ -0,0 +1,79 @@
+---
+title: "RSEM counts from RENEE"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{renee}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+options(rmarkdown.html_vignette.check_title = FALSE)
+knitr::opts_chunk$set(
+ collapse = TRUE,
+ comment = "#>"
+)
+```
+
+```{r setup}
+library(MOSuite)
+library(dplyr)
+```
+
+## RENEE dataset
+
+```{r data}
+# replace these lines with the actual paths to your files
+gene_counts_tsv <- system.file("extdata",
+ "RSEM.genes.expected_count.all_samples.txt.gz",
+ package = "MOSuite"
+)
+metadata_tsv <- system.file("extdata", "sample_metadata.tsv.gz",
+ package = "MOSuite"
+)
+
+# create multi-omic object
+moo <- create_multiOmicDataSet_from_files(
+ sample_meta_filepath = metadata_tsv,
+ feature_counts_filepath = gene_counts_tsv
+)
+
+head(moo@counts$raw)
+head(moo@sample_meta)
+head(moo@annotation)
+```
+
+```{r analysis}
+moo <- moo |>
+ clean_raw_counts() |>
+ filter_counts(
+ group_colname = "condition",
+ label_colname = "sample_id",
+ minimum_count_value_to_be_considered_nonzero = 1,
+ minimum_number_of_samples_with_nonzero_counts_in_total = 1,
+ minimum_number_of_samples_with_nonzero_counts_in_a_group = 1,
+ ) |>
+ normalize_counts(
+ group_colname = "condition",
+ label_colname = "sample_id"
+ ) |>
+ diff_counts(
+ covariates_colnames = "condition",
+ contrast_colname = "condition",
+ contrasts = c("knockout-wildtype")
+ ) |>
+ filter_diff(
+ significance_cutoff = 0.05,
+ significance_column = "adjpval",
+ change_column = "logFC",
+ change_cutoff = 1
+ )
+
+moo@counts$norm$voom |> head()
+```
+
+## The multiOmicDataSet object structure
+
+```{r str_moo}
+str(moo)
+```
diff --git a/code/MOSuite/vignettes/visualization.Rmd b/code/MOSuite/vignettes/visualization.Rmd
new file mode 100644
index 0000000..c46d6a0
--- /dev/null
+++ b/code/MOSuite/vignettes/visualization.Rmd
@@ -0,0 +1,152 @@
+---
+title: "Visualization with built-in plots"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{visualization}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r setup, include = FALSE}
+options(rmarkdown.html_vignette.check_title = FALSE)
+knitr::opts_chunk$set(
+ collapse = TRUE,
+ comment = "#>",
+ fig.width = 5,
+ fig.height = 4
+)
+```
+
+```{r load}
+library(MOSuite)
+```
+
+## Default plots from each step
+
+Default plots can be printed to the screen and/or saved to the disk.
+
+```{r options}
+# set options to print & save the plots
+options(moo_print_plots = TRUE)
+options(moo_save_plots = TRUE)
+# when moo_save_plots is TRUE, plots are saved to this directory:
+options(moo_plots_dir = "./figures")
+```
+
+See `?MOSuite::options` for more information.
+
+### clean
+
+```{r nidap_data_clean}
+moo <- create_multiOmicDataSet_from_dataframes(
+ sample_metadata = as.data.frame(nidap_sample_metadata),
+ counts_dat = as.data.frame(nidap_raw_counts)
+) |>
+ clean_raw_counts()
+```
+
+### filter
+
+```{r nidap_filter}
+moo <- moo |>
+ filter_counts(group_colname = "Group")
+```
+
+### normalize
+
+```{r nidap_norm}
+moo <- moo |>
+ normalize_counts(group_colname = "Group")
+```
+
+### batch correct
+
+```{r nidap_batch}
+moo <- moo |>
+ batch_correct_counts(
+ covariates_colname = "Group",
+ batch_colname = "Batch",
+ label_colname = "Label"
+ )
+```
+
+### differential expression
+
+```{r diff_counts}
+moo <- moo |>
+ diff_counts(
+ count_type = "filt",
+ covariates_colnames = c("Group", "Batch"),
+ contrast_colname = c("Group"),
+ contrasts = c("B-A", "C-A", "B-C"),
+ input_in_log_counts = FALSE,
+ return_mean_and_sd = FALSE,
+ voom_normalization_method = "quantile",
+ )
+```
+
+### filter differential features
+
+```{r filter_diff}
+moo <- moo |> filter_diff()
+```
+
+## Customize plots
+
+TODO
+
+- show how to use individual plotting functions
+- how to customize & override default color palettes
+- how to customize ggplot objects
+
+### 3D PCA
+
+```{r pca_3D}
+plot_pca(
+ moo@counts$batch,
+ moo@sample_meta,
+ principal_components = c(1, 2, 3),
+ group_colname = "Group",
+ label_colname = "Label",
+ color_values = moo@analyses[["colors"]][["Group"]]
+)
+```
+
+### Expression Heatmap
+
+```{r expr_heatmap}
+heatmap_plot <- plot_expr_heatmap(
+ moo,
+ count_type = "norm",
+ sub_count_type = "voom"
+)
+print(heatmap_plot)
+```
+
+### Volcano
+
+#### Summary
+
+```{r volcano_summary}
+dat_volcano_summary <- moo@analyses$diff |>
+ join_dfs_wide() |>
+ plot_volcano_summary()
+
+head(dat_volcano_summary)
+```
+
+#### Enhanced
+
+```{r volcano_enhanced}
+dat_volcano_enhanced <- moo@analyses$diff |>
+ join_dfs_wide() |>
+ plot_volcano_enhanced()
+```
+
+
+### Venn Diagram
+
+```{r venn_diagram}
+venn_dat <- dat_volcano_summary |> plot_venn_diagram()
+head(venn_dat)
+```
diff --git a/code/main.R b/code/main.R
index 9b69cdd..a3eee69 100644
--- a/code/main.R
+++ b/code/main.R
@@ -2,10 +2,10 @@
rlang::global_entrace()
library(argparse)
library(glue)
-library(MOSuite)
library(readr)
library(stringr)
library(dplyr)
+devtools::load_all("/code/MOSuite")
# set up capsule environment
setup_capsule_environment()
@@ -13,48 +13,253 @@ setup_capsule_environment()
# parse CLI arguments
parser <- ArgumentParser()
-parser$add_argument("--count_type", type="character", default="filt")
-parser$add_argument("--sub_count_type", type="character", default=NULL, help="Sub count type if count_type is a list")
-parser$add_argument("--sample_id_colname", type="character", default=NULL, help="Column name for sample IDs")
-parser$add_argument("--feature_id_colname", type="character", default=NULL, help="Column name for feature IDs")
-parser$add_argument("--group_colname", type="character", default="Group", help="Column name for sample groups")
-parser$add_argument("--label_colname", type="character", default=NULL, help="Column name for sample labels")
-parser$add_argument("--samples_to_include", type="character", default="", help="Comma-separated list of samples to include")
-parser$add_argument("--color_values", type="character", default="#5954d6,#e1562c,#b80058,#00c6f8,#d163e6,#00a76c,#ff9287,#008cf9,#006e00,#796880,#FFA500,#878500", help="Comma-separated color values")
-parser$add_argument("--include_all_genes", type="logical", default=FALSE, help="Include all genes")
-parser$add_argument("--filter_top_genes_by_variance", type="logical", default=TRUE, help="Filter top genes by variance")
-parser$add_argument("--top_genes_by_variance_to_include", type="integer", default=500, help="Number of top genes by variance")
-parser$add_argument("--specific_genes_to_include_in_heatmap", type="character", default="None", help="Comma-separated list of specific genes")
-parser$add_argument("--cluster_genes", type="logical", default=TRUE, help="Cluster genes")
-parser$add_argument("--gene_distance_metric", type="character", default="correlation", help="Gene distance metric")
-parser$add_argument("--gene_clustering_method", type="character", default="average", help="Gene clustering method")
-parser$add_argument("--display_gene_dendrograms", type="logical", default=TRUE, help="Display gene dendrograms")
-parser$add_argument("--display_gene_names", type="logical", default=FALSE, help="Display gene names")
-parser$add_argument("--center_and_rescale_expression", type="logical", default=TRUE, help="Center and rescale expression")
-parser$add_argument("--cluster_samples", type="logical", default=FALSE, help="Cluster samples")
-parser$add_argument("--arrange_sample_columns", type="logical", default=TRUE, help="Arrange sample columns")
-parser$add_argument("--order_by_gene_expression", type="logical", default=FALSE, help="Order by gene expression")
-parser$add_argument("--gene_to_order_columns", type="character", default=" ", help="Gene to order columns")
-parser$add_argument("--gene_expression_order", type="character", default="low_to_high", help="Gene expression order")
-parser$add_argument("--smpl_distance_metric", type="character", default="correlation", help="Sample distance metric")
-parser$add_argument("--smpl_clustering_method", type="character", default="average", help="Sample clustering method")
-parser$add_argument("--display_smpl_dendrograms", type="logical", default=TRUE, help="Display sample dendrograms")
-parser$add_argument("--reorder_dendrogram", type="logical", default=FALSE, help="Reorder dendrogram")
-parser$add_argument("--reorder_dendrogram_order", type="character", default="", help="Reorder dendrogram order")
-parser$add_argument("--display_sample_names", type="logical", default=TRUE, help="Display sample names")
-parser$add_argument("--group_columns", type="character", default="Group,Replicate,Batch", help="Columns for groups")
-parser$add_argument("--assign_group_colors", type="logical", default=FALSE, help="Assign group colors")
-parser$add_argument("--assign_color_to_sample_groups", type="character", default="", help="Assign color to sample groups")
-parser$add_argument("--group_colors", type="character", default="#5954d6,#e1562c,#b80058,#00c6f8,#d163e6,#00a76c,#ff9287,#008cf9,#006e00,#796880,#FFA500,#878500", help="Group colors")
-parser$add_argument("--heatmap_color_scheme", type="character", default="Default", help="Heatmap color scheme")
-parser$add_argument("--autoscale_heatmap_color", type="logical", default=TRUE, help="Autoscale heatmap color")
-parser$add_argument("--set_min_heatmap_color", type="double", default=-2, help="Minimum heatmap color value")
-parser$add_argument("--set_max_heatmap_color", type="double", default=2, help="Maximum heatmap color value")
-parser$add_argument("--aspect_ratio", type="character", default="Auto", help="Aspect ratio")
-parser$add_argument("--legend_font_size", type="integer", default=10, help="Legend font size")
-parser$add_argument("--gene_name_font_size", type="integer", default=4, help="Gene name font size")
-parser$add_argument("--sample_name_font_size", type="integer", default=8, help="Sample name font size")
-parser$add_argument("--display_numbers", type="logical", default=FALSE, help="Display numbers in heatmap")
+parser$add_argument("--count_type", type = "character", default = "filt")
+parser$add_argument(
+ "--sub_count_type",
+ type = "character",
+ default = NULL,
+ help = "Sub count type if count_type is a list"
+)
+parser$add_argument(
+ "--sample_id_colname",
+ type = "character",
+ default = NULL,
+ help = "Column name for sample IDs"
+)
+parser$add_argument(
+ "--feature_id_colname",
+ type = "character",
+ default = NULL,
+ help = "Column name for feature IDs"
+)
+parser$add_argument(
+ "--group_colname",
+ type = "character",
+ default = "Group",
+ help = "Column name for sample groups"
+)
+parser$add_argument(
+ "--label_colname",
+ type = "character",
+ default = NULL,
+ help = "Column name for sample labels"
+)
+parser$add_argument(
+ "--samples_to_include",
+ type = "character",
+ default = "",
+ help = "Comma-separated list of samples to include"
+)
+parser$add_argument(
+ "--color_values",
+ type = "character",
+ default = "#5954d6,#e1562c,#b80058,#00c6f8,#d163e6,#00a76c,#ff9287,#008cf9,#006e00,#796880,#FFA500,#878500",
+ help = "Comma-separated color values"
+)
+parser$add_argument(
+ "--include_all_genes",
+ type = "logical",
+ default = FALSE,
+ help = "Include all genes"
+)
+parser$add_argument(
+ "--filter_top_genes_by_variance",
+ type = "logical",
+ default = TRUE,
+ help = "Filter top genes by variance"
+)
+parser$add_argument(
+ "--top_genes_by_variance_to_include",
+ type = "integer",
+ default = 500,
+ help = "Number of top genes by variance"
+)
+parser$add_argument(
+ "--specific_genes_to_include_in_heatmap",
+ type = "character",
+ default = "None",
+ help = "Comma-separated list of specific genes"
+)
+parser$add_argument(
+ "--cluster_genes",
+ type = "logical",
+ default = TRUE,
+ help = "Cluster genes"
+)
+parser$add_argument(
+ "--gene_distance_metric",
+ type = "character",
+ default = "correlation",
+ help = "Gene distance metric"
+)
+parser$add_argument(
+ "--gene_clustering_method",
+ type = "character",
+ default = "average",
+ help = "Gene clustering method"
+)
+parser$add_argument(
+ "--display_gene_dendrograms",
+ type = "logical",
+ default = TRUE,
+ help = "Display gene dendrograms"
+)
+parser$add_argument(
+ "--display_gene_names",
+ type = "logical",
+ default = FALSE,
+ help = "Display gene names"
+)
+parser$add_argument(
+ "--center_and_rescale_expression",
+ type = "logical",
+ default = TRUE,
+ help = "Center and rescale expression"
+)
+parser$add_argument(
+ "--cluster_samples",
+ type = "logical",
+ default = FALSE,
+ help = "Cluster samples"
+)
+parser$add_argument(
+ "--arrange_sample_columns",
+ type = "logical",
+ default = TRUE,
+ help = "Arrange sample columns"
+)
+parser$add_argument(
+ "--order_by_gene_expression",
+ type = "logical",
+ default = FALSE,
+ help = "Order by gene expression"
+)
+parser$add_argument(
+ "--gene_to_order_columns",
+ type = "character",
+ default = " ",
+ help = "Gene to order columns"
+)
+parser$add_argument(
+ "--gene_expression_order",
+ type = "character",
+ default = "low_to_high",
+ help = "Gene expression order"
+)
+parser$add_argument(
+ "--smpl_distance_metric",
+ type = "character",
+ default = "correlation",
+ help = "Sample distance metric"
+)
+parser$add_argument(
+ "--smpl_clustering_method",
+ type = "character",
+ default = "average",
+ help = "Sample clustering method"
+)
+parser$add_argument(
+ "--display_smpl_dendrograms",
+ type = "logical",
+ default = TRUE,
+ help = "Display sample dendrograms"
+)
+parser$add_argument(
+ "--reorder_dendrogram",
+ type = "logical",
+ default = FALSE,
+ help = "Reorder dendrogram"
+)
+parser$add_argument(
+ "--reorder_dendrogram_order",
+ type = "character",
+ default = "",
+ help = "Reorder dendrogram order"
+)
+parser$add_argument(
+ "--display_sample_names",
+ type = "logical",
+ default = TRUE,
+ help = "Display sample names"
+)
+parser$add_argument(
+ "--group_columns",
+ type = "character",
+ default = "Group,Replicate,Batch",
+ help = "Columns for groups"
+)
+parser$add_argument(
+ "--assign_group_colors",
+ type = "logical",
+ default = FALSE,
+ help = "Assign group colors"
+)
+parser$add_argument(
+ "--assign_color_to_sample_groups",
+ type = "character",
+ default = "",
+ help = "Assign color to sample groups"
+)
+parser$add_argument(
+ "--group_colors",
+ type = "character",
+ default = "#5954d6,#e1562c,#b80058,#00c6f8,#d163e6,#00a76c,#ff9287,#008cf9,#006e00,#796880,#FFA500,#878500",
+ help = "Group colors"
+)
+parser$add_argument(
+ "--heatmap_color_scheme",
+ type = "character",
+ default = "Default",
+ help = "Heatmap color scheme"
+)
+parser$add_argument(
+ "--autoscale_heatmap_color",
+ type = "logical",
+ default = TRUE,
+ help = "Autoscale heatmap color"
+)
+parser$add_argument(
+ "--set_min_heatmap_color",
+ type = "double",
+ default = -2,
+ help = "Minimum heatmap color value"
+)
+parser$add_argument(
+ "--set_max_heatmap_color",
+ type = "double",
+ default = 2,
+ help = "Maximum heatmap color value"
+)
+parser$add_argument(
+ "--aspect_ratio",
+ type = "character",
+ default = "Auto",
+ help = "Aspect ratio"
+)
+parser$add_argument(
+ "--legend_font_size",
+ type = "integer",
+ default = 10,
+ help = "Legend font size"
+)
+parser$add_argument(
+ "--gene_name_font_size",
+ type = "integer",
+ default = 4,
+ help = "Gene name font size"
+)
+parser$add_argument(
+ "--sample_name_font_size",
+ type = "integer",
+ default = 8,
+ help = "Sample name font size"
+)
+parser$add_argument(
+ "--display_numbers",
+ type = "logical",
+ default = FALSE,
+ help = "Display numbers in heatmap"
+)
args <- parser$parse_args()
@@ -63,38 +268,45 @@ moo <- load_moo_from_data_dir()
# run MOSuite
plot_expr_heatmap(
- moo,
- count_type = args$count_type,
- sub_count_type = args$sub_count_type,
- sample_id_colname = args$sample_id_colname,
- feature_id_colname = args$feature_id_colname,
- group_colname = args$group_colname,
- label_colname = args$label_colname,
- samples_to_include = parse_optional_vector(args$samples_to_include),
- color_values = parse_optional_vector(args$color_values),
- include_all_genes = args$include_all_genes,
- filter_top_genes_by_variance = args$filter_top_genes_by_variance,
- top_genes_by_variance_to_include = args$top_genes_by_variance_to_include,
- specific_genes_to_include_in_heatmap = parse_vector_with_default(args$specific_genes_to_include_in_heatmap, "None"),
- cluster_genes = args$cluster_genes,
- gene_distance_metric = args$gene_distance_metric,
- gene_clustering_method = args$gene_clustering_method,
- display_gene_dendrograms = args$display_gene_dendrograms,
- display_gene_names = args$display_gene_names,
- center_and_rescale_expression = args$center_and_rescale_expression,
- cluster_samples = args$cluster_samples,
- arrange_sample_columns = args$arrange_sample_columns,
- order_by_gene_expression = args$order_by_gene_expression,
- gene_to_order_columns = args$gene_to_order_columns,
- gene_expression_order = args$gene_expression_order,
- smpl_distance_metric = args$smpl_distance_metric,
- smpl_clustering_method = args$smpl_clustering_method,
- display_smpl_dendrograms = args$display_smpl_dendrograms,
- reorder_dendrogram = args$reorder_dendrogram,
- reorder_dendrogram_order = parse_optional_vector(args$reorder_dendrogram_order),
- display_sample_names = args$display_sample_names,
- group_columns = parse_optional_vector(args$group_columns),
- assign_group_colors = args$assign_group_colors,
- assign_color_to_sample_groups = parse_optional_vector(args$assign_color_to_sample_groups),
- group_colors = parse_optional_vector(args$group_colors)
+ moo,
+ count_type = args$count_type,
+ sub_count_type = args$sub_count_type,
+ sample_id_colname = args$sample_id_colname,
+ feature_id_colname = args$feature_id_colname,
+ group_colname = args$group_colname,
+ label_colname = args$label_colname,
+ samples_to_include = parse_optional_vector(args$samples_to_include),
+ color_values = parse_optional_vector(args$color_values),
+ include_all_genes = args$include_all_genes,
+ filter_top_genes_by_variance = args$filter_top_genes_by_variance,
+ top_genes_by_variance_to_include = args$top_genes_by_variance_to_include,
+ specific_genes_to_include_in_heatmap = parse_vector_with_default(
+ args$specific_genes_to_include_in_heatmap,
+ "None"
+ ),
+ cluster_genes = args$cluster_genes,
+ gene_distance_metric = args$gene_distance_metric,
+ gene_clustering_method = args$gene_clustering_method,
+ display_gene_dendrograms = args$display_gene_dendrograms,
+ display_gene_names = args$display_gene_names,
+ center_and_rescale_expression = args$center_and_rescale_expression,
+ cluster_samples = args$cluster_samples,
+ arrange_sample_columns = args$arrange_sample_columns,
+ order_by_gene_expression = args$order_by_gene_expression,
+ gene_to_order_columns = args$gene_to_order_columns,
+ gene_expression_order = args$gene_expression_order,
+ smpl_distance_metric = args$smpl_distance_metric,
+ smpl_clustering_method = args$smpl_clustering_method,
+ display_smpl_dendrograms = args$display_smpl_dendrograms,
+ reorder_dendrogram = args$reorder_dendrogram,
+ reorder_dendrogram_order = parse_optional_vector(
+ args$reorder_dendrogram_order
+ ),
+ display_sample_names = args$display_sample_names,
+ group_columns = parse_optional_vector(args$group_columns),
+ assign_group_colors = args$assign_group_colors,
+ assign_color_to_sample_groups = parse_optional_vector(
+ args$assign_color_to_sample_groups
+ ),
+ group_colors = parse_optional_vector(args$group_colors)
)
diff --git a/environment/Dockerfile b/environment/Dockerfile
index 273dcf1..2823f50 100644
--- a/environment/Dockerfile
+++ b/environment/Dockerfile
@@ -1,10 +1,10 @@
-# hash:sha256:16a54343c83446772895fc0c0b1e88a6dbd136fee70bcacffff7886f30541b82
+# hash:sha256:3ddd04105ec5ac0dc17d676e30ef154cb818689953c1d6cdcd656c82272f4902
ARG REGISTRY_HOST
-FROM $REGISTRY_HOST/codeocean/mosuite:v0.3.0
+FROM $REGISTRY_HOST/codeocean/mosuite-minimal:v0.3.1
ARG DEBIAN_FRONTEND=noninteractive
ARG GIT_ASKPASS
-COPY git-askpass /
+COPY git-ask-pass /
COPY postInstall /
RUN --mount=type=secret,id=secrets . /run/secrets/secrets \