From 517635c79b4c3e351fa13a682bdf8ca23af4a7ad Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Mon, 19 Jan 2026 11:14:41 +0530 Subject: [PATCH 01/16] nse in cube --- R/data.table.R | 44 +++++++++++++++++++++++++++++++++++++++++++ R/groupingsets.R | 43 ++++++++++++++++++++++++++---------------- inst/tests/tests.Rraw | 43 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 16 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index a989538b1..ec8847f06 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -521,6 +521,50 @@ replace_dot_alias = function(e) { list(GForce=GForce, jsub=jsub, jvnames=jvnames) } +# Helper function to process SDcols +.processSDcols = function(SDcols_sub, SDcols_missing, x, jsub, by, enclos = parent.frame()) { + names_x = names(x) + bysub = substitute(by) + allbyvars = intersect(all.vars(bysub), names_x) + usesSD = ".SD" %chin% all.vars(jsub) + if (!usesSD) { + return(NULL) + } + if (SDcols_missing) { + ansvars = sdvars = setdiff(unique(names_x), union(by, allbyvars)) + ansvals = match(ansvars, names_x) + return(list(ansvars = ansvars, sdvars = sdvars, ansvals = ansvals)) + } + sub.result = SDcols_sub + if (sub.result %iscall% "patterns") { + .SDcols = eval_with_cols(sub.result, names_x) + } else { + .SDcols = eval(sub.result, enclos) + } + if (anyNA(.SDcols)) + stopf(".SDcols missing at the following indices: %s", brackify(which(is.na(.SDcols)))) + if (is.character(.SDcols)) { + idx = .SDcols %chin% names_x + if (!all(idx)) + stopf("Some items of .SDcols are not column names: %s", toString(.SDcols[!idx])) + ansvars = sdvars = .SDcols + ansvals = match(ansvars, names_x) + } else if (is.numeric(.SDcols)) { + ansvals = as.integer(.SDcols) + if (any(ansvals < 1L | ansvals > length(names_x))) + stopf(".SDcols contains indices out of bounds") + ansvars = sdvars = names_x[ansvals] + } else if (is.logical(.SDcols)) { + if (length(.SDcols) != length(names_x)) + stopf(".SDcols is a logical vector of length %d but there are %d columns", length(.SDcols), length(names_x)) + ansvals = which(.SDcols) + ansvars = sdvars = names_x[ansvals] + } else { + stopf(".SDcols must be character, numeric, or logical") + } + list(ansvars = ansvars, sdvars = sdvars, ansvals = ansvals) +} + "[.data.table" = function(x, i, j, by, keyby, with=TRUE, nomatch=NA, mult="all", roll=FALSE, rollends=if (roll=="nearest") c(TRUE,TRUE) else if (roll>=0.0) c(FALSE,TRUE) else c(TRUE,FALSE), which=FALSE, .SDcols, verbose=getOption("datatable.verbose"), allow.cartesian=getOption("datatable.allow.cartesian"), drop=NULL, on=NULL, env=NULL, showProgress=getOption("datatable.showProgress", interactive())) { # ..selfcount <<- ..selfcount+1 # in dev, we check no self calls, each of which doubles overhead, or could diff --git a/R/groupingsets.R b/R/groupingsets.R index 885a64830..7cf6de86c 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -20,22 +20,33 @@ cube = function(x, ...) { UseMethod("cube") } cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { - # input data type basic validation - if (!is.data.table(x)) - stopf("'%s' must be a data.table", "x", class="dt_invalid_input_error") - if (!is.character(by)) - stopf("Argument 'by' must be a character vector of column names used in grouping.") - if (!is.logical(id)) - stopf("Argument 'id' must be a logical scalar.") - if (missing(j)) - stopf("Argument 'j' is required") - # generate grouping sets for cube - power set: http://stackoverflow.com/a/32187892/2490497 - n = length(by) - keepBool = sapply(2L^(seq_len(n)-1L), function(k) rep(c(FALSE, TRUE), times=k, each=((2L^n)/(2L*k)))) - sets = lapply((2L^n):1L, function(jj) by[keepBool[jj, ]]) - # redirect to workhorse function - jj = substitute(j) - groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj, label=label, enclos = parent.frame()) + # input data type basic validation + if (!is.data.table(x)) + stopf("Argument 'x' must be a data.table object", class="dt_invalid_input_error") + if (!is.character(by)) + stopf("Argument 'by' must be a character vector of column names used in grouping.") + if (!is.logical(id)) + stopf("Argument 'id' must be a logical scalar.") + if (missing(j)) + stopf("Argument 'j' is required") + # Implementing NSE in cube using the helper, .processSDcols + jj = substitute(j) + sdcols_result = .processSDcols(SDcols_sub = substitute(.SDcols), SDcols_missing = missing(.SDcols), x = x, jsub = jj, by = by, enclos = parent.frame()) + if (is.null(sdcols_result)) { + .SDcols = NULL + } else { + ansvars = sdcols_result$ansvars + sdvars = sdcols_result$sdvars + ansvals = sdcols_result$ansvals + .SDcols = sdvars + } + # generate grouping sets for cube - power set: http://stackoverflow.com/a/32187892/2490497 + n = length(by) + keepBool = sapply(2L^(seq_len(n)-1L), function(k) rep(c(FALSE, TRUE), times=k, each=((2L^n)/(2L*k)))) + sets = lapply((2L^n):1L, function(jj) by[keepBool[jj, ]]) + # redirect to workhorse function + jj = substitute(j) + groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj, label=label, enclos = parent.frame()) } groupingsets = function(x, ...) { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b73b2767a..10a468e23 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11106,6 +11106,49 @@ test(1750.34, character(0)), id = TRUE) ) +test(1750.25, + cube(copy(dt), j = lapply(.SD, mean), by = "color", .SDcols = 4, id=TRUE), + groupingsets(dt, j = lapply(.SD, mean), by = "color", .SDcols = "amount", + sets = list("color", character(0)), id = TRUE) +) + +test(1750.35, + cube(dt, j = lapply(.SD, sum), by = c("color","year","status"), id=TRUE, .SDcols=patterns("value")), + groupingsets(dt, j = lapply(.SD, sum), by = c("color","year","status"), .SDcols = "value", + sets = list(c("color","year","status"), + c("color","year"), + c("color","status"), + "color", + c("year","status"), + "year", + "status", + character(0)), + id = TRUE) +) +test(1750.36, + cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c("value", "BADCOL")), + error = "Some items of \\.SDcols are not column names" +) + +test(1750.37, + cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c(TRUE, FALSE)), + error = "\\.SDcols is a logical vector of length" +) + +test(1750.38, +cube(dt, j = lapply(.SD, mean), by = "color", .SDcols = c(FALSE, FALSE, FALSE, TRUE, FALSE), id=TRUE), + groupingsets(dt, j = lapply(.SD, mean), by = "color", .SDcols = "amount", + sets = list("color", character(0)), + id = TRUE) +) +test(1750.39, + cube(dt, j = lapply(.SD, sum), by = "color", .SDcols = list("amount")), + error = ".SDcols must be character, numeric, or logical" +) +test(1750.40, + cube(dt, j = lapply(.SD, sum), by = "color", .SDcols = c(1, 99)), + error = "out of bounds" +) # grouping sets with integer64 if (test_bit64) { set.seed(26) From 86471b5eb8cc2a0bf9ee443e7f07c9acf5d46d78 Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Mon, 19 Jan 2026 11:33:23 +0530 Subject: [PATCH 02/16] changes --- inst/tests/tests.Rraw | 6 ------ 1 file changed, 6 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 10a468e23..3a5d198e0 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11106,12 +11106,6 @@ test(1750.34, character(0)), id = TRUE) ) -test(1750.25, - cube(copy(dt), j = lapply(.SD, mean), by = "color", .SDcols = 4, id=TRUE), - groupingsets(dt, j = lapply(.SD, mean), by = "color", .SDcols = "amount", - sets = list("color", character(0)), id = TRUE) -) - test(1750.35, cube(dt, j = lapply(.SD, sum), by = c("color","year","status"), id=TRUE, .SDcols=patterns("value")), groupingsets(dt, j = lapply(.SD, sum), by = c("color","year","status"), .SDcols = "value", From 31de69f3baefce6bcc11e2b26970185da092af79 Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Sun, 22 Feb 2026 14:47:56 +0530 Subject: [PATCH 03/16] changes to groupingsets --- R/groupingsets.R | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/R/groupingsets.R b/R/groupingsets.R index 7cf6de86c..e31284831 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -20,17 +20,17 @@ cube = function(x, ...) { UseMethod("cube") } cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { - # input data type basic validation - if (!is.data.table(x)) - stopf("Argument 'x' must be a data.table object", class="dt_invalid_input_error") - if (!is.character(by)) - stopf("Argument 'by' must be a character vector of column names used in grouping.") - if (!is.logical(id)) - stopf("Argument 'id' must be a logical scalar.") - if (missing(j)) - stopf("Argument 'j' is required") - # Implementing NSE in cube using the helper, .processSDcols - jj = substitute(j) + # input data type basic validation + if (!is.data.table(x)) + stopf("'%s' must be a data.table", "x", class="dt_invalid_input_error") + if (!is.character(by)) + stopf("Argument 'by' must be a character vector of column names used in grouping.") + if (!is.logical(id)) + stopf("Argument 'id' must be a logical scalar.") + if (missing(j)) + stopf("Argument 'j' is required") + # Implementing NSE in cube using the helper, .processSDcols + jj = substitute(j) sdcols_result = .processSDcols(SDcols_sub = substitute(.SDcols), SDcols_missing = missing(.SDcols), x = x, jsub = jj, by = by, enclos = parent.frame()) if (is.null(sdcols_result)) { .SDcols = NULL @@ -40,13 +40,13 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { ansvals = sdcols_result$ansvals .SDcols = sdvars } - # generate grouping sets for cube - power set: http://stackoverflow.com/a/32187892/2490497 - n = length(by) - keepBool = sapply(2L^(seq_len(n)-1L), function(k) rep(c(FALSE, TRUE), times=k, each=((2L^n)/(2L*k)))) - sets = lapply((2L^n):1L, function(jj) by[keepBool[jj, ]]) - # redirect to workhorse function - jj = substitute(j) - groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj, label=label, enclos = parent.frame()) + # generate grouping sets for cube - power set: http://stackoverflow.com/a/32187892/2490497 + n = length(by) + keepBool = sapply(2L^(seq_len(n)-1L), function(k) rep(c(FALSE, TRUE), times=k, each=((2L^n)/(2L*k)))) + sets = lapply((2L^n):1L, function(jj) by[keepBool[jj, ]]) + # redirect to workhorse function + jj = substitute(j) + groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj, label=label, enclos = parent.frame()) } groupingsets = function(x, ...) { From d48ddde7e5eac48ba8fb70ce056059b19cd75007 Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Sun, 22 Feb 2026 15:41:11 +0530 Subject: [PATCH 04/16] adding unit tests --- inst/tests/tests.Rraw | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 3a5d198e0..30b890154 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11188,6 +11188,16 @@ if (test_bit64) { } # end Grouping Sets +# extra cube tests +test(1750.49, + cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c(NA_character_, "amount")), + error = "\\.SDcols missing at the following indices: \\[1\\]" +) +test(1750.50, + cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c(4L, 5L)), + cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c("amount", "value")) +) + # for completeness, added test for NA problem to close #1837. DT = data.table(x=NA) test(1751.1, capture.output(fwrite(DT, verbose=FALSE)), c("x","")) From 99f1c790de96e9fc780236fa748a6a90c340def7 Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Thu, 2 Apr 2026 15:57:09 +0530 Subject: [PATCH 05/16] addition of helper to [.data.table --- R/data.table.R | 119 ++++++++++++++++++++++++++---------------- inst/tests/tests.Rraw | 15 ------ 2 files changed, 74 insertions(+), 60 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index ec8847f06..4c525aba4 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -522,47 +522,57 @@ replace_dot_alias = function(e) { } # Helper function to process SDcols -.processSDcols = function(SDcols_sub, SDcols_missing, x, jsub, by, enclos = parent.frame()) { - names_x = names(x) - bysub = substitute(by) - allbyvars = intersect(all.vars(bysub), names_x) - usesSD = ".SD" %chin% all.vars(jsub) - if (!usesSD) { - return(NULL) - } - if (SDcols_missing) { - ansvars = sdvars = setdiff(unique(names_x), union(by, allbyvars)) - ansvals = match(ansvars, names_x) - return(list(ansvars = ansvars, sdvars = sdvars, ansvals = ansvals)) - } - sub.result = SDcols_sub - if (sub.result %iscall% "patterns") { - .SDcols = eval_with_cols(sub.result, names_x) - } else { - .SDcols = eval(sub.result, enclos) - } - if (anyNA(.SDcols)) - stopf(".SDcols missing at the following indices: %s", brackify(which(is.na(.SDcols)))) - if (is.character(.SDcols)) { - idx = .SDcols %chin% names_x - if (!all(idx)) - stopf("Some items of .SDcols are not column names: %s", toString(.SDcols[!idx])) - ansvars = sdvars = .SDcols - ansvals = match(ansvars, names_x) - } else if (is.numeric(.SDcols)) { - ansvals = as.integer(.SDcols) - if (any(ansvals < 1L | ansvals > length(names_x))) - stopf(".SDcols contains indices out of bounds") - ansvars = sdvars = names_x[ansvals] - } else if (is.logical(.SDcols)) { - if (length(.SDcols) != length(names_x)) - stopf(".SDcols is a logical vector of length %d but there are %d columns", length(.SDcols), length(names_x)) - ansvals = which(.SDcols) - ansvars = sdvars = names_x[ansvals] - } else { - stopf(".SDcols must be character, numeric, or logical") - } - list(ansvars = ansvars, sdvars = sdvars, ansvals = ansvals) +.processSDcols = function(SDcols_sub, SDcols_missing, x, jsub, by, enclos = parent.frame(), bynames = character(0L)) { + names_x = names(x) + allbyvars = intersect(all.vars(by), names_x) + usesSD = ".SD" %chin% all.vars(jsub) + if (!usesSD) { + return(NULL) + } + if (SDcols_missing) { + ansvars = sdvars = setdiff(unique(names_x), union(by, allbyvars)) + ansvals = match(ansvars, names_x) + return(list(ansvars = ansvars, sdvars = sdvars, ansvals = ansvals)) + } + sub.result = SDcols_sub + if (sub.result %iscall% ':' && length(sub.result) == 3L) { + return(NULL) + } + if (sub.result %iscall% c("!", "-") && length(sub.result) == 2L) { + negate_sdcols = TRUE + sub.result = sub.result[[2L]] + } else negate_sdcols = FALSE + if (sub.result %iscall% "patterns") { + .SDcols = eval_with_cols(sub.result, names_x) + } else { + .SDcols = eval(sub.result, enclos) + } + if (!is.character(.SDcols) && !is.numeric(.SDcols) && !is.logical(.SDcols)) { + return(NULL) + } + if (anyNA(.SDcols)) + stopf(".SDcols missing at the following indices: %s", brackify(which(is.na(.SDcols)))) + if (is.character(.SDcols)) { + idx = .SDcols %chin% names_x + if (!all(idx)) + stopf("Some items of .SDcols are not column names: %s", toString(.SDcols[!idx])) + ansvars = sdvars = .SDcols + ansvals = match(ansvars, names_x) + } else if (is.numeric(.SDcols)) { + ansvals = as.integer(.SDcols) + if (length(unique(sign(.SDcols))) > 1L) stopf(".SDcols is numeric but has both +ve and -ve indices") + if (any(idx <- abs(.SDcols) > ncol(x) | abs(.SDcols) < 1L)) stopf(".SDcols is numeric but out of bounds [1, %d] at: %s", ncol(x), brackify(which(idx))) + ansvals = if (negate_sdcols) setdiff(seq_along(names(x)), c(.SDcols, which(names(x) %chin% bynames))) else as.integer(.SDcols) + ansvars = sdvars = names_x[ansvals] + } else if (is.logical(.SDcols)) { + if (length(.SDcols) != length(names_x)) + stopf(".SDcols is a logical vector of length %d but there are %d columns", length(.SDcols), length(names_x)) + ansvals = which(.SDcols) + ansvars = sdvars = names_x[ansvals] + } else { + stopf(".SDcols must be character, numeric, or logical") + } + list(ansvars = ansvars, sdvars = sdvars, ansvals = ansvals) } "[.data.table" = function(x, i, j, by, keyby, with=TRUE, nomatch=NA, mult="all", roll=FALSE, rollends=if (roll=="nearest") c(TRUE,TRUE) else if (roll>=0.0) c(FALSE,TRUE) else c(TRUE,FALSE), which=FALSE, .SDcols, verbose=getOption("datatable.verbose"), allow.cartesian=getOption("datatable.allow.cartesian"), drop=NULL, on=NULL, env=NULL, showProgress=getOption("datatable.showProgress", interactive())) @@ -1457,10 +1467,28 @@ replace_dot_alias = function(e) { while(colsub %iscall% "(") colsub = as.list(colsub)[[-1L]] # fix for R-Forge #5190. colsub[[1L]] gave error when it's a symbol. # NB: _unary_ '-', not _binary_ '-' (#5826). Test for '!' length-2 should be redundant but low-cost & keeps code concise. - if (colsub %iscall% c("!", "-") && length(colsub) == 2L) { - negate_sdcols = TRUE - colsub = colsub[[2L]] - } else negate_sdcols = FALSE + try_processSDcols = !(colsub %iscall% c("!", "-") && length(colsub) == 2L) && !(colsub %iscall% ':') && !(colsub %iscall% 'patterns') + if (try_processSDcols) { + sdcols_result = .processSDcols( + SDcols_sub = colsub, + SDcols_missing = FALSE, + x = x, + jsub = jsub, + by = substitute(by), + enclos = parent.frame() + ) + if (!is.null(sdcols_result)) { + ansvars = sdvars = sdcols_result$ansvars + ansvals = sdcols_result$ansvals + } else { + try_processSDcols = FALSE + } + } + if (!try_processSDcols) { + if (colsub %iscall% c("!", "-") && length(colsub) == 2L) { + negate_sdcols = TRUE + colsub = colsub[[2L]] + } else negate_sdcols = FALSE # fix for #1216, make sure the parentheses are peeled from expr of the form (((1:4))) while(colsub %iscall% "(") colsub = as.list(colsub)[[-1L]] if (colsub %iscall% ':' && length(colsub)==3L && !is.call(colsub[[2L]]) && !is.call(colsub[[3L]])) { @@ -1509,6 +1537,7 @@ replace_dot_alias = function(e) { ansvals = chmatch(ansvars, names_x) } } + } # fix for long standing FR/bug, #495 and #484 allcols = c(names_x, xdotprefix, names_i, idotprefix) non_sdvars = setdiff(intersect(av, allcols), c(bynames, ansvars)) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 30b890154..da6bfb3ec 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11119,26 +11119,11 @@ test(1750.35, character(0)), id = TRUE) ) -test(1750.36, - cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c("value", "BADCOL")), - error = "Some items of \\.SDcols are not column names" -) test(1750.37, cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c(TRUE, FALSE)), error = "\\.SDcols is a logical vector of length" ) - -test(1750.38, -cube(dt, j = lapply(.SD, mean), by = "color", .SDcols = c(FALSE, FALSE, FALSE, TRUE, FALSE), id=TRUE), - groupingsets(dt, j = lapply(.SD, mean), by = "color", .SDcols = "amount", - sets = list("color", character(0)), - id = TRUE) -) -test(1750.39, - cube(dt, j = lapply(.SD, sum), by = "color", .SDcols = list("amount")), - error = ".SDcols must be character, numeric, or logical" -) test(1750.40, cube(dt, j = lapply(.SD, sum), by = "color", .SDcols = c(1, 99)), error = "out of bounds" From 8ed3a4c0e78f911d523ff8a8c1027a8b196a9ee4 Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Thu, 2 Apr 2026 16:42:46 +0530 Subject: [PATCH 06/16] tests --- R/data.table.R | 2 -- inst/tests/tests.Rraw | 10 +++++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 4c525aba4..04584e10b 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -569,8 +569,6 @@ replace_dot_alias = function(e) { stopf(".SDcols is a logical vector of length %d but there are %d columns", length(.SDcols), length(names_x)) ansvals = which(.SDcols) ansvars = sdvars = names_x[ansvals] - } else { - stopf(".SDcols must be character, numeric, or logical") } list(ansvars = ansvars, sdvars = sdvars, ansvals = ansvals) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index da6bfb3ec..7591b3cd8 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11124,7 +11124,11 @@ test(1750.37, cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c(TRUE, FALSE)), error = "\\.SDcols is a logical vector of length" ) -test(1750.40, +test(1750.38, + cube(dt, j = lapply(.SD, mean), by = "color", .SDcols = c(FALSE, FALSE, FALSE, TRUE, FALSE), id=TRUE)[grouping==0L, .(color, amount)], + dt[, lapply(.SD, mean), by = "color", .SDcols = "amount"] +) +test(1750.39, cube(dt, j = lapply(.SD, sum), by = "color", .SDcols = c(1, 99)), error = "out of bounds" ) @@ -11179,8 +11183,8 @@ test(1750.49, error = "\\.SDcols missing at the following indices: \\[1\\]" ) test(1750.50, - cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c(4L, 5L)), - cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c("amount", "value")) + cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c(4L, 5L), id=TRUE)[grouping==0L, .(year, amount, value)], + dt[, lapply(.SD, sum), by = "year", .SDcols = c("amount", "value")] ) # for completeness, added test for NA problem to close #1837. From 45759853eb6a1a0911142dd0d2ad7f338c5a905d Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Thu, 2 Apr 2026 17:18:52 +0530 Subject: [PATCH 07/16] tests --- inst/tests/tests.Rraw | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 7591b3cd8..6e1fc7ef7 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11186,6 +11186,10 @@ test(1750.50, cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c(4L, 5L), id=TRUE)[grouping==0L, .(year, amount, value)], dt[, lapply(.SD, sum), by = "year", .SDcols = c("amount", "value")] ) +test(1750.52, + cube(dt, j = lapply(.SD, sum), by = "color", .SDcols = c(4L, 5L), id=TRUE)[grouping==0L, .(color, amount, value)], + dt[, lapply(.SD, sum), by = "color", .SDcols = c("amount", "value")] +) # for completeness, added test for NA problem to close #1837. DT = data.table(x=NA) From ed86626d9575989620526ac08fa2c349e35fb9ea Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Thu, 2 Apr 2026 18:24:19 +0530 Subject: [PATCH 08/16] addition of tests for code coverage --- inst/tests/tests.Rraw | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 6e1fc7ef7..0bb7b98cc 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11119,7 +11119,11 @@ test(1750.35, character(0)), id = TRUE) ) - +test(1750.36, + names(cube(dt, j = lapply(.SD, sum), by = "color", + .SDcols = -c(1L, 2L, 3L), id = TRUE)), + c("grouping", "color", "amount", "value") +) test(1750.37, cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c(TRUE, FALSE)), error = "\\.SDcols is a logical vector of length" @@ -11186,11 +11190,15 @@ test(1750.50, cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c(4L, 5L), id=TRUE)[grouping==0L, .(year, amount, value)], dt[, lapply(.SD, sum), by = "year", .SDcols = c("amount", "value")] ) +test(1750.51, + cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c(4L, 5L), id = TRUE)[grouping == 0L, .(year, amount, value)], + dt[, lapply(.SD, sum), by = "year", .SDcols = c("amount", "value")] +) test(1750.52, - cube(dt, j = lapply(.SD, sum), by = "color", .SDcols = c(4L, 5L), id=TRUE)[grouping==0L, .(color, amount, value)], - dt[, lapply(.SD, sum), by = "color", .SDcols = c("amount", "value")] + names(cube(dt, j = lapply(.SD, sum), by = "color", + .SDcols = -(1:3), id = TRUE)), + c("grouping", "color", "amount", "value", "id1", "id2") ) - # for completeness, added test for NA problem to close #1837. DT = data.table(x=NA) test(1751.1, capture.output(fwrite(DT, verbose=FALSE)), c("x","")) From 33ecd143726fad84ed9d99ce89b45afc8092c9b6 Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Thu, 2 Apr 2026 19:10:51 +0530 Subject: [PATCH 09/16] addition of more tests for code coverage --- inst/tests/tests.Rraw | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 0bb7b98cc..e2b010752 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11191,14 +11191,14 @@ test(1750.50, dt[, lapply(.SD, sum), by = "year", .SDcols = c("amount", "value")] ) test(1750.51, - cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c(4L, 5L), id = TRUE)[grouping == 0L, .(year, amount, value)], - dt[, lapply(.SD, sum), by = "year", .SDcols = c("amount", "value")] -) -test(1750.52, names(cube(dt, j = lapply(.SD, sum), by = "color", .SDcols = -(1:3), id = TRUE)), c("grouping", "color", "amount", "value", "id1", "id2") ) +test(1750.52, + data.table:::.processSDcols(quote(a:b), FALSE, dt, quote(lapply(.SD, sum)), "color"), + NULL +) # for completeness, added test for NA problem to close #1837. DT = data.table(x=NA) test(1751.1, capture.output(fwrite(DT, verbose=FALSE)), c("x","")) From c187349feae280dd6d6fea28a5fd3295d5931ae4 Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Thu, 2 Apr 2026 19:34:59 +0530 Subject: [PATCH 10/16] addition of more tests for code coverage --- inst/tests/tests.Rraw | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e2b010752..9353a1011 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11199,6 +11199,10 @@ test(1750.52, data.table:::.processSDcols(quote(a:b), FALSE, dt, quote(lapply(.SD, sum)), "color"), NULL ) +test(1750.53, cube(dt, j=lapply(.SD,sum), by="color", .SDcols=c(4L,NA,5L)), error="\\.SDcols missing at the following indices") +test(1750.54, cube(dt, j=lapply(.SD,sum), by="color", .SDcols=c(1L,-2L)), error="\\.SDcols is numeric but has both") +test(1750.55, cube(dt, j=lapply(.SD,sum), by="color", .SDcols=99L), error="\\.SDcols is numeric but out of bounds") +test(1750.56, cube(dt, j=lapply(.SD,sum), by="color", .SDcols=c("amount","notacol")), error="Some items of \\.SDcols are not column names") # for completeness, added test for NA problem to close #1837. DT = data.table(x=NA) test(1751.1, capture.output(fwrite(DT, verbose=FALSE)), c("x","")) From 9d681d0df2a5f0dc69cf9401b4877dff2b2a9360 Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Thu, 2 Apr 2026 23:46:15 +0530 Subject: [PATCH 11/16] code cov --- inst/tests/tests.Rraw | 4 ---- 1 file changed, 4 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9353a1011..e2b010752 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11199,10 +11199,6 @@ test(1750.52, data.table:::.processSDcols(quote(a:b), FALSE, dt, quote(lapply(.SD, sum)), "color"), NULL ) -test(1750.53, cube(dt, j=lapply(.SD,sum), by="color", .SDcols=c(4L,NA,5L)), error="\\.SDcols missing at the following indices") -test(1750.54, cube(dt, j=lapply(.SD,sum), by="color", .SDcols=c(1L,-2L)), error="\\.SDcols is numeric but has both") -test(1750.55, cube(dt, j=lapply(.SD,sum), by="color", .SDcols=99L), error="\\.SDcols is numeric but out of bounds") -test(1750.56, cube(dt, j=lapply(.SD,sum), by="color", .SDcols=c("amount","notacol")), error="Some items of \\.SDcols are not column names") # for completeness, added test for NA problem to close #1837. DT = data.table(x=NA) test(1751.1, capture.output(fwrite(DT, verbose=FALSE)), c("x","")) From cac741cc07838754ca7b660d9711ede75f6c26e8 Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Fri, 3 Apr 2026 00:37:38 +0530 Subject: [PATCH 12/16] code cov --- inst/tests/tests.Rraw | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e2b010752..e99e7d0b6 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11199,6 +11199,14 @@ test(1750.52, data.table:::.processSDcols(quote(a:b), FALSE, dt, quote(lapply(.SD, sum)), "color"), NULL ) +test(1750.53, + cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c(1L, -2L)), + error = "\\.SDcols is numeric but has both \\+ve and -ve indices" +) +test(1750.54, + cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c("amount", "nonexistent")), + error = "Some items of .SDcols are not column names" +) # for completeness, added test for NA problem to close #1837. DT = data.table(x=NA) test(1751.1, capture.output(fwrite(DT, verbose=FALSE)), c("x","")) From a578b1a1e124eab92db30a8104a90da5a0deb9c9 Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Fri, 3 Apr 2026 01:18:23 +0530 Subject: [PATCH 13/16] code cov --- inst/tests/tests.Rraw | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e99e7d0b6..b065800b1 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11205,7 +11205,11 @@ test(1750.53, ) test(1750.54, cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c("amount", "nonexistent")), - error = "Some items of .SDcols are not column names" + error = "Some items of .SDcols are not column names: nonexistent" +) +test(1750.55, + cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c(1L, 99L)), + error = "out of bounds" ) # for completeness, added test for NA problem to close #1837. DT = data.table(x=NA) From cc32cc11bd23849236f80dfe22726f3162aed678 Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Fri, 3 Apr 2026 01:39:15 +0530 Subject: [PATCH 14/16] code cov --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b065800b1..8b2eda27f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11208,8 +11208,8 @@ test(1750.54, error = "Some items of .SDcols are not column names: nonexistent" ) test(1750.55, - cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c(1L, 99L)), - error = "out of bounds" + data.table:::.processSDcols(quote(c("amount", "nonexistent")), FALSE, dt, quote(lapply(.SD, sum)), "year"), + error = "Some items of \\.SDcols are not column names" ) # for completeness, added test for NA problem to close #1837. DT = data.table(x=NA) From f688446a3534f5a18b3459f915a5538d8c8b454e Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Wed, 8 Apr 2026 15:07:17 +0530 Subject: [PATCH 15/16] removal of redundant checks --- inst/tests/tests.Rraw | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 8b2eda27f..8eb3790f8 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11191,26 +11191,17 @@ test(1750.50, dt[, lapply(.SD, sum), by = "year", .SDcols = c("amount", "value")] ) test(1750.51, - names(cube(dt, j = lapply(.SD, sum), by = "color", - .SDcols = -(1:3), id = TRUE)), - c("grouping", "color", "amount", "value", "id1", "id2") -) -test(1750.52, data.table:::.processSDcols(quote(a:b), FALSE, dt, quote(lapply(.SD, sum)), "color"), NULL ) -test(1750.53, +test(1750.52, cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c(1L, -2L)), error = "\\.SDcols is numeric but has both \\+ve and -ve indices" ) -test(1750.54, +test(1750.53, cube(dt, j = lapply(.SD, sum), by = "year", .SDcols = c("amount", "nonexistent")), error = "Some items of .SDcols are not column names: nonexistent" ) -test(1750.55, - data.table:::.processSDcols(quote(c("amount", "nonexistent")), FALSE, dt, quote(lapply(.SD, sum)), "year"), - error = "Some items of \\.SDcols are not column names" -) # for completeness, added test for NA problem to close #1837. DT = data.table(x=NA) test(1751.1, capture.output(fwrite(DT, verbose=FALSE)), c("x","")) From c6277c9532e7c350a81bd6c532542e47488102f2 Mon Sep 17 00:00:00 2001 From: sisyphuswastaken Date: Wed, 8 Apr 2026 15:49:27 +0530 Subject: [PATCH 16/16] new tests for coverage --- inst/tests/tests.Rraw | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 8eb3790f8..566aef2b2 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -13128,7 +13128,10 @@ test(1956.3, DT[, .SD, .SDcols = NA_real_], error = 'missing at the following') test(1956.4, DT[, .SD, .SDcols = 2L], error = 'out of bounds.*1.*1.*at') test(1956.5, DT[, .SD, .SDcols = 'b'], error = 'not column names') test(1956.6, DT[, .SD, .SDcols = 3i], error = '.SDcols should be column numbers or names') - +test(1956.7, DT[, .SD, .SDcols = -c(1L, NA_integer_)], error = 'missing at the following') +test(1956.8, DT[, .SD, .SDcols = 1:-1], error = 'both.*ve and.*ve') +test(1956.9, DT[, .SD, .SDcols = 1:99], error = 'out of bounds') +test(1956.91, DT[, .SD, .SDcols = -c("a", "nonexistent")], error = 'not column names') # added brackify to utils for #3116 test(1957.1, brackify(1:3), '[1, 2, 3]') test(1957.2, brackify(1:11), "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ...]")