From 015093efce738c94e54d9de580a84578ff29f22e Mon Sep 17 00:00:00 2001 From: Matt Secrest Date: Wed, 13 May 2026 11:11:55 -0700 Subject: [PATCH 01/16] test cases --- tests/testthat/test-DID_EC_AIPW.R | 49 +++++++++++ tests/testthat/test-DID_EC_IPW.R | 53 +++++++++++ tests/testthat/test-DID_EC_OR.R | 73 ++++++++++++++++ tests/testthat/test-EC_AIPW_OPT.R | 109 +++++++++++++++++++++++ tests/testthat/test-EC_IPW_OPT.R | 140 ++++++++++++++++++++++++++++++ tests/testthat/test-SCM.R | 58 +++++++++++++ 6 files changed, 482 insertions(+) create mode 100644 tests/testthat/test-DID_EC_AIPW.R create mode 100644 tests/testthat/test-DID_EC_IPW.R create mode 100644 tests/testthat/test-DID_EC_OR.R create mode 100644 tests/testthat/test-EC_AIPW_OPT.R create mode 100644 tests/testthat/test-EC_IPW_OPT.R create mode 100644 tests/testthat/test-SCM.R diff --git a/tests/testthat/test-DID_EC_AIPW.R b/tests/testthat/test-DID_EC_AIPW.R new file mode 100644 index 0000000..9c6c96a --- /dev/null +++ b/tests/testthat/test-DID_EC_AIPW.R @@ -0,0 +1,49 @@ +test_that("DID_EC_AIPW returns expected structure", { + model_form_mu <- c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5", + "y3 ~ x1 + x2 + x3 + x4 + x5", + "y4 ~ x1 + x2 + x3 + x4 + x5" + ) + res <- rdborrow:::DID_EC_AIPW( + data = SyntheticData, + outcome_col_name = c("y1", "y2", "y3", "y4"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + T_cross = 2, + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + model_form_piA = "A ~ x1 + x2 + x3 + x4 + x5", + model_form_mu0_ext = model_form_mu, + Bootstrap = TRUE, + R = 50, + bootstrap_CI_type = "perc" + ) + expect_s3_class(res, "data.frame") + expect_identical(nrow(res), 2L) +}) + +test_that("DID_EC_AIPW produces finite estimates with valid CIs", { + model_form_mu <- c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5", + "y3 ~ x1 + x2 + x3 + x4 + x5", + "y4 ~ x1 + x2 + x3 + x4 + x5" + ) + res <- rdborrow:::DID_EC_AIPW( + data = SyntheticData, + outcome_col_name = c("y1", "y2", "y3", "y4"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + T_cross = 2, + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + model_form_piA = "A ~ x1 + x2 + x3 + x4 + x5", + model_form_mu0_ext = model_form_mu, + Bootstrap = TRUE, + R = 50, + bootstrap_CI_type = "perc" + ) + expect_all_true(is.finite(res$point_estimates)) + expect_all_true(res$lower_CI_boot < res$upper_CI_boot) +}) diff --git a/tests/testthat/test-DID_EC_IPW.R b/tests/testthat/test-DID_EC_IPW.R new file mode 100644 index 0000000..e09c3c4 --- /dev/null +++ b/tests/testthat/test-DID_EC_IPW.R @@ -0,0 +1,53 @@ +test_that("DID_EC_IPW returns expected structure", { + res <- rdborrow:::DID_EC_IPW( + data = SyntheticData, + outcome_col_name = c("y1", "y2", "y3", "y4"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + T_cross = 2, + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + model_form_piA = "A ~ x1 + x2 + x3 + x4 + x5", + Bootstrap = TRUE, + R = 50, + bootstrap_CI_type = "perc" + ) + expect_s3_class(res, "data.frame") + expect_identical(nrow(res), 2L) + expect_named(res, c("point_estimates", "lower_CI_boot", "upper_CI_boot")) +}) + +test_that("DID_EC_IPW produces finite estimates with valid CIs", { + res <- rdborrow:::DID_EC_IPW( + data = SyntheticData, + outcome_col_name = c("y1", "y2", "y3", "y4"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + T_cross = 2, + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + model_form_piA = "A ~ x1 + x2 + x3 + x4 + x5", + Bootstrap = TRUE, + R = 50, + bootstrap_CI_type = "perc" + ) + expect_all_true(is.finite(res$point_estimates)) + expect_all_true(res$lower_CI_boot < res$upper_CI_boot) +}) + +test_that("DID_EC_IPW estimates effects for OLE period only", { + res <- rdborrow:::DID_EC_IPW( + data = SyntheticData, + outcome_col_name = c("y1", "y2", "y3", "y4"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + T_cross = 2, + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + model_form_piA = "A ~ x1 + x2 + x3 + x4 + x5", + Bootstrap = TRUE, + R = 50, + bootstrap_CI_type = "perc" + ) + expect_identical(rownames(res), c("tau3", "tau4")) +}) diff --git a/tests/testthat/test-DID_EC_OR.R b/tests/testthat/test-DID_EC_OR.R new file mode 100644 index 0000000..504be94 --- /dev/null +++ b/tests/testthat/test-DID_EC_OR.R @@ -0,0 +1,73 @@ +test_that("DID_EC_OR returns expected structure", { + model_form_mu <- c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5", + "y3 ~ x1 + x2 + x3 + x4 + x5", + "y4 ~ x1 + x2 + x3 + x4 + x5" + ) + res <- rdborrow:::DID_EC_OR( + data = SyntheticData, + outcome_col_name = c("y1", "y2", "y3", "y4"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + T_cross = 2, + model_form_mu0_ext = model_form_mu, + model_form_mu0_rct = model_form_mu, + model_form_mu1_rct = model_form_mu, + Bootstrap = TRUE, + R = 50, + bootstrap_CI_type = "perc" + ) + expect_s3_class(res, "data.frame") + expect_identical(nrow(res), 2L) +}) + +test_that("DID_EC_OR produces finite estimates with valid CIs", { + model_form_mu <- c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5", + "y3 ~ x1 + x2 + x3 + x4 + x5", + "y4 ~ x1 + x2 + x3 + x4 + x5" + ) + res <- rdborrow:::DID_EC_OR( + data = SyntheticData, + outcome_col_name = c("y1", "y2", "y3", "y4"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + T_cross = 2, + model_form_mu0_ext = model_form_mu, + model_form_mu0_rct = model_form_mu, + model_form_mu1_rct = model_form_mu, + Bootstrap = TRUE, + R = 50, + bootstrap_CI_type = "perc" + ) + expect_all_true(is.finite(res$point_estimates)) + expect_all_true(res$lower_CI_boot < res$upper_CI_boot) +}) + +test_that("DID_EC_OR estimates effects for OLE period only", { + model_form_mu <- c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5", + "y3 ~ x1 + x2 + x3 + x4 + x5", + "y4 ~ x1 + x2 + x3 + x4 + x5" + ) + res <- rdborrow:::DID_EC_OR( + data = SyntheticData, + outcome_col_name = c("y1", "y2", "y3", "y4"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + T_cross = 2, + model_form_mu0_ext = model_form_mu, + model_form_mu0_rct = model_form_mu, + model_form_mu1_rct = model_form_mu, + Bootstrap = TRUE, + R = 50, + bootstrap_CI_type = "perc" + ) + expect_identical(rownames(res), c("tau3", "tau4")) +}) diff --git a/tests/testthat/test-EC_AIPW_OPT.R b/tests/testthat/test-EC_AIPW_OPT.R new file mode 100644 index 0000000..3039b92 --- /dev/null +++ b/tests/testthat/test-EC_AIPW_OPT.R @@ -0,0 +1,109 @@ +test_that("EC_AIPW_OPT returns expected structure", { + res <- rdborrow:::EC_AIPW_OPT( + data = SyntheticData, + outcome_col_name = c("y1", "y2"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + model_form_mu0_ext = c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5" + ), + optimal_weight_flag = FALSE, + wt = 0 + ) + expect_type(res, "list") + expect_named(res, c("results", "borrow_weight")) + expect_s3_class(res$results, "data.frame") + expect_identical(nrow(res$results), 2L) +}) + +test_that("EC_AIPW_OPT with w=0 uses only trial data", { + res <- rdborrow:::EC_AIPW_OPT( + data = SyntheticData, + outcome_col_name = c("y1", "y2"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + model_form_mu0_ext = c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5" + ), + optimal_weight_flag = FALSE, + wt = 0 + ) + expect_identical(res$borrow_weight, 0) + expect_all_true(is.finite(res$results$point_estimates)) + expect_all_true(res$results$standard_deviation > 0) +}) + +test_that("EC_AIPW_OPT with optimal weight borrows from external controls", { + res <- rdborrow:::EC_AIPW_OPT( + data = SyntheticData, + outcome_col_name = c("y1", "y2"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + model_form_mu0_ext = c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5" + ), + optimal_weight_flag = TRUE + ) + expect_gt(res$borrow_weight, 0) + expect_lt(res$borrow_weight, 1) + expect_all_true(is.finite(res$results$point_estimates)) +}) + +test_that("EC_AIPW_OPT with fixed weight uses the specified weight", { + res <- rdborrow:::EC_AIPW_OPT( + data = SyntheticData, + outcome_col_name = c("y1", "y2"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + model_form_mu0_ext = c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5" + ), + optimal_weight_flag = FALSE, + wt = 0.3 + ) + expect_equal(res$borrow_weight, 0.3) +}) + +test_that("EC_AIPW_OPT normal CIs contain the point estimates", { + res <- rdborrow:::EC_AIPW_OPT( + data = SyntheticData, + outcome_col_name = c("y1", "y2"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + model_form_mu0_ext = c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5" + ), + optimal_weight_flag = TRUE + ) + expect_all_true(res$results$point_estimates >= res$results$lower_CI_normal) + expect_all_true(res$results$point_estimates <= res$results$upper_CI_normal) +}) + +test_that("EC_AIPW_OPT works with a single outcome", { + res <- rdborrow:::EC_AIPW_OPT( + data = SyntheticData, + outcome_col_name = "y1", + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + model_form_mu0_ext = "y1 ~ x1 + x2 + x3 + x4 + x5", + optimal_weight_flag = TRUE + ) + expect_identical(nrow(res$results), 1L) +}) diff --git a/tests/testthat/test-EC_IPW_OPT.R b/tests/testthat/test-EC_IPW_OPT.R new file mode 100644 index 0000000..079e00e --- /dev/null +++ b/tests/testthat/test-EC_IPW_OPT.R @@ -0,0 +1,140 @@ +test_that("EC_IPW_OPT returns expected structure", { + res <- rdborrow:::EC_IPW_OPT( + data = SyntheticData, + outcome_col_name = c("y1", "y2"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + optimal_weight_flag = FALSE, + wt = 0 + ) + expect_type(res, "list") + expect_named(res, c("results", "borrow_weight")) + expect_s3_class(res$results, "data.frame") + expect_identical(nrow(res$results), 2L) + expect_named(res$results, c( + "point_estimates", "standard_deviation", + "lower_CI_normal", "upper_CI_normal" + )) +}) + +test_that("EC_IPW_OPT with w=0 uses only trial data", { + res <- rdborrow:::EC_IPW_OPT( + data = SyntheticData, + outcome_col_name = c("y1", "y2"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + optimal_weight_flag = FALSE, + wt = 0 + ) + expect_identical(res$borrow_weight, 0) + expect_all_true(is.finite(res$results$point_estimates)) + expect_all_true(res$results$standard_deviation > 0) + expect_all_true(res$results$lower_CI_normal < res$results$upper_CI_normal) +}) + +test_that("EC_IPW_OPT with optimal weight borrows from external controls", { + res <- rdborrow:::EC_IPW_OPT( + data = SyntheticData, + outcome_col_name = c("y1", "y2"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + optimal_weight_flag = TRUE + ) + expect_gt(res$borrow_weight, 0) + expect_lt(res$borrow_weight, 1) + expect_all_true(is.finite(res$results$point_estimates)) + expect_all_true(res$results$standard_deviation > 0) +}) + +test_that("EC_IPW_OPT with fixed weight uses the specified weight", { + res <- rdborrow:::EC_IPW_OPT( + data = SyntheticData, + outcome_col_name = c("y1", "y2"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + optimal_weight_flag = FALSE, + wt = 0.3 + ) + expect_equal(res$borrow_weight, 0.3) +}) + +test_that("EC_IPW_OPT normal CIs contain the point estimates", { + res <- rdborrow:::EC_IPW_OPT( + data = SyntheticData, + outcome_col_name = c("y1", "y2"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + optimal_weight_flag = TRUE + ) + expect_all_true(res$results$point_estimates >= res$results$lower_CI_normal) + expect_all_true(res$results$point_estimates <= res$results$upper_CI_normal) +}) + +test_that("EC_IPW_OPT optimal weight produces smaller or equal SE than w=0", { + res_no <- rdborrow:::EC_IPW_OPT( + data = SyntheticData, + outcome_col_name = c("y1", "y2"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + optimal_weight_flag = FALSE, + wt = 0 + ) + res_opt <- rdborrow:::EC_IPW_OPT( + data = SyntheticData, + outcome_col_name = c("y1", "y2"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + optimal_weight_flag = TRUE + ) + expect_all_true( + res_opt$results$standard_deviation <= res_no$results$standard_deviation + ) +}) + +test_that("EC_IPW_OPT works with a single outcome", { + res <- rdborrow:::EC_IPW_OPT( + data = SyntheticData, + outcome_col_name = "y1", + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + optimal_weight_flag = TRUE + ) + expect_identical(nrow(res$results), 1L) + expect_gt(res$borrow_weight, 0) +}) + +test_that("EC_IPW_OPT bootstrap returns boot CIs", { + res <- rdborrow:::EC_IPW_OPT( + data = SyntheticData, + outcome_col_name = c("y1", "y2"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + optimal_weight_flag = TRUE, + Bootstrap = TRUE, + R = 50, + bootstrap_CI_type = "perc" + ) + expect_named(res$results, c( + "point_estimates", "standard_deviation", + "lower_CI_boot", "upper_CI_boot" + )) + expect_all_true(res$results$lower_CI_boot < res$results$upper_CI_boot) +}) diff --git a/tests/testthat/test-SCM.R b/tests/testthat/test-SCM.R new file mode 100644 index 0000000..e660b7d --- /dev/null +++ b/tests/testthat/test-SCM.R @@ -0,0 +1,58 @@ +test_that("SCM returns expected structure", { + res <- rdborrow:::SCM( + data = SyntheticData, + outcome_col_name = c("y1", "y2", "y3", "y4"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + T_cross = 2, + Bootstrap = TRUE, + R = 50, + bootstrap_CI_type = "perc", + lambda.min = 0, + lambda.max = 1e-3, + nlambda = 2, + parallel = "no" + ) + expect_s3_class(res, "data.frame") + expect_identical(nrow(res), 2L) +}) + +test_that("SCM produces finite estimates with valid CIs", { + res <- rdborrow:::SCM( + data = SyntheticData, + outcome_col_name = c("y1", "y2", "y3", "y4"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + T_cross = 2, + Bootstrap = TRUE, + R = 50, + bootstrap_CI_type = "perc", + lambda.min = 0, + lambda.max = 1e-3, + nlambda = 2, + parallel = "no" + ) + expect_all_true(is.finite(res$point_estimates)) + expect_all_true(res$lower_CI_boot < res$upper_CI_boot) +}) + +test_that("SCM estimates effects for OLE period only", { + res <- rdborrow:::SCM( + data = SyntheticData, + outcome_col_name = c("y1", "y2", "y3", "y4"), + trial_status_col_name = "S", + treatment_col_name = "A", + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + T_cross = 2, + Bootstrap = TRUE, + R = 50, + bootstrap_CI_type = "perc", + lambda.min = 0, + lambda.max = 1e-3, + nlambda = 2, + parallel = "no" + ) + expect_identical(rownames(res), c("tau3", "tau4")) +}) From 68230c78031dd78926a1bfb6036dff81658a673f Mon Sep 17 00:00:00 2001 From: Matt Secrest Date: Wed, 13 May 2026 13:52:59 -0700 Subject: [PATCH 02/16] also drop the vignettes --- .github/workflows/R-CMD-check.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index cc87f35..8bc530c 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -47,5 +47,5 @@ jobs: - uses: r-lib/actions/check-r-package@v2 with: upload-snapshots: true - build_args: '"--no-manual"' - args: 'c("--no-manual", "--as-cran")' \ No newline at end of file + build_args: 'c("--no-manual", "--no-build-vignettes")' + args: 'c("--no-manual", "--ignore-vignettes", "--as-cran")' \ No newline at end of file From 1aaadcbfa1096c4f9a3a7d2d3db9e74dd2b7efce Mon Sep 17 00:00:00 2001 From: Matt Secrest Date: Thu, 14 May 2026 11:39:25 -0700 Subject: [PATCH 03/16] rename news --- NOTES.md | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 NOTES.md diff --git a/NOTES.md b/NOTES.md deleted file mode 100644 index 4fa0a0d..0000000 --- a/NOTES.md +++ /dev/null @@ -1,9 +0,0 @@ -# rdborrow 0.0.2.0 - -## Changes -- Refactor out imports, prepare for CRAN - -# rdborrow 0.0.1.0 - -## Changes -- Original package with boht IPW and AIPW methods \ No newline at end of file From a8a0848aa7bcac4597d747d95b18fdc1eca09b18 Mon Sep 17 00:00:00 2001 From: Matt Secrest Date: Thu, 14 May 2026 11:55:38 -0700 Subject: [PATCH 04/16] update claude.md --- .claude/CLAUDE.md | 97 +++++++++++++++++++++++++++++++++++++++++++++++ NEWS.md | 9 +++++ 2 files changed, 106 insertions(+) create mode 100644 NEWS.md diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index 6576998..6ea7292 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -109,3 +109,100 @@ Run this one-liner to validate the package before committing: ``` Rscript -e "devtools::document()" && Rscript -e "styler::style_pkg()" && Rscript -e "spelling::spell_check_package()" && Rscript -e "lintr::lint_package()" && Rscript -e "devtools::check(vignettes = FALSE)" ``` + +## Changing the API + +### Goals + +1. **Better method constructors** — replace `setup_method_weighting(method_name="IPW", ...)` with `ec_ipw()`, etc. Each constructor carries its own estimation logic. +2. **Polymorphic dispatch** — `run_analysis()` calls a generic on the method object instead of an if/else tree. Adding a new method = writing one constructor. +3. **Merge bootstrap** — bootstrap is an inference option on the method, not a separate code path. +4. **(Future) Model formula interface** — `outcome ~ treatment | covariates` instead of column name args. + +### Current workflow (to deprecate) + +```r +method <- setup_method_weighting( + method_name = "IPW", + optimal_weight_flag = FALSE, + wt = 0, + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5" +) + +analysis <- setup_analysis_primary( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + method_weighting_obj = method +) + +res <- run_analysis(analysis) +``` + +### Desired workflow + +```r +method <- ec_ipw( + ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", + weight = NULL, # NULL = optimal, 0 = no borrowing, 0.3 = fixed + bootstrap = 500, # NULL = sandwich SE only + bootstrap_ci_type = NULL # NULL defaults to "perc" when bootstrap is set +) + +analysis <- setup_analysis( + data = SyntheticData, + outcomes = c("y1", "y2"), + treatment = "A", + trial_status = "S", + covariates = c("x1", "x2", "x3", "x4", "x5"), + method = method +) + +res <- run_analysis(analysis) +``` + +### Method constructors + +| Constructor | Replaces | Phase | +|---|---|---| +| `ec_ipw()` | `setup_method_weighting(method_name="IPW", ...)` | Primary | +| `ec_aipw()` | `setup_method_weighting(method_name="AIPW", ...)` | Primary | +| `did_ec_ipw()` | `setup_method_DID(method_name="IPW", ...)` | OLE | +| `did_ec_aipw()` | `setup_method_DID(method_name="AIPW", ...)` | OLE | +| `did_ec_or()` | `setup_method_DID(method_name="OR", ...)` | OLE | +| `scm()` | `setup_method_SCM(...)` | OLE | + +Each constructor returns an S4 method object. The S4 class defines a generic `estimate()` that `run_analysis()` dispatches on — no if/else. + +### How dispatch works + +```r +# S4 generic +setGeneric("estimate", function(method, data, ...) standardGeneric("estimate")) + +# Each method class implements estimate() +setMethod("estimate", "ec_ipw_method", function(method, data, ...) { + # IPW estimation logic lives here +}) + +# run_analysis() becomes: +run_analysis <- function(analysis_obj) { + estimate(analysis_obj@method, data = analysis_obj@data, ...) +} +``` + +### Implementation order + +1. Create all 6 method constructors (start with `ec_ipw()`) +2. Each constructor returns an S4 object with estimation logic via `estimate()` generic +3. Refactor `setup_analysis()` into a single function (merge `_primary`/`_OLE`, add `T_cross = NULL`) +4. Refactor `run_analysis()` to use S4 dispatch +5. Deprecate `setup_method_weighting`, `setup_method_DID`, `setup_method_SCM`, `setup_analysis_primary`, `setup_analysis_OLE` + +### Design decisions + +- **S4 classes for method objects** — keeps rigorous type definitions, consistent with existing package patterns. +- **`T_cross` goes in `setup_analysis()`** — it's a property of the study design, not the method. +- **`bootstrap_ci_type` is nullable** — defaults to `"perc"` when `bootstrap` is non-NULL, ignored otherwise. \ No newline at end of file diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..4fa0a0d --- /dev/null +++ b/NEWS.md @@ -0,0 +1,9 @@ +# rdborrow 0.0.2.0 + +## Changes +- Refactor out imports, prepare for CRAN + +# rdborrow 0.0.1.0 + +## Changes +- Original package with boht IPW and AIPW methods \ No newline at end of file From 997100880696b645e383c8f9eb7df937cba31063 Mon Sep 17 00:00:00 2001 From: Matt Secrest Date: Thu, 14 May 2026 12:03:17 -0700 Subject: [PATCH 05/16] claude updates to refactor plan --- .claude/CLAUDE.md | 60 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 11 deletions(-) diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index 6ea7292..0e6595b 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -92,13 +92,25 @@ Fix spelling, grammar, and other minor problems without asking the user. Label a Only report what you have changed. ## Refactor rules -We're attemping to refactor many issues with this repositroy. In general, let's: -- make sure all examples are functional and not wrapped in dontrun{} + +### Per-function checklist + +When refactoring any estimator or internal function, apply all of these: + +1. Make runnable examples (no `\dontrun{}`) +2. Remove commented-out code +3. Remove TODOs +4. Add type checks (checkmate) +5. Factor out tidyverse (`filter` → `df[cond, ]`, `mutate` → direct assignment) +6. `<-` for assignment (not `=`) +7. Drop debug prints (`cat`, `print`, commented `# print(...)`) +8. Consistent variable naming without periods (e.g. `pi_S` not `pi.S`) +9. Drop backtick column access (`temp$\`piA\`` → `temp$piA`) + +### General rules + - make sure we have test cases for all functions -- remove @TODO blocks and other ugly code - remove magrittr pipe and replace with base R pipe -- remove tidyverse dependencies -- replace "=" with "<-" - make sure every function is documented - add validation to function inputs @@ -195,14 +207,40 @@ run_analysis <- function(analysis_obj) { ### Implementation order -1. Create all 6 method constructors (start with `ec_ipw()`) -2. Each constructor returns an S4 object with estimation logic via `estimate()` generic -3. Refactor `setup_analysis()` into a single function (merge `_primary`/`_OLE`, add `T_cross = NULL`) -4. Refactor `run_analysis()` to use S4 dispatch -5. Deprecate `setup_method_weighting`, `setup_method_DID`, `setup_method_SCM`, `setup_analysis_primary`, `setup_analysis_OLE` +1. Create full-pipeline regression tests for all 6 methods (old API, locked numerical values) +2. Create all 6 method constructors (start with `ec_ipw()`) +3. Each constructor returns an S4 object with estimation logic via `estimate()` generic +4. Add new-API tests to each pipeline test file (same expected values) +5. Refactor `setup_analysis()` into a single function (merge `_primary`/`_OLE`, add `T_cross = NULL`) +6. Refactor `run_analysis()` to use S4 dispatch +7. Deprecate `setup_method_weighting`, `setup_method_DID`, `setup_method_SCM`, `setup_analysis_primary`, `setup_analysis_OLE` ### Design decisions - **S4 classes for method objects** — keeps rigorous type definitions, consistent with existing package patterns. - **`T_cross` goes in `setup_analysis()`** — it's a property of the study design, not the method. -- **`bootstrap_ci_type` is nullable** — defaults to `"perc"` when `bootstrap` is non-NULL, ignored otherwise. \ No newline at end of file +- **`bootstrap_ci_type` is nullable** — defaults to `"perc"` when `bootstrap` is non-NULL, ignored otherwise. + +### Full-pipeline regression tests + +Before refactoring any estimator, we lock in its numerical outputs on `SyntheticData` so any code change that alters results is caught. Tests use the old API (setup_method → setup_analysis → run_analysis) with exact values at `tolerance = 1e-6`. As new API constructors are added, we add parallel assertions against the same expected values. + +Rename existing `test-vignette_results_*` files and split by method: + +| Test file | Method | Covers | +|---|---|---| +| `test-full_pipeline_ec_ipw.R` | EC-IPW | weight=0, optimal, fixed (0.3), bootstrap point estimates | +| `test-full_pipeline_ec_aipw.R` | EC-AIPW | weight=0, optimal, fixed (0.3), bootstrap point estimates | +| `test-full_pipeline_did_ec_ipw.R` | DID-EC-IPW | bootstrap CIs, point estimates | +| `test-full_pipeline_did_ec_aipw.R` | DID-EC-AIPW | bootstrap CIs, point estimates | +| `test-full_pipeline_did_ec_or.R` | DID-EC-OR | bootstrap CIs, point estimates | +| `test-full_pipeline_scm.R` | SCM | bootstrap CIs, point estimates | + +Each test file asserts: +- Point estimates (exact values) +- Standard errors / standard deviations (exact values) +- CI bounds for non-bootstrap (exact values) +- Bootstrap point estimates match non-bootstrap +- Borrow weight (where applicable) + +Simulation tests (`test-vignette_results_*_simulation.R`) stay separate — they test Monte Carlo properties, not individual estimator outputs. \ No newline at end of file From 7e27653fb403f4efd4d087632cbb0a64224be143 Mon Sep 17 00:00:00 2001 From: Matt Secrest Date: Thu, 14 May 2026 12:10:53 -0700 Subject: [PATCH 06/16] full pipeline tests, not simulation --- .../testthat/test-full_pipeline_did_ec_aipw.R | 35 +++ .../testthat/test-full_pipeline_did_ec_ipw.R | 29 +++ tests/testthat/test-full_pipeline_did_ec_or.R | 36 +++ tests/testthat/test-full_pipeline_ec_aipw.R | 133 ++++++++++ tests/testthat/test-full_pipeline_ec_ipw.R | 119 +++++++++ tests/testthat/test-full_pipeline_scm.R | 35 +++ .../test-vignette_results_OLE_analysis.R | 169 ------------ .../test-vignette_results_primary_analysis.R | 246 ------------------ 8 files changed, 387 insertions(+), 415 deletions(-) create mode 100644 tests/testthat/test-full_pipeline_did_ec_aipw.R create mode 100644 tests/testthat/test-full_pipeline_did_ec_ipw.R create mode 100644 tests/testthat/test-full_pipeline_did_ec_or.R create mode 100644 tests/testthat/test-full_pipeline_ec_aipw.R create mode 100644 tests/testthat/test-full_pipeline_ec_ipw.R create mode 100644 tests/testthat/test-full_pipeline_scm.R delete mode 100644 tests/testthat/test-vignette_results_OLE_analysis.R delete mode 100644 tests/testthat/test-vignette_results_primary_analysis.R diff --git a/tests/testthat/test-full_pipeline_did_ec_aipw.R b/tests/testthat/test-full_pipeline_did_ec_aipw.R new file mode 100644 index 0000000..4217772 --- /dev/null +++ b/tests/testthat/test-full_pipeline_did_ec_aipw.R @@ -0,0 +1,35 @@ +tol <- 1e-6 + +test_that("DID-EC-AIPW point estimates and bootstrap CIs", { + bootstrap_obj <- setup_bootstrap(replicates = 50, bootstrap_CI_type = "perc") + method <- setup_method_DID( + method_name = "AIPW", + bootstrap_flag = TRUE, + bootstrap_obj = bootstrap_obj, + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + model_form_piA = "A ~ x1 + x2 + x3 + x4 + x5", + model_form_mu0_ext = c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5", + "y3 ~ x1 + x2 + x3 + x4 + x5", + "y4 ~ x1 + x2 + x3 + x4 + x5" + ) + ) + analysis <- setup_analysis_OLE( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2", "y3", "y4"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + T_cross = 2, + method_OLE_obj = method + ) + + set.seed(42) + res <- run_analysis(analysis) + + expect_s3_class(res, "data.frame") + expect_equal(nrow(res), 2) + expect_equal(res$point_estimates[1], 2.0417274627, tolerance = tol) + expect_equal(res$point_estimates[2], 4.0361177594, tolerance = tol) +}) diff --git a/tests/testthat/test-full_pipeline_did_ec_ipw.R b/tests/testthat/test-full_pipeline_did_ec_ipw.R new file mode 100644 index 0000000..a4cd5a3 --- /dev/null +++ b/tests/testthat/test-full_pipeline_did_ec_ipw.R @@ -0,0 +1,29 @@ +tol <- 1e-6 + +test_that("DID-EC-IPW point estimates and bootstrap CIs", { + bootstrap_obj <- setup_bootstrap(replicates = 50, bootstrap_CI_type = "perc") + method <- setup_method_DID( + method_name = "IPW", + bootstrap_flag = TRUE, + bootstrap_obj = bootstrap_obj, + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + model_form_piA = "A ~ x1 + x2 + x3 + x4 + x5" + ) + analysis <- setup_analysis_OLE( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2", "y3", "y4"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + T_cross = 2, + method_OLE_obj = method + ) + + set.seed(42) + res <- run_analysis(analysis) + + expect_s3_class(res, "data.frame") + expect_equal(nrow(res), 2) + expect_equal(res$point_estimates[1], 2.0759256334, tolerance = tol) + expect_equal(res$point_estimates[2], 4.3894380397, tolerance = tol) +}) diff --git a/tests/testthat/test-full_pipeline_did_ec_or.R b/tests/testthat/test-full_pipeline_did_ec_or.R new file mode 100644 index 0000000..14cf520 --- /dev/null +++ b/tests/testthat/test-full_pipeline_did_ec_or.R @@ -0,0 +1,36 @@ +tol <- 1e-6 + +test_that("DID-EC-OR point estimates and bootstrap CIs", { + bootstrap_obj <- setup_bootstrap(replicates = 50, bootstrap_CI_type = "perc") + model_forms <- c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5", + "y3 ~ x1 + x2 + x3 + x4 + x5", + "y4 ~ x1 + x2 + x3 + x4 + x5" + ) + method <- setup_method_DID( + method_name = "OR", + bootstrap_flag = TRUE, + bootstrap_obj = bootstrap_obj, + model_form_mu0_ext = model_forms, + model_form_mu0_rct = model_forms, + model_form_mu1_rct = model_forms + ) + analysis <- setup_analysis_OLE( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2", "y3", "y4"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + T_cross = 2, + method_OLE_obj = method + ) + + set.seed(42) + res <- run_analysis(analysis) + + expect_s3_class(res, "data.frame") + expect_equal(nrow(res), 2) + expect_equal(res$point_estimates[1], 1.5689465845, tolerance = tol) + expect_equal(res$point_estimates[2], 4.4078336543, tolerance = tol) +}) diff --git a/tests/testthat/test-full_pipeline_ec_aipw.R b/tests/testthat/test-full_pipeline_ec_aipw.R new file mode 100644 index 0000000..d172c3d --- /dev/null +++ b/tests/testthat/test-full_pipeline_ec_aipw.R @@ -0,0 +1,133 @@ +tol <- 1e-6 + +test_that("EC-AIPW zero weight (no borrowing)", { + method <- setup_method_weighting( + method_name = "AIPW", + optimal_weight_flag = FALSE, + wt = 0, + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + model_form_mu0_ext = c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5" + ) + ) + analysis <- setup_analysis_primary( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + method_weighting_obj = method + ) + res <- run_analysis(analysis) + + expect_equal(res$borrow_weight, 0) + expect_equal(res$results$point_estimates[1], -0.4361151144, tolerance = tol) + expect_equal(res$results$point_estimates[2], 0.4422248202, tolerance = tol) + expect_equal(res$results$standard_deviation[1], 0.5552064946, tolerance = tol) + expect_equal(res$results$standard_deviation[2], 0.5701633244, tolerance = tol) + expect_equal(res$results$lower_CI_normal[1], -1.5242998477, tolerance = tol) + expect_equal(res$results$lower_CI_normal[2], -0.6752747610, tolerance = tol) + expect_equal(res$results$upper_CI_normal[1], 0.6520696190, tolerance = tol) + expect_equal(res$results$upper_CI_normal[2], 1.5597244013, tolerance = tol) +}) + +test_that("EC-AIPW optimal weight", { + method <- setup_method_weighting( + method_name = "AIPW", + optimal_weight_flag = TRUE, + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + model_form_mu0_ext = c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5" + ) + ) + analysis <- setup_analysis_primary( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + method_weighting_obj = method + ) + res <- run_analysis(analysis) + + expect_equal(res$borrow_weight, 0.1475196487, tolerance = tol) + expect_equal(res$results$point_estimates[1], -0.5463256250, tolerance = tol) + expect_equal(res$results$point_estimates[2], 0.5401749583, tolerance = tol) + expect_equal(res$results$standard_deviation[1], 0.5305614087, tolerance = tol) + expect_equal(res$results$standard_deviation[2], 0.5543275065, tolerance = tol) + expect_equal(res$results$lower_CI_normal[1], -1.5862068777, tolerance = tol) + expect_equal(res$results$lower_CI_normal[2], -0.5462869900, tolerance = tol) + expect_equal(res$results$upper_CI_normal[1], 0.4935556277, tolerance = tol) + expect_equal(res$results$upper_CI_normal[2], 1.6266369066, tolerance = tol) +}) + +test_that("EC-AIPW fixed weight 0.3", { + method <- setup_method_weighting( + method_name = "AIPW", + optimal_weight_flag = FALSE, + wt = 0.3, + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + model_form_mu0_ext = c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5" + ) + ) + analysis <- setup_analysis_primary( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + method_weighting_obj = method + ) + res <- run_analysis(analysis) + + expect_equal(res$borrow_weight, 0.3) + expect_equal(res$results$point_estimates[1], -0.6602422289, tolerance = tol) + expect_equal(res$results$point_estimates[2], 0.6414189052, tolerance = tol) + expect_equal(res$results$standard_deviation[1], 0.5263737407, tolerance = tol) + expect_equal(res$results$standard_deviation[2], 0.5832105340, tolerance = tol) + expect_equal(res$results$lower_CI_normal[1], -1.6919158032, tolerance = tol) + expect_equal(res$results$lower_CI_normal[2], -0.5016527368, tolerance = tol) + expect_equal(res$results$upper_CI_normal[1], 0.3714313453, tolerance = tol) + expect_equal(res$results$upper_CI_normal[2], 1.7844905472, tolerance = tol) +}) + +test_that("EC-AIPW bootstrap preserves point estimates", { + bootstrap_obj <- setup_bootstrap(replicates = 50, bootstrap_CI_type = "perc") + method <- setup_method_weighting( + method_name = "AIPW", + optimal_weight_flag = TRUE, + wt = 0, + bootstrap_flag = TRUE, + bootstrap_obj = bootstrap_obj, + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", + model_form_mu0_ext = c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5" + ) + ) + analysis <- setup_analysis_primary( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + method_weighting_obj = method + ) + + set.seed(42) + res <- run_analysis(analysis) + + expect_equal(res$results$point_estimates[1], -0.5463256250, tolerance = tol) + expect_equal(res$results$point_estimates[2], 0.5401749583, tolerance = tol) + expect_equal(res$results$standard_deviation[1], 0.5305614087, tolerance = tol) + expect_equal(res$results$standard_deviation[2], 0.5543275065, tolerance = tol) + expect_equal(res$borrow_weight, 0.1475196487, tolerance = tol) + expect_named( + res$results, + c("point_estimates", "standard_deviation", "lower_CI_boot", "upper_CI_boot") + ) +}) diff --git a/tests/testthat/test-full_pipeline_ec_ipw.R b/tests/testthat/test-full_pipeline_ec_ipw.R new file mode 100644 index 0000000..a17cf2f --- /dev/null +++ b/tests/testthat/test-full_pipeline_ec_ipw.R @@ -0,0 +1,119 @@ +tol <- 1e-6 + +test_that("EC-IPW zero weight (no borrowing)", { + method <- setup_method_weighting( + method_name = "IPW", + optimal_weight_flag = FALSE, + wt = 0, + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5" + ) + analysis <- setup_analysis_primary( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + method_weighting_obj = method + ) + res <- run_analysis(analysis) + + expect_type(res, "list") + expect_named(res, c("results", "borrow_weight")) + expect_equal(res$borrow_weight, 0) + expect_equal(res$results$point_estimates[1], -0.0280870419, tolerance = tol) + expect_equal(res$results$point_estimates[2], 0.4095955812, tolerance = tol) + expect_equal(res$results$standard_deviation[1], 0.5367986594, tolerance = tol) + expect_equal(res$results$standard_deviation[2], 0.5625763260, tolerance = tol) + expect_equal(res$results$lower_CI_normal[1], -1.0801930814, tolerance = tol) + expect_equal(res$results$lower_CI_normal[2], -0.6930337563, tolerance = tol) + expect_equal(res$results$upper_CI_normal[1], 1.0240189975, tolerance = tol) + expect_equal(res$results$upper_CI_normal[2], 1.5122249187, tolerance = tol) +}) + +test_that("EC-IPW optimal weight", { + method <- setup_method_weighting( + method_name = "IPW", + optimal_weight_flag = TRUE, + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5" + ) + analysis <- setup_analysis_primary( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + method_weighting_obj = method + ) + res <- run_analysis(analysis) + + expect_equal(res$borrow_weight, 0.1475196487, tolerance = tol) + expect_equal(res$results$point_estimates[1], -0.1971968926, tolerance = tol) + expect_equal(res$results$point_estimates[2], 0.4697208867, tolerance = tol) + expect_equal(res$results$standard_deviation[1], 0.5134017502, tolerance = tol) + expect_equal(res$results$standard_deviation[2], 0.5410007056, tolerance = tol) + expect_equal(res$results$lower_CI_normal[1], -1.2034458325, tolerance = tol) + expect_equal(res$results$lower_CI_normal[2], -0.5906210120, tolerance = tol) + expect_equal(res$results$upper_CI_normal[1], 0.8090520473, tolerance = tol) + expect_equal(res$results$upper_CI_normal[2], 1.5300627854, tolerance = tol) +}) + +test_that("EC-IPW fixed weight 0.3", { + method <- setup_method_weighting( + method_name = "IPW", + optimal_weight_flag = FALSE, + wt = 0.3, + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5" + ) + analysis <- setup_analysis_primary( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + method_weighting_obj = method + ) + res <- run_analysis(analysis) + + expect_equal(res$borrow_weight, 0.3) + expect_equal(res$results$point_estimates[1], -0.3719934684, tolerance = tol) + expect_equal(res$results$point_estimates[2], 0.5318680501, tolerance = tol) + expect_equal(res$results$standard_deviation[1], 0.5148090221, tolerance = tol) + expect_equal(res$results$standard_deviation[2], 0.5657008041, tolerance = tol) + expect_equal(res$results$lower_CI_normal[1], -1.3810006106, tolerance = tol) + expect_equal(res$results$lower_CI_normal[2], -0.5768851520, tolerance = tol) + expect_equal(res$results$upper_CI_normal[1], 0.6370136739, tolerance = tol) + expect_equal(res$results$upper_CI_normal[2], 1.6406212522, tolerance = tol) +}) + +test_that("EC-IPW bootstrap preserves point estimates", { + bootstrap_obj <- setup_bootstrap(replicates = 50, bootstrap_CI_type = "perc") + method <- setup_method_weighting( + method_name = "IPW", + optimal_weight_flag = TRUE, + bootstrap_flag = TRUE, + bootstrap_obj = bootstrap_obj, + wt = 0, + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5" + ) + analysis <- setup_analysis_primary( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + method_weighting_obj = method + ) + + set.seed(42) + res <- run_analysis(analysis) + + expect_equal(res$results$point_estimates[1], -0.1971968926, tolerance = tol) + expect_equal(res$results$point_estimates[2], 0.4697208867, tolerance = tol) + expect_equal(res$results$standard_deviation[1], 0.5134017502, tolerance = tol) + expect_equal(res$results$standard_deviation[2], 0.5410007056, tolerance = tol) + expect_equal(res$borrow_weight, 0.1475196487, tolerance = tol) + expect_named( + res$results, + c("point_estimates", "standard_deviation", "lower_CI_boot", "upper_CI_boot") + ) +}) diff --git a/tests/testthat/test-full_pipeline_scm.R b/tests/testthat/test-full_pipeline_scm.R new file mode 100644 index 0000000..411234f --- /dev/null +++ b/tests/testthat/test-full_pipeline_scm.R @@ -0,0 +1,35 @@ +tol <- 1e-6 + +test_that("SCM runs and returns valid structure", { + skip_on_cran() + + bootstrap_obj <- setup_bootstrap(replicates = 5, bootstrap_CI_type = "perc") + method <- setup_method_SCM( + method_name = "SCM", + bootstrap_flag = TRUE, + bootstrap_obj = bootstrap_obj, + lambda.min = 0.0005, + lambda.max = 0.0005, + nlambda = 1, + parallel = "no", + ncpus = 1 + ) + analysis <- setup_analysis_OLE( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2", "y3", "y4"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + T_cross = 2, + method_OLE_obj = method + ) + + set.seed(42) + res <- suppressWarnings(run_analysis(analysis)) + + expect_s3_class(res, "data.frame") + expect_equal(nrow(res), 2) + expect_named(res, c("point_estimates", "lower_CI_boot", "upper_CI_boot")) + expect_all_true(is.finite(res$point_estimates)) + expect_all_true(res$lower_CI_boot <= res$upper_CI_boot) +}) diff --git a/tests/testthat/test-vignette_results_OLE_analysis.R b/tests/testthat/test-vignette_results_OLE_analysis.R deleted file mode 100644 index 6bc41c7..0000000 --- a/tests/testthat/test-vignette_results_OLE_analysis.R +++ /dev/null @@ -1,169 +0,0 @@ -# Regression tests for the OLE Analysis Workflow vignette. -# These lock in the exact numerical outputs produced by the current codebase -# on the built-in SyntheticData, so any code change that alters results is caught. -# -# All OLE methods require bootstrap for inference. Point estimates are -# deterministic (seed-independent); bootstrap CIs are tested with set.seed(42). -# -# Reference: vignettes/OLE_analysis_workflow.Rmd - -# ---------- helpers shared across all test blocks ---------------------------- -common_args_ole <- list( - data = SyntheticData, - trial_status_col_name = "S", - treatment_col_name = "A", - outcome_col_name = c("y1", "y2", "y3", "y4"), - covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), - T_cross = 2 -) - -model_form_mu_ole <- c( - "y1 ~ x1 + x2 + x3 + x4 + x5", - "y2 ~ x1 + x2 + x3 + x4 + x5", - "y3 ~ x1 + x2 + x3 + x4 + x5", - "y4 ~ x1 + x2 + x3 + x4 + x5" -) - -bootstrap_obj_ole <- setup_bootstrap(replicates = 50, bootstrap_CI_type = "perc") - -tol <- 1e-6 - -# ============================================================================= -# Section 1.1 DID – IPW -# ============================================================================= -test_that("OLE DID-IPW point estimates match vignette", { - method_obj <- setup_method_DID( - method_name = "IPW", - bootstrap_flag = TRUE, - bootstrap_obj = bootstrap_obj_ole, - model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", - model_form_piA = "A ~ x1 + x2 + x3 + x4 + x5" - ) - - analysis_obj <- do.call( - setup_analysis_OLE, - c(common_args_ole, list(method_OLE_obj = method_obj)) - ) - - set.seed(42) - res <- run_analysis(analysis_obj) - - # Return structure - expect_s3_class(res, "data.frame") - expect_equal(nrow(res), 2) - expect_true(all(c("point_estimates", "lower_CI_boot", "upper_CI_boot") %in% names(res))) - - # Point estimates (deterministic, seed-independent) - expect_equal(res$point_estimates[1], 2.0759256334, tolerance = tol) - expect_equal(res$point_estimates[2], 4.3894380397, tolerance = tol) - - # Bootstrap CIs (seed=42, replicates=50) - expect_equal(res$lower_CI_boot[1], -0.1985438132, tolerance = tol) - expect_equal(res$lower_CI_boot[2], 1.7688904174, tolerance = tol) - expect_equal(res$upper_CI_boot[1], 4.0048311338, tolerance = tol) - expect_equal(res$upper_CI_boot[2], 8.2132873144, tolerance = tol) -}) - -# ============================================================================= -# Section 1.2 DID – AIPW -# ============================================================================= -test_that("OLE DID-AIPW point estimates match vignette", { - method_obj <- setup_method_DID( - method_name = "AIPW", - bootstrap_flag = TRUE, - bootstrap_obj = bootstrap_obj_ole, - model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", - model_form_piA = "A ~ x1 + x2 + x3 + x4 + x5", - model_form_mu0_ext = model_form_mu_ole - ) - - analysis_obj <- do.call( - setup_analysis_OLE, - c(common_args_ole, list(method_OLE_obj = method_obj)) - ) - - set.seed(42) - res <- run_analysis(analysis_obj) - - # Point estimates (deterministic) - expect_equal(res$point_estimates[1], 2.0417274627, tolerance = tol) - expect_equal(res$point_estimates[2], 4.0361177594, tolerance = tol) - - # Bootstrap CIs (seed=42, replicates=50) - expect_equal(res$lower_CI_boot[1], -0.2867417425, tolerance = tol) - expect_equal(res$lower_CI_boot[2], 1.8145638636, tolerance = tol) - expect_equal(res$upper_CI_boot[1], 4.0728301932, tolerance = tol) - expect_equal(res$upper_CI_boot[2], 8.6316113430, tolerance = tol) -}) - -# ============================================================================= -# Section 1.3 DID – OR (Outcome Regression) -# ============================================================================= -test_that("OLE DID-OR point estimates match vignette", { - method_obj <- setup_method_DID( - method_name = "OR", - bootstrap_flag = TRUE, - bootstrap_obj = bootstrap_obj_ole, - model_form_mu0_ext = model_form_mu_ole, - model_form_mu0_rct = model_form_mu_ole, - model_form_mu1_rct = model_form_mu_ole - ) - - analysis_obj <- do.call( - setup_analysis_OLE, - c(common_args_ole, list(method_OLE_obj = method_obj)) - ) - - set.seed(42) - res <- run_analysis(analysis_obj) - - # Point estimates (deterministic) - expect_equal(res$point_estimates[1], 1.5689465845, tolerance = tol) - expect_equal(res$point_estimates[2], 4.4078336543, tolerance = tol) - - # Bootstrap CIs (seed=42, replicates=50) - expect_equal(res$lower_CI_boot[1], -0.5122539446, tolerance = tol) - expect_equal(res$lower_CI_boot[2], 2.7815366451, tolerance = tol) - expect_equal(res$upper_CI_boot[1], 3.1261896871, tolerance = tol) - expect_equal(res$upper_CI_boot[2], 7.9430452413, tolerance = tol) -}) - -# ============================================================================= -# Section 2 SCM – Synthetic Control Method -# ============================================================================= -test_that("OLE SCM runs and returns valid results", { - skip_on_cran() # scm is computationally expensive (~1 min) - - bootstrap_obj_scm <- setup_bootstrap(replicates = 5, bootstrap_CI_type = "perc") - - method_obj <- setup_method_SCM( - method_name = "SCM", - bootstrap_flag = TRUE, - bootstrap_obj = bootstrap_obj_scm, - lambda.min = 0.0005, - lambda.max = 0.0005, - nlambda = 1, - parallel = "no", - ncpus = 1 - ) - - analysis_obj <- do.call( - setup_analysis_OLE, - c(common_args_ole, list(method_OLE_obj = method_obj)) - ) - - # few bootstrap replicates triggers "extreme order statistics" warnings ---- - set.seed(42) - res <- suppressWarnings(run_analysis(analysis_obj)) - - # structure checks ---- - expect_s3_class(res, "data.frame") - expect_equal(nrow(res), 2) - expect_true(all(c("point_estimates", "lower_CI_boot", "upper_CI_boot") %in% names(res))) - - # ecos solver is platform-dependent, so pin structure not exact values ---- - expect_true(all(is.finite(res$point_estimates))) - expect_true(all(is.finite(res$lower_CI_boot))) - expect_true(all(is.finite(res$upper_CI_boot))) - expect_true(all(res$lower_CI_boot <= res$upper_CI_boot)) -}) diff --git a/tests/testthat/test-vignette_results_primary_analysis.R b/tests/testthat/test-vignette_results_primary_analysis.R deleted file mode 100644 index 2f05c87..0000000 --- a/tests/testthat/test-vignette_results_primary_analysis.R +++ /dev/null @@ -1,246 +0,0 @@ -# Regression tests for the Primary Analysis Workflow vignette. -# These lock in the exact numerical outputs produced by the current codebase -# on the built-in SyntheticData, so any code change that alters results is caught. -# -# Reference: vignettes/primary_analysis_workflow.Rmd - -# ---------- helpers shared across all test blocks ---------------------------- -common_args <- list( - data = SyntheticData, - trial_status_col_name = "S", - treatment_col_name = "A", - outcome_col_name = c("y1", "y2"), - covariates_col_name = c("x1", "x2", "x3", "x4", "x5") -) - -tol <- 1e-6 # tolerance for floating-point comparison - -# ============================================================================= -# Section 2.1 IPW – zero weight (no borrowing) -# ============================================================================= -test_that("Primary IPW zero-weight results match vignette", { - method_obj <- setup_method_weighting( - method_name = "IPW", - optimal_weight_flag = FALSE, - wt = 0, - model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5" - ) - - analysis_obj <- do.call( - setup_analysis_primary, - c(common_args, list(method_weighting_obj = method_obj)) - ) - - res <- run_analysis(analysis_obj) - - # Return structure - - expect_type(res, "list") - expect_named(res, c("results", "borrow_weight")) - expect_s3_class(res$results, "data.frame") - expect_equal(nrow(res$results), 2) - - # Point estimates (tau1, tau2) - expect_equal(res$results$point_estimates[1], -0.0280870419, tolerance = tol) - expect_equal(res$results$point_estimates[2], 0.4095955812, tolerance = tol) - - # Standard deviations - expect_equal(res$results$standard_deviation[1], 0.5367986594, tolerance = tol) - expect_equal(res$results$standard_deviation[2], 0.5625763260, tolerance = tol) - - # Normal CIs - expect_equal(res$results$lower_CI_normal[1], -1.0801930814, tolerance = tol) - expect_equal(res$results$lower_CI_normal[2], -0.6930337563, tolerance = tol) - expect_equal(res$results$upper_CI_normal[1], 1.0240189975, tolerance = tol) - expect_equal(res$results$upper_CI_normal[2], 1.5122249187, tolerance = tol) - - # Borrow weight - expect_equal(res$borrow_weight, 0) -}) - -# ============================================================================= -# Section 2.1 IPW – data-adaptive optimal weight -# ============================================================================= -test_that("Primary IPW optimal-weight results match vignette", { - method_obj <- setup_method_weighting( - method_name = "IPW", - optimal_weight_flag = TRUE, - model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5" - ) - - analysis_obj <- do.call( - setup_analysis_primary, - c(common_args, list(method_weighting_obj = method_obj)) - ) - - res <- run_analysis(analysis_obj) - - # Point estimates - expect_equal(res$results$point_estimates[1], -0.1971968926, tolerance = tol) - expect_equal(res$results$point_estimates[2], 0.4697208867, tolerance = tol) - - # Standard deviations - expect_equal(res$results$standard_deviation[1], 0.5134017502, tolerance = tol) - expect_equal(res$results$standard_deviation[2], 0.5410007056, tolerance = tol) - - # Normal CIs - expect_equal(res$results$lower_CI_normal[1], -1.2034458325, tolerance = tol) - expect_equal(res$results$lower_CI_normal[2], -0.5906210120, tolerance = tol) - expect_equal(res$results$upper_CI_normal[1], 0.8090520473, tolerance = tol) - expect_equal(res$results$upper_CI_normal[2], 1.5300627854, tolerance = tol) - - # Borrow weight - expect_equal(res$borrow_weight, 0.1475196487, tolerance = tol) -}) - -# ============================================================================= -# Section 2.2 AIPW – zero weight (no borrowing) -# ============================================================================= -test_that("Primary AIPW zero-weight results match vignette", { - method_obj <- setup_method_weighting( - method_name = "AIPW", - optimal_weight_flag = FALSE, - wt = 0, - model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", - model_form_mu0_ext = c( - "y1 ~ x1 + x2 + x3 + x4 + x5", - "y2 ~ x1 + x2 + x3 + x4 + x5" - ) - ) - - analysis_obj <- do.call( - setup_analysis_primary, - c(common_args, list(method_weighting_obj = method_obj)) - ) - - res <- run_analysis(analysis_obj) - - # Point estimates - expect_equal(res$results$point_estimates[1], -0.4361151144, tolerance = tol) - expect_equal(res$results$point_estimates[2], 0.4422248202, tolerance = tol) - - # Standard deviations - expect_equal(res$results$standard_deviation[1], 0.5552064946, tolerance = tol) - expect_equal(res$results$standard_deviation[2], 0.5701633244, tolerance = tol) - - # Normal CIs - expect_equal(res$results$lower_CI_normal[1], -1.5242998477, tolerance = tol) - expect_equal(res$results$lower_CI_normal[2], -0.6752747610, tolerance = tol) - expect_equal(res$results$upper_CI_normal[1], 0.6520696190, tolerance = tol) - expect_equal(res$results$upper_CI_normal[2], 1.5597244013, tolerance = tol) - - # Borrow weight - expect_equal(res$borrow_weight, 0) -}) - -# ============================================================================= -# Section 2.2 AIPW – data-adaptive optimal weight -# ============================================================================= -test_that("Primary AIPW optimal-weight results match vignette", { - method_obj <- setup_method_weighting( - method_name = "AIPW", - optimal_weight_flag = TRUE, - model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", - model_form_mu0_ext = c( - "y1 ~ x1 + x2 + x3 + x4 + x5", - "y2 ~ x1 + x2 + x3 + x4 + x5" - ) - ) - - analysis_obj <- do.call( - setup_analysis_primary, - c(common_args, list(method_weighting_obj = method_obj)) - ) - - res <- run_analysis(analysis_obj) - - # Point estimates - expect_equal(res$results$point_estimates[1], -0.5463256250, tolerance = tol) - expect_equal(res$results$point_estimates[2], 0.5401749583, tolerance = tol) - - # Standard deviations - expect_equal(res$results$standard_deviation[1], 0.5305614087, tolerance = tol) - expect_equal(res$results$standard_deviation[2], 0.5543275065, tolerance = tol) - - # Normal CIs - expect_equal(res$results$lower_CI_normal[1], -1.5862068777, tolerance = tol) - expect_equal(res$results$lower_CI_normal[2], -0.5462869900, tolerance = tol) - expect_equal(res$results$upper_CI_normal[1], 0.4935556277, tolerance = tol) - expect_equal(res$results$upper_CI_normal[2], 1.6266369066, tolerance = tol) - - # Borrow weight (same as IPW optimal since same propensity model) - expect_equal(res$borrow_weight, 0.1475196487, tolerance = tol) -}) - -# ============================================================================= -# Section 3 Bootstrap inference – point estimates remain identical -# ============================================================================= -test_that("Primary IPW bootstrap point estimates match non-bootstrap", { - bootstrap_obj <- setup_bootstrap(replicates = 50, bootstrap_CI_type = "perc") - - method_obj <- setup_method_weighting( - method_name = "IPW", - optimal_weight_flag = TRUE, - bootstrap_flag = TRUE, - bootstrap_obj = bootstrap_obj, - wt = 0, - model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5" - ) - - analysis_obj <- do.call( - setup_analysis_primary, - c(common_args, list(method_weighting_obj = method_obj)) - ) - - set.seed(42) - res <- run_analysis(analysis_obj) - - # Point estimates must match the non-bootstrap run exactly - expect_equal(res$results$point_estimates[1], -0.1971968926, tolerance = tol) - expect_equal(res$results$point_estimates[2], 0.4697208867, tolerance = tol) - - # Standard deviations must match - expect_equal(res$results$standard_deviation[1], 0.5134017502, tolerance = tol) - expect_equal(res$results$standard_deviation[2], 0.5410007056, tolerance = tol) - - # Borrow weight must match - expect_equal(res$borrow_weight, 0.1475196487, tolerance = tol) - - # Bootstrap-specific columns exist - expect_true("lower_CI_boot" %in% names(res$results)) - expect_true("upper_CI_boot" %in% names(res$results)) -}) - -test_that("Primary AIPW bootstrap point estimates match non-bootstrap", { - bootstrap_obj <- setup_bootstrap(replicates = 50, bootstrap_CI_type = "perc") - - method_obj <- setup_method_weighting( - method_name = "AIPW", - optimal_weight_flag = TRUE, - wt = 0, - bootstrap_flag = TRUE, - bootstrap_obj = bootstrap_obj, - model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", - model_form_mu0_ext = c( - "y1 ~ x1 + x2 + x3 + x4 + x5", - "y2 ~ x1 + x2 + x3 + x4 + x5" - ) - ) - - analysis_obj <- do.call( - setup_analysis_primary, - c(common_args, list(method_weighting_obj = method_obj)) - ) - - set.seed(42) - res <- run_analysis(analysis_obj) - - # Point estimates must match the non-bootstrap AIPW optimal run - expect_equal(res$results$point_estimates[1], -0.5463256250, tolerance = tol) - expect_equal(res$results$point_estimates[2], 0.5401749583, tolerance = tol) - - expect_equal(res$results$standard_deviation[1], 0.5305614087, tolerance = tol) - expect_equal(res$results$standard_deviation[2], 0.5543275065, tolerance = tol) - - expect_equal(res$borrow_weight, 0.1475196487, tolerance = tol) -}) From 98f1aab2f6be851c96aaec9bba7a24f6da8aac7c Mon Sep 17 00:00:00 2001 From: Matt Secrest Date: Thu, 14 May 2026 12:14:29 -0700 Subject: [PATCH 07/16] rename test cases --- ...sults_OLE_simulation.R => test-full_pipeline_simulation_OLE.R} | 0 ...imary_simulation.R => test-full_pipeline_simulation_primary.R} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/testthat/{test-vignette_results_OLE_simulation.R => test-full_pipeline_simulation_OLE.R} (100%) rename tests/testthat/{test-vignette_results_primary_simulation.R => test-full_pipeline_simulation_primary.R} (100%) diff --git a/tests/testthat/test-vignette_results_OLE_simulation.R b/tests/testthat/test-full_pipeline_simulation_OLE.R similarity index 100% rename from tests/testthat/test-vignette_results_OLE_simulation.R rename to tests/testthat/test-full_pipeline_simulation_OLE.R diff --git a/tests/testthat/test-vignette_results_primary_simulation.R b/tests/testthat/test-full_pipeline_simulation_primary.R similarity index 100% rename from tests/testthat/test-vignette_results_primary_simulation.R rename to tests/testthat/test-full_pipeline_simulation_primary.R From 7a01956fa2ea7ee93032f3778787d5d7ca75c526 Mon Sep 17 00:00:00 2001 From: Matt Secrest Date: Thu, 14 May 2026 14:43:07 -0700 Subject: [PATCH 08/16] ec_ipw --- .claude/CLAUDE.md | 46 ++- DESCRIPTION | 1 + NAMESPACE | 2 + R/ec_ipw.R | 349 ++++++++++++++++++++ R/method_weighting_class.R | 1 + R/run_analysis.R | 10 + _pkgdown.yml | 10 +- man/ec_ipw.Rd | 50 +++ man/estimate.Rd | 52 +++ tests/testthat/test-full_pipeline_ec_aipw.R | 16 +- tests/testthat/test-full_pipeline_ec_ipw.R | 108 +++++- vignettes/primary_analysis_workflow.Rmd | 39 +-- 12 files changed, 634 insertions(+), 50 deletions(-) create mode 100644 R/ec_ipw.R create mode 100644 man/ec_ipw.Rd create mode 100644 man/estimate.Rd diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index 0e6595b..8a6cc20 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -91,6 +91,18 @@ Fix spelling, grammar, and other minor problems without asking the user. Label a Only report what you have changed. +## Programming rules for LLMs +1. mostly lower-case comments +2. no in-line comments +3. don't number comments +4. comments are followed by '----', such as "# dependencies----". nothing that takes a full line. +5. no unnecessary code changes beyond what is already done +6. unless I ask, don't print too much code at once +7. don't do unnecessary print statements within the code. +8. don't add unnecessary try/catch statements or make unnecessary validation checks. i'm working by myself, no need for these things -- I want to see the errors! +9. NO EMOJIS +10. in R code, 2 spaces per tab and base R pipe + ## Refactor rules ### Per-function checklist @@ -205,15 +217,39 @@ run_analysis <- function(analysis_obj) { } ``` +### Interim dispatch (during migration) + +While new method constructors coexist with the old if/else tree in `run_analysis()`, each new class gets an `else if` block at the end of `run_analysis()` that calls `estimate()`: + +```r +# In run_analysis.R, BEFORE the final } else { stop(...) }: +} else if (is(method, "ec_ipw_method")) { + res <- estimate(method, + data = data, + outcomes = outcome_col_name, + treatment = treatment_col_name, + trial_status = trial_status_col_name, + covariates = covariates_col_name, + alpha = alpha, + quiet = quiet + ) +} +``` + +This pattern is repeated for each new method class as it's created. The old if/else branches for the legacy classes remain untouched. Once all 6 methods are migrated, the entire if/else tree is replaced with a single `estimate()` call. + +New method objects inherit from `method_primary_obj` or `method_OLE_obj`, so they pass the existing `checkmate::assert_class(method, "method_primary_obj")` validation in `setup_analysis_primary()`. + ### Implementation order -1. Create full-pipeline regression tests for all 6 methods (old API, locked numerical values) +1. Create full-pipeline regression tests for all 6 methods (old API, locked numerical values) ✓ 2. Create all 6 method constructors (start with `ec_ipw()`) 3. Each constructor returns an S4 object with estimation logic via `estimate()` generic -4. Add new-API tests to each pipeline test file (same expected values) -5. Refactor `setup_analysis()` into a single function (merge `_primary`/`_OLE`, add `T_cross = NULL`) -6. Refactor `run_analysis()` to use S4 dispatch -7. Deprecate `setup_method_weighting`, `setup_method_DID`, `setup_method_SCM`, `setup_analysis_primary`, `setup_analysis_OLE` +4. For each new method, add an `else if (is(method, "xxx_method"))` to `run_analysis()` +5. Add new-API tests to each pipeline test file (same expected values) +6. Once all 6 are done: refactor `setup_analysis()` into a single function (merge `_primary`/`_OLE`, add `T_cross = NULL`) +7. Once all 6 are done: replace the entire if/else in `run_analysis()` with one `estimate()` call +8. Deprecate `setup_method_weighting`, `setup_method_DID`, `setup_method_SCM`, `setup_analysis_primary`, `setup_analysis_OLE` ### Design decisions diff --git a/DESCRIPTION b/DESCRIPTION index 8ca589b..b18ecdb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -94,6 +94,7 @@ Collate: 'method_weighting_class.R' 'analysis_primary_class.R' 'data.R' + 'ec_ipw.R' 'package.R' 'rdborrow-package.R' 'run_analysis.R' diff --git a/NAMESPACE b/NAMESPACE index 8ae08f6..1e4ecef 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand +export(ec_ipw) +export(estimate) export(run_analysis) export(run_simulation) export(setup_analysis_OLE) diff --git a/R/ec_ipw.R b/R/ec_ipw.R new file mode 100644 index 0000000..d2fe3a8 --- /dev/null +++ b/R/ec_ipw.R @@ -0,0 +1,349 @@ +#' @include method_class.R +NULL + +# class unions---- +setClassUnion("numericOrNULL", c("numeric", "NULL")) + +# s4 class definition---- +.ec_ipw_method <- setClass( + "ec_ipw_method", + contains = "method_primary_obj", + slots = c( + ps_formula = "character", + weight = "numericOrNULL", + bootstrap = "numericOrNULL", + bootstrap_ci_type = "character" + ), + prototype = list( + method_name = "EC-IPW", + ps_formula = "", + weight = NULL, + bootstrap = NULL, + bootstrap_ci_type = "perc" + ) +) + +# constructor---- + +#' EC-IPW method constructor +#' +#' Creates a method object for IPW estimation with external control +#' borrowing (Zhou et al., 2024). Pass to \code{\link{setup_analysis_primary}} +#' and \code{\link{run_analysis}}. +#' +#' @param ps_formula Formula string for the propensity score model +#' predicting trial participation. The left-hand side is replaced +#' internally (e.g., \code{"S ~ x1 + x2 + x3"}). +#' @param weight Borrowing weight. \code{NULL} (default) for data-adaptive +#' optimal weight, \code{0} for RCT-only, or a value in (0, 1]. +#' @param bootstrap Number of bootstrap replicates, or \code{NULL} +#' (default) for sandwich variance with normal CIs. +#' @param bootstrap_ci_type Bootstrap CI type. Defaults to \code{"perc"} +#' when \code{bootstrap} is non-NULL. One of \code{"perc"}, \code{"bca"}, +#' \code{"norm"}, \code{"basic"}, or \code{"stud"}. +#' +#' @return An S4 object of class \code{ec_ipw_method}. +#' +#' @references +#' Zhou et al. (2024). Causal estimators for incorporating external +#' controls in randomized trials with longitudinal outcomes. +#' \emph{JRSS-A}. \doi{10.1093/jrsssa/qnae075} +#' +#' @export +#' +#' @examples +#' # Optimal weight, sandwich SE +#' ec_ipw(ps_formula = "S ~ x1 + x2 + x3 + x4 + x5") +#' +#' # No borrowing +#' ec_ipw(ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", weight = 0) +#' +#' # Fixed weight with bootstrap +#' ec_ipw( +#' ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", +#' weight = 0.3, +#' bootstrap = 500 +#' ) +ec_ipw <- function(ps_formula, + weight = NULL, + bootstrap = NULL, + bootstrap_ci_type = NULL) { + checkmate::assert_string(ps_formula) + checkmate::assert_number(weight, lower = 0, upper = 1, null.ok = TRUE) + checkmate::assert_count(bootstrap, positive = TRUE, null.ok = TRUE) + + if (is.null(bootstrap_ci_type)) { + bootstrap_ci_type <- if (!is.null(bootstrap)) "perc" else "perc" + } + checkmate::assert_choice( + bootstrap_ci_type, c("perc", "bca", "norm", "basic", "stud") + ) + + .ec_ipw_method( + ps_formula = ps_formula, + weight = weight, + bootstrap = bootstrap, + bootstrap_ci_type = bootstrap_ci_type, + method_name = "EC-IPW", + bootstrap_flag = !is.null(bootstrap), + bootstrap_obj = .bootstrap_obj( + replicates = bootstrap %||% 500L, + bootstrap_CI_type = bootstrap_ci_type + ) + ) +} + +# estimate() generic and method---- + +#' Run estimation for a method object +#' +#' @param method An S4 method object (e.g., from \code{\link{ec_ipw}}). +#' @param data Data frame with all subjects. +#' @param outcomes Character vector of outcome column names. +#' @param treatment Name of the treatment column. +#' @param trial_status Name of the trial participation column. +#' @param covariates Character vector of covariate column names. +#' @param alpha Significance level. +#' @param quiet Logical. Suppress output. +#' +#' @return A list with estimation results. +#' @export +setGeneric("estimate", function(method, data, outcomes, treatment, + trial_status, covariates, alpha = 0.05, + quiet = TRUE) { + standardGeneric("estimate") +}) + +#' @rdname estimate +setMethod("estimate", "ec_ipw_method", function(method, data, outcomes, + treatment, trial_status, + covariates, alpha = 0.05, + quiet = TRUE) { + Y <- as.matrix(data[, outcomes, drop = FALSE]) + S <- data[[trial_status]] + A <- data[[treatment]] + n_time <- ncol(Y) + N <- nrow(data) + n <- sum(S) + m <- N - n + pi_S <- n / N + + ps_formula <- sub("^[^~]*~", paste0(trial_status, " ~"), method@ps_formula) + df <- data.frame(Y, S = S, A = A, data[, covariates, drop = FALSE]) + + if (!quiet) cat("Running EC-IPW estimator...\n") + + weight <- method@weight + use_borrowing <- is.null(weight) || weight > 0 + + if (!use_borrowing) { + result <- .ec_ipw_no_borrow(df, Y, S, A, n, n_time) + borrow_weight <- 0 + } else { + result <- .ec_ipw_borrow( + df, Y, S, A, n, m, N, pi_S, n_time, + ps_formula, weight + ) + borrow_weight <- result$borrow_weight + } + + tau <- result$tau + sd_tau <- result$sd_tau + cutoff <- qnorm(1 - alpha / 2) + + if (!is.null(method@bootstrap)) { + if (!quiet) cat("Running bootstrap inference...\n") + group_id <- as.integer(interaction(df$S, df$A, drop = TRUE)) + + boot_ci_type_long <- switch(method@bootstrap_ci_type, + norm = "normal", + bca = "bca", + stud = "student", + perc = "percent", + basic = "basic" + ) + + boot_out <- boot::boot( + data = df, + statistic = .ec_ipw_statistic, + outcomes = outcomes, + covariates = covariates, + ps_formula = ps_formula, + borrow_wt = borrow_weight, + R = method@bootstrap, + strata = group_id + ) + + lower_ci <- vapply(seq_len(n_time), \(i) { + ci <- boot::boot.ci(boot_out, + conf = 1 - alpha, + type = method@bootstrap_ci_type, index = i + ) + ci[[boot_ci_type_long]][4] + }, numeric(1)) + + upper_ci <- vapply(seq_len(n_time), \(i) { + ci <- boot::boot.ci(boot_out, + conf = 1 - alpha, + type = method@bootstrap_ci_type, index = i + ) + ci[[boot_ci_type_long]][5] + }, numeric(1)) + + sd_tau <- sqrt(diag(var(boot_out$t))) + + results <- data.frame( + point_estimates = tau, + standard_deviation = sd_tau, + lower_CI_boot = lower_ci, + upper_CI_boot = upper_ci, + row.names = paste0("tau", seq_len(n_time)) + ) + } else { + results <- data.frame( + point_estimates = tau, + standard_deviation = sd_tau, + lower_CI_normal = tau - sd_tau * cutoff, + upper_CI_normal = tau + sd_tau * cutoff, + row.names = paste0("tau", seq_len(n_time)) + ) + } + + list(results = results, borrow_weight = borrow_weight) +}) + +# internal helpers---- + +#' @noRd +.ec_ipw_no_borrow <- function(df, Y, S, A, n, n_time) { + rct <- df[df$S == 1, , drop = FALSE] + Y_rct <- Y[S == 1, , drop = FALSE] + pi_A <- sum(rct$A) / n + + potential <- (rct$A / pi_A + (1 - rct$A) / (1 - pi_A)) * Y_rct + mu1 <- colSums(potential[rct$A == 1, , drop = FALSE]) / n + mu0 <- colSums(potential[rct$A == 0, , drop = FALSE]) / n + tau <- mu1 - mu0 + + phi1 <- rct$A * sweep(Y_rct, 2, mu1) / pi_A + phi0 <- (1 - rct$A) * sweep(Y_rct, 2, mu0) / (1 - pi_A) + Phi <- cbind(phi1, phi0) + B <- crossprod(Phi) / n + + coef_mat <- cbind(diag(n_time), -diag(n_time)) + sd_tau <- sqrt(diag(coef_mat %*% B %*% t(coef_mat) / n)) + + list(tau = tau, sd_tau = sd_tau) +} + +#' @noRd +.ec_ipw_borrow <- function(df, Y, S, A, n, m, N, pi_S, n_time, + ps_formula, weight) { + ps_model <- glm(as.formula(ps_formula), data = df, family = "binomial") + pi_SX <- predict(ps_model, newdata = df, type = "response") + pi_A <- sum(A[S == 1]) / n + rx <- (pi_SX / (1 - pi_SX)) * ((1 - pi_S) / pi_S) + + w11 <- pi_A + w10 <- 1 - pi_A + w00 <- rx + + potential <- (S * A / w11 + S * (1 - A) / w10 + (1 - S) * w00) * Y + mu1 <- colSums(potential[S == 1 & A == 1, , drop = FALSE]) / + sum(S * A / w11) + mu10 <- colSums(potential[S == 1 & A == 0, , drop = FALSE]) / + sum(S * (1 - A) / w10) + mu00 <- colSums(potential[S == 0, , drop = FALSE]) / + sum((1 - S) * w00) + + # Optimal weight (Eq. 11 from Zhou et al. 2024) + num <- sum(S * (1 - A) / w10^2 / (sum(S * (1 - A) / w10))^2) + denom <- sum((1 - S) * w00^2 / (sum((1 - S) * w00))^2) + w_opt <- num / (num + denom) + + borrow_weight <- if (is.null(weight)) w_opt else weight + + mu0 <- (1 - borrow_weight) * mu10 + borrow_weight * mu00 + tau <- mu1 - mu0 + + # Sandwich variance + X_model <- model.matrix(ps_model) + A33 <- diag( + rep(-mean((1 - S) * w00 / (1 - pi_S)), n_time), + nrow = n_time + ) + A34 <- t((1 - S) * pi_SX / (pi_S * (1 - pi_SX)) * + sweep(Y, 2, mu00)) %*% X_model / N + A44 <- t(X_model) %*% diag(-pi_SX * (1 - pi_SX)) %*% X_model / N + + n_ps <- ncol(X_model) + block_dim <- n_time + n_time + n_time + n_ps + A_mat <- matrix(0, nrow = block_dim, ncol = block_dim) + A_mat[1:n_time, 1:n_time] <- diag(-1, n_time) + idx2 <- (n_time + 1):(2 * n_time) + A_mat[idx2, idx2] <- diag(-1, n_time) + idx3 <- (2 * n_time + 1):(3 * n_time) + A_mat[idx3, idx3] <- A33 + idx4 <- (3 * n_time + 1):block_dim + A_mat[idx4, idx4] <- A44 + A_mat[idx3, idx4] <- A34 + + phi1 <- S * A * sweep(Y, 2, mu1) / pi_A / pi_S + phi2 <- S * (1 - A) * sweep(Y, 2, mu10) / (1 - pi_A) / pi_S + phi3 <- (1 - S) * sweep(Y, 2, mu00) * w00 / (1 - pi_S) + phi_ps <- (S - pi_SX) * X_model + Phi <- cbind(phi1, phi2, phi3, phi_ps) + B <- crossprod(Phi) / N + + A_inv <- solve(A_mat) + sigma <- A_inv %*% B %*% t(A_inv) + + coef_mat <- cbind( + diag(n_time), + -(1 - borrow_weight) * diag(n_time), + -borrow_weight * diag(n_time), + matrix(0, nrow = n_time, ncol = n_ps) + ) + sd_tau <- sqrt(diag(coef_mat %*% sigma %*% t(coef_mat) / N)) + + list(tau = tau, sd_tau = sd_tau, borrow_weight = borrow_weight) +} + +#' @noRd +.ec_ipw_statistic <- function(data, indices, outcomes, covariates, + ps_formula, borrow_wt) { + d <- data[indices, , drop = FALSE] + Y <- as.matrix(d[, outcomes, drop = FALSE]) + S <- d$S + A <- d$A + n <- sum(S) + N <- nrow(d) + pi_S <- n / N + + if (borrow_wt == 0) { + Y_rct <- Y[S == 1, , drop = FALSE] + A_rct <- A[S == 1] + pi_A <- sum(A_rct) / n + potential <- (A_rct / pi_A + (1 - A_rct) / (1 - pi_A)) * Y_rct + mu1 <- colSums(potential[A_rct == 1, , drop = FALSE]) / n + mu0 <- colSums(potential[A_rct == 0, , drop = FALSE]) / n + return(mu1 - mu0) + } + + ps_model <- glm(as.formula(ps_formula), data = d, family = "binomial") + pi_SX <- predict(ps_model, newdata = d, type = "response") + pi_A <- sum(A[S == 1]) / n + rx <- (pi_SX / (1 - pi_SX)) * ((1 - pi_S) / pi_S) + + potential <- (S * A / pi_A + S * (1 - A) / (1 - pi_A) + + (1 - S) * rx) * Y + mu1 <- colSums(potential[S == 1 & A == 1, , drop = FALSE]) / + sum(S * A / pi_A) + mu10 <- colSums(potential[S == 1 & A == 0, , drop = FALSE]) / + sum(S * (1 - A) / (1 - pi_A)) + mu00 <- colSums(potential[S == 0, , drop = FALSE]) / + sum((1 - S) * rx) + + mu0 <- (1 - borrow_wt) * mu10 + borrow_wt * mu00 + mu1 - mu0 +} diff --git a/R/method_weighting_class.R b/R/method_weighting_class.R index 186a4b1..fa89c8d 100644 --- a/R/method_weighting_class.R +++ b/R/method_weighting_class.R @@ -71,6 +71,7 @@ setup_method_weighting <- function(method_name = "IPW", model_form_piA = "", model_form_mu0_rct = "", model_form_mu1_rct = "") { + .Deprecated("ec_ipw") .validate_method_base(method_name, bootstrap_flag, bootstrap_obj) checkmate::assert_flag(optimal_weight_flag) checkmate::assert_number(wt) diff --git a/R/run_analysis.R b/R/run_analysis.R index 80a56d7..5290ee3 100644 --- a/R/run_analysis.R +++ b/R/run_analysis.R @@ -184,6 +184,16 @@ run_analysis <- function(analysis_obj, quiet = TRUE) { } else { stop("No such method is defined!") } + } else if (is(method, "ec_ipw_method")) { + res <- estimate(method, + data = data, + outcomes = outcome_col_name, + treatment = treatment_col_name, + trial_status = trial_status_col_name, + covariates = covariates_col_name, + alpha = alpha, + quiet = quiet + ) } else { stop("No such method type is defined!") } diff --git a/_pkgdown.yml b/_pkgdown.yml index 5a998a1..4e880dd 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -47,8 +47,14 @@ reference: - run_simulation - setup_simulation_report - - title: Method Configuration - desc: Configure estimation methods + - title: Estimators + desc: Method constructors for estimation + contents: + - ec_ipw + - estimate + + - title: Method Configuration (legacy) + desc: Configure estimation methods (deprecated) contents: - setup_method_weighting - setup_method_DID diff --git a/man/ec_ipw.Rd b/man/ec_ipw.Rd new file mode 100644 index 0000000..076ad18 --- /dev/null +++ b/man/ec_ipw.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ec_ipw.R +\name{ec_ipw} +\alias{ec_ipw} +\title{EC-IPW method constructor} +\usage{ +ec_ipw(ps_formula, weight = NULL, bootstrap = NULL, bootstrap_ci_type = NULL) +} +\arguments{ +\item{ps_formula}{Formula string for the propensity score model +predicting trial participation. The left-hand side is replaced +internally (e.g., \code{"S ~ x1 + x2 + x3"}).} + +\item{weight}{Borrowing weight. \code{NULL} (default) for data-adaptive +optimal weight, \code{0} for RCT-only, or a value in (0, 1].} + +\item{bootstrap}{Number of bootstrap replicates, or \code{NULL} +(default) for sandwich variance with normal CIs.} + +\item{bootstrap_ci_type}{Bootstrap CI type. Defaults to \code{"perc"} +when \code{bootstrap} is non-NULL. One of \code{"perc"}, \code{"bca"}, +\code{"norm"}, \code{"basic"}, or \code{"stud"}.} +} +\value{ +An S4 object of class \code{ec_ipw_method}. +} +\description{ +Creates a method object for IPW estimation with external control +borrowing (Zhou et al., 2024). Pass to \code{\link{setup_analysis_primary}} +and \code{\link{run_analysis}}. +} +\examples{ +# Optimal weight, sandwich SE +ec_ipw(ps_formula = "S ~ x1 + x2 + x3 + x4 + x5") + +# No borrowing +ec_ipw(ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", weight = 0) + +# Fixed weight with bootstrap +ec_ipw( + ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", + weight = 0.3, + bootstrap = 500 +) +} +\references{ +Zhou et al. (2024). Causal estimators for incorporating external +controls in randomized trials with longitudinal outcomes. +\emph{JRSS-A}. \doi{10.1093/jrsssa/qnae075} +} diff --git a/man/estimate.Rd b/man/estimate.Rd new file mode 100644 index 0000000..d6e76d7 --- /dev/null +++ b/man/estimate.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ec_ipw.R +\name{estimate} +\alias{estimate} +\alias{estimate,ec_ipw_method-method} +\title{Run estimation for a method object} +\usage{ +estimate( + method, + data, + outcomes, + treatment, + trial_status, + covariates, + alpha = 0.05, + quiet = TRUE +) + +\S4method{estimate}{ec_ipw_method}( + method, + data, + outcomes, + treatment, + trial_status, + covariates, + alpha = 0.05, + quiet = TRUE +) +} +\arguments{ +\item{method}{An S4 method object (e.g., from \code{\link{ec_ipw}}).} + +\item{data}{Data frame with all subjects.} + +\item{outcomes}{Character vector of outcome column names.} + +\item{treatment}{Name of the treatment column.} + +\item{trial_status}{Name of the trial participation column.} + +\item{covariates}{Character vector of covariate column names.} + +\item{alpha}{Significance level.} + +\item{quiet}{Logical. Suppress output.} +} +\value{ +A list with estimation results. +} +\description{ +Run estimation for a method object +} diff --git a/tests/testthat/test-full_pipeline_ec_aipw.R b/tests/testthat/test-full_pipeline_ec_aipw.R index d172c3d..119cda0 100644 --- a/tests/testthat/test-full_pipeline_ec_aipw.R +++ b/tests/testthat/test-full_pipeline_ec_aipw.R @@ -1,7 +1,7 @@ tol <- 1e-6 test_that("EC-AIPW zero weight (no borrowing)", { - method <- setup_method_weighting( + method <- suppressWarnings(setup_method_weighting( method_name = "AIPW", optimal_weight_flag = FALSE, wt = 0, @@ -10,7 +10,7 @@ test_that("EC-AIPW zero weight (no borrowing)", { "y1 ~ x1 + x2 + x3 + x4 + x5", "y2 ~ x1 + x2 + x3 + x4 + x5" ) - ) + )) analysis <- setup_analysis_primary( data = SyntheticData, trial_status_col_name = "S", @@ -33,7 +33,7 @@ test_that("EC-AIPW zero weight (no borrowing)", { }) test_that("EC-AIPW optimal weight", { - method <- setup_method_weighting( + method <- suppressWarnings(setup_method_weighting( method_name = "AIPW", optimal_weight_flag = TRUE, model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5", @@ -41,7 +41,7 @@ test_that("EC-AIPW optimal weight", { "y1 ~ x1 + x2 + x3 + x4 + x5", "y2 ~ x1 + x2 + x3 + x4 + x5" ) - ) + )) analysis <- setup_analysis_primary( data = SyntheticData, trial_status_col_name = "S", @@ -64,7 +64,7 @@ test_that("EC-AIPW optimal weight", { }) test_that("EC-AIPW fixed weight 0.3", { - method <- setup_method_weighting( + method <- suppressWarnings(setup_method_weighting( method_name = "AIPW", optimal_weight_flag = FALSE, wt = 0.3, @@ -73,7 +73,7 @@ test_that("EC-AIPW fixed weight 0.3", { "y1 ~ x1 + x2 + x3 + x4 + x5", "y2 ~ x1 + x2 + x3 + x4 + x5" ) - ) + )) analysis <- setup_analysis_primary( data = SyntheticData, trial_status_col_name = "S", @@ -97,7 +97,7 @@ test_that("EC-AIPW fixed weight 0.3", { test_that("EC-AIPW bootstrap preserves point estimates", { bootstrap_obj <- setup_bootstrap(replicates = 50, bootstrap_CI_type = "perc") - method <- setup_method_weighting( + method <- suppressWarnings(setup_method_weighting( method_name = "AIPW", optimal_weight_flag = TRUE, wt = 0, @@ -108,7 +108,7 @@ test_that("EC-AIPW bootstrap preserves point estimates", { "y1 ~ x1 + x2 + x3 + x4 + x5", "y2 ~ x1 + x2 + x3 + x4 + x5" ) - ) + )) analysis <- setup_analysis_primary( data = SyntheticData, trial_status_col_name = "S", diff --git a/tests/testthat/test-full_pipeline_ec_ipw.R b/tests/testthat/test-full_pipeline_ec_ipw.R index a17cf2f..45d8934 100644 --- a/tests/testthat/test-full_pipeline_ec_ipw.R +++ b/tests/testthat/test-full_pipeline_ec_ipw.R @@ -1,12 +1,22 @@ tol <- 1e-6 +test_that("setup_method_weighting emits deprecation warning", { + expect_warning( + setup_method_weighting( + method_name = "IPW", + model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5" + ), + "deprecated" + ) +}) + test_that("EC-IPW zero weight (no borrowing)", { - method <- setup_method_weighting( + method <- suppressWarnings(setup_method_weighting( method_name = "IPW", optimal_weight_flag = FALSE, wt = 0, model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5" - ) + )) analysis <- setup_analysis_primary( data = SyntheticData, trial_status_col_name = "S", @@ -31,11 +41,11 @@ test_that("EC-IPW zero weight (no borrowing)", { }) test_that("EC-IPW optimal weight", { - method <- setup_method_weighting( + method <- suppressWarnings(setup_method_weighting( method_name = "IPW", optimal_weight_flag = TRUE, model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5" - ) + )) analysis <- setup_analysis_primary( data = SyntheticData, trial_status_col_name = "S", @@ -58,12 +68,12 @@ test_that("EC-IPW optimal weight", { }) test_that("EC-IPW fixed weight 0.3", { - method <- setup_method_weighting( + method <- suppressWarnings(setup_method_weighting( method_name = "IPW", optimal_weight_flag = FALSE, wt = 0.3, model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5" - ) + )) analysis <- setup_analysis_primary( data = SyntheticData, trial_status_col_name = "S", @@ -87,14 +97,14 @@ test_that("EC-IPW fixed weight 0.3", { test_that("EC-IPW bootstrap preserves point estimates", { bootstrap_obj <- setup_bootstrap(replicates = 50, bootstrap_CI_type = "perc") - method <- setup_method_weighting( + method <- suppressWarnings(setup_method_weighting( method_name = "IPW", optimal_weight_flag = TRUE, bootstrap_flag = TRUE, bootstrap_obj = bootstrap_obj, wt = 0, model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5" - ) + )) analysis <- setup_analysis_primary( data = SyntheticData, trial_status_col_name = "S", @@ -117,3 +127,85 @@ test_that("EC-IPW bootstrap preserves point estimates", { c("point_estimates", "standard_deviation", "lower_CI_boot", "upper_CI_boot") ) }) + +# new API---- + +test_that("ec_ipw() optimal weight matches old API", { + method <- ec_ipw(ps_formula = "S ~ x1 + x2 + x3 + x4 + x5") + analysis <- setup_analysis_primary( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + method_weighting_obj = method + ) + res <- run_analysis(analysis) + + expect_equal(res$borrow_weight, 0.1475196487, tolerance = tol) + expect_equal(res$results$point_estimates[1], -0.1971968926, tolerance = tol) + expect_equal(res$results$point_estimates[2], 0.4697208867, tolerance = tol) + expect_equal(res$results$standard_deviation[1], 0.5134017502, tolerance = tol) + expect_equal(res$results$standard_deviation[2], 0.5410007056, tolerance = tol) +}) + +test_that("ec_ipw() weight=0 matches old API", { + method <- ec_ipw(ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", weight = 0) + analysis <- setup_analysis_primary( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + method_weighting_obj = method + ) + res <- run_analysis(analysis) + + expect_equal(res$borrow_weight, 0) + expect_equal(res$results$point_estimates[1], -0.0280870419, tolerance = tol) + expect_equal(res$results$point_estimates[2], 0.4095955812, tolerance = tol) +}) + +test_that("ec_ipw() weight=0.3 matches old API", { + method <- ec_ipw(ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", weight = 0.3) + analysis <- setup_analysis_primary( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + method_weighting_obj = method + ) + res <- run_analysis(analysis) + + expect_equal(res$borrow_weight, 0.3) + expect_equal(res$results$point_estimates[1], -0.3719934684, tolerance = tol) + expect_equal(res$results$point_estimates[2], 0.5318680501, tolerance = tol) +}) + +test_that("ec_ipw() with bootstrap", { + method <- ec_ipw( + ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", + bootstrap = 50, + bootstrap_ci_type = "perc" + ) + analysis <- setup_analysis_primary( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + method_weighting_obj = method + ) + + set.seed(42) + res <- run_analysis(analysis) + + expect_equal(res$results$point_estimates[1], -0.1971968926, tolerance = tol) + expect_equal(res$results$point_estimates[2], 0.4697208867, tolerance = tol) + expect_equal(res$borrow_weight, 0.1475196487, tolerance = tol) + expect_named( + res$results, + c("point_estimates", "standard_deviation", "lower_CI_boot", "upper_CI_boot") + ) +}) diff --git a/vignettes/primary_analysis_workflow.Rmd b/vignettes/primary_analysis_workflow.Rmd index 081d49c..52d9848 100644 --- a/vignettes/primary_analysis_workflow.Rmd +++ b/vignettes/primary_analysis_workflow.Rmd @@ -38,11 +38,9 @@ head(SyntheticData) ```{r} # test: within trial ## Data argument + column names (coxph, glm) -method_weighting_obj <- setup_method_weighting( - method_name = "IPW", - optimal_weight_flag = FALSE, - wt = 0, - model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5" +method <- ec_ipw( + ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", + weight = 0 ) analysis_primary_obj <- setup_analysis_primary( @@ -51,7 +49,7 @@ analysis_primary_obj <- setup_analysis_primary( treatment_col_name = "A", outcome_col_name = c("y1", "y2"), covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), - method_weighting_obj = method_weighting_obj + method_weighting_obj = method ) res <- run_analysis(analysis_primary_obj) @@ -61,11 +59,7 @@ res 2) IPW with data-adaptive weight: ```{r} # test: within trial -method_weighting_obj <- setup_method_weighting( - method_name = "IPW", - optimal_weight_flag = TRUE, - model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5" -) +method <- ec_ipw(ps_formula = "S ~ x1 + x2 + x3 + x4 + x5") analysis_primary_obj <- setup_analysis_primary( data = SyntheticData, @@ -73,7 +67,7 @@ analysis_primary_obj <- setup_analysis_primary( treatment_col_name = "A", outcome_col_name = c("y1", "y2"), covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), - method_weighting_obj = method_weighting_obj + method_weighting_obj = method ) res <- run_analysis(analysis_primary_obj) @@ -149,20 +143,11 @@ In this section we present Bootstrap inference results. We report Bootstrap conf 1) IPW with bootstrap CI ```{r} # test: within trial -## Data argument + column names (coxph, glm) -## having a bootstrap_flag outside the class -bootstrap_obj <- setup_bootstrap( - replicates = 50, - bootstrap_CI_type = "perc" -) - -method_weighting_obj <- setup_method_weighting( - method_name = "IPW", - optimal_weight_flag = TRUE, - bootstrap_flag = TRUE, - bootstrap_obj = bootstrap_obj, - wt = 0, - model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5" +## bootstrap is now specified directly in the method constructor +method <- ec_ipw( + ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", + bootstrap = 50, + bootstrap_ci_type = "perc" ) analysis_primary_obj <- setup_analysis_primary( @@ -171,7 +156,7 @@ analysis_primary_obj <- setup_analysis_primary( treatment_col_name = "A", outcome_col_name = c("y1", "y2"), covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), - method_weighting_obj = method_weighting_obj + method_weighting_obj = method ) res <- run_analysis(analysis_primary_obj) From f9e2c638ea19473ed0673f77f3e5d0eaa6455b15 Mon Sep 17 00:00:00 2001 From: Matt Secrest Date: Thu, 14 May 2026 15:01:50 -0700 Subject: [PATCH 09/16] aiptw --- DESCRIPTION | 1 + NAMESPACE | 1 + R/ec_aipw.R | 340 ++++++++++++++++++ R/run_analysis.R | 2 +- _pkgdown.yml | 1 + man/ec_aipw.Rd | 52 +++ man/estimate.Rd | 14 +- tests/testthat/test-analysis_OLE_class.R | 2 +- tests/testthat/test-analysis_primary_class.R | 8 +- tests/testthat/test-full_pipeline_ec_aipw.R | 106 ++++++ .../test-full_pipeline_simulation_primary.R | 4 +- tests/testthat/test-method_weighting_class.R | 18 +- 12 files changed, 531 insertions(+), 18 deletions(-) create mode 100644 R/ec_aipw.R create mode 100644 man/ec_aipw.Rd diff --git a/DESCRIPTION b/DESCRIPTION index b18ecdb..a1abad4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -95,6 +95,7 @@ Collate: 'analysis_primary_class.R' 'data.R' 'ec_ipw.R' + 'ec_aipw.R' 'package.R' 'rdborrow-package.R' 'run_analysis.R' diff --git a/NAMESPACE b/NAMESPACE index 1e4ecef..4919df1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(ec_aipw) export(ec_ipw) export(estimate) export(run_analysis) diff --git a/R/ec_aipw.R b/R/ec_aipw.R new file mode 100644 index 0000000..05efe44 --- /dev/null +++ b/R/ec_aipw.R @@ -0,0 +1,340 @@ +#' @include ec_ipw.R +NULL + +# s4 class definition---- +.ec_aipw_method <- setClass( + "ec_aipw_method", + contains = "method_primary_obj", + slots = c( + ps_formula = "character", + outcome_formula = "character", + weight = "numericOrNULL", + bootstrap = "numericOrNULL", + bootstrap_ci_type = "character" + ), + prototype = list( + method_name = "EC-AIPW", + ps_formula = "", + outcome_formula = "", + weight = NULL, + bootstrap = NULL, + bootstrap_ci_type = "perc" + ) +) + +# constructor---- + +#' EC-AIPW method constructor +#' +#' Creates a method object for augmented IPW estimation with external +#' control borrowing (Zhou et al., 2024). Augments the IPW estimator +#' with an outcome regression model for improved efficiency. Pass to +#' \code{\link{setup_analysis_primary}} and \code{\link{run_analysis}}. +#' +#' @param ps_formula Formula string for the propensity score model +#' predicting trial participation. +#' @param outcome_formula Character vector of outcome model formulas, +#' one per time point (e.g., \code{c("y1 ~ x1 + x2", "y2 ~ x1 + x2")}). +#' @param weight Borrowing weight. \code{NULL} (default) for data-adaptive +#' optimal weight, \code{0} for RCT-only, or a value in (0, 1]. +#' @param bootstrap Number of bootstrap replicates, or \code{NULL} +#' (default) for sandwich variance with normal CIs. +#' @param bootstrap_ci_type Bootstrap CI type. Defaults to \code{"perc"}. +#' +#' @return An S4 object of class \code{ec_aipw_method}. +#' +#' @references +#' Zhou et al. (2024). Causal estimators for incorporating external +#' controls in randomized trials with longitudinal outcomes. +#' \emph{JRSS-A}. \doi{10.1093/jrsssa/qnae075} +#' +#' @export +#' +#' @examples +#' ec_aipw( +#' ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", +#' outcome_formula = c( +#' "y1 ~ x1 + x2 + x3 + x4 + x5", +#' "y2 ~ x1 + x2 + x3 + x4 + x5" +#' ) +#' ) +ec_aipw <- function(ps_formula, + outcome_formula, + weight = NULL, + bootstrap = NULL, + bootstrap_ci_type = NULL) { + checkmate::assert_string(ps_formula) + checkmate::assert_character(outcome_formula, min.len = 1) + checkmate::assert_number(weight, lower = 0, upper = 1, null.ok = TRUE) + checkmate::assert_count(bootstrap, positive = TRUE, null.ok = TRUE) + + if (is.null(bootstrap_ci_type)) { + bootstrap_ci_type <- "perc" + } + checkmate::assert_choice( + bootstrap_ci_type, c("perc", "bca", "norm", "basic", "stud") + ) + + .ec_aipw_method( + ps_formula = ps_formula, + outcome_formula = outcome_formula, + weight = weight, + bootstrap = bootstrap, + bootstrap_ci_type = bootstrap_ci_type, + method_name = "EC-AIPW", + bootstrap_flag = !is.null(bootstrap), + bootstrap_obj = .bootstrap_obj( + replicates = bootstrap %||% 500L, + bootstrap_CI_type = bootstrap_ci_type + ) + ) +} + +# estimate() method---- + +#' @rdname estimate +setMethod("estimate", "ec_aipw_method", function(method, data, outcomes, + treatment, trial_status, + covariates, alpha = 0.05, + quiet = TRUE) { + Y <- as.matrix(data[, outcomes, drop = FALSE]) + S <- data[[trial_status]] + A <- data[[treatment]] + n_time <- ncol(Y) + N <- nrow(data) + n <- sum(S) + m <- N - n + pi_S <- n / N + + ps_formula <- sub("^[^~]*~", paste0(trial_status, " ~"), method@ps_formula) + df <- data.frame(Y, S = S, A = A, data[, covariates, drop = FALSE]) + + if (!quiet) cat("Running EC-AIPW estimator...\n") + + result <- .ec_aipw_estimate( + df, Y, S, A, n, m, N, pi_S, n_time, + ps_formula, method@outcome_formula, method@weight + ) + + tau <- result$tau + sd_tau <- result$sd_tau + borrow_weight <- result$borrow_weight + cutoff <- qnorm(1 - alpha / 2) + + if (!is.null(method@bootstrap)) { + if (!quiet) cat("Running bootstrap inference...\n") + group_id <- as.integer(interaction(df$S, df$A, drop = TRUE)) + + boot_ci_type_long <- switch(method@bootstrap_ci_type, + norm = "normal", + bca = "bca", + stud = "student", + perc = "percent", + basic = "basic" + ) + + boot_out <- boot::boot( + data = df, + statistic = .ec_aipw_statistic, + outcomes = outcomes, + covariates = covariates, + ps_formula = ps_formula, + outcome_formula = method@outcome_formula, + borrow_wt = borrow_weight, + R = method@bootstrap, + strata = group_id + ) + + lower_ci <- vapply(seq_len(n_time), \(i) { + ci <- boot::boot.ci(boot_out, + conf = 1 - alpha, + type = method@bootstrap_ci_type, index = i + ) + ci[[boot_ci_type_long]][4] + }, numeric(1)) + + upper_ci <- vapply(seq_len(n_time), \(i) { + ci <- boot::boot.ci(boot_out, + conf = 1 - alpha, + type = method@bootstrap_ci_type, index = i + ) + ci[[boot_ci_type_long]][5] + }, numeric(1)) + + sd_tau <- sqrt(diag(var(boot_out$t))) + + results <- data.frame( + point_estimates = tau, + standard_deviation = sd_tau, + lower_CI_boot = lower_ci, + upper_CI_boot = upper_ci, + row.names = paste0("tau", seq_len(n_time)) + ) + } else { + results <- data.frame( + point_estimates = tau, + standard_deviation = sd_tau, + lower_CI_normal = tau - sd_tau * cutoff, + upper_CI_normal = tau + sd_tau * cutoff, + row.names = paste0("tau", seq_len(n_time)) + ) + } + + list(results = results, borrow_weight = borrow_weight) +}) + +# internal helpers---- + +#' @noRd +.ec_aipw_estimate <- function(df, Y, S, A, n, m, N, pi_S, n_time, + ps_formula, outcome_formula, weight) { + ps_model <- glm(as.formula(ps_formula), data = df, family = "binomial") + pi_SX <- predict(ps_model, newdata = df, type = "response") + pi_A <- sum(A[S == 1]) / n + rx <- (pi_SX / (1 - pi_SX)) * ((1 - pi_S) / pi_S) + + # outcome regression models fitted on controls only + Y0_models <- lapply(outcome_formula, \(f) { + lm(as.formula(f), data = df[A == 0, , drop = FALSE]) + }) + + # predicted Y0 for all subjects + Y0 <- vapply(seq_len(n_time), \(t) { + predict(Y0_models[[t]], newdata = df) + }, numeric(N)) + + # residuals + Yr <- Y - Y0 + + w11 <- pi_A + w10 <- 1 - pi_A + w00 <- rx + + potential <- (S * A / w11 + S * (1 - A) / w10 + (1 - S) * w00) * Yr + mu1 <- colSums(potential[S == 1 & A == 1, , drop = FALSE]) / n + mu10 <- colSums(potential[S == 1 & A == 0, , drop = FALSE]) / n + mu00 <- colSums(potential[S == 0, , drop = FALSE]) / sum((1 - S) * w00) + + # optimal weight + num <- sum(S * (1 - A) / w10^2 / (sum(S * (1 - A) / w10))^2) + denom <- sum((1 - S) * w00^2 / (sum((1 - S) * w00))^2) + w_opt <- num / (num + denom) + borrow_weight <- if (is.null(weight)) w_opt else weight + + mu0 <- (1 - borrow_weight) * mu10 + borrow_weight * mu00 + tau <- mu1 - mu0 + + # sandwich variance + X_ps <- model.matrix(ps_model) + n_ps <- ncol(X_ps) + + # outcome model matrices (fit on full data for sandwich) + Y0_models_full <- lapply(outcome_formula, \(f) lm(as.formula(f), data = df)) + Y0_model_dims <- vapply(Y0_models_full, \(m) ncol(model.matrix(m)), integer(1)) + n_outcome <- sum(Y0_model_dims) + + A33 <- diag(rep(-mean((1 - S) * w00 / (1 - pi_S)), n_time), nrow = n_time) + A34 <- t((1 - S) * pi_SX / (pi_S * (1 - pi_SX)) * + sweep(Yr, 2, mu00)) %*% X_ps / N + A44 <- t(X_ps) %*% diag(-pi_SX * (1 - pi_SX)) %*% X_ps / N + + A0 <- as.matrix(Matrix::bdiag( + diag(-1, n_time), diag(-1, n_time), A33, A44 + )) + A0[ + (n_time + n_time + 1):(n_time + n_time + n_time), + (n_time + n_time + n_time + 1):(n_time + n_time + n_time + n_ps) + ] <- A34 + + # outcome model blocks + Phi1_gamma_list <- lapply(seq_len(n_time), \(t) { + Xm <- model.matrix(Y0_models_full[[t]]) + as.vector(-S * A / (pi_S * pi_A)) %*% Xm / N + }) + Phi1_gamma <- as.matrix(Matrix::bdiag(Phi1_gamma_list)) + + Phi2_gamma_list <- lapply(seq_len(n_time), \(t) { + Xm <- model.matrix(Y0_models_full[[t]]) + as.vector(-S * (1 - A) / (pi_S * (1 - pi_A))) %*% Xm / N + }) + Phi2_gamma <- as.matrix(Matrix::bdiag(Phi2_gamma_list)) + + Phi3_gamma_list <- lapply(seq_len(n_time), \(t) { + Xm <- model.matrix(Y0_models_full[[t]]) + as.vector(-(1 - S) * rx / (1 - pi_S)) %*% Xm / N + }) + Phi3_gamma <- as.matrix(Matrix::bdiag(Phi3_gamma_list)) + + Y0_gamma_list <- lapply(seq_len(n_time), \(t) { + Xm <- model.matrix(Y0_models_full[[t]]) + -t(Xm) %*% diag((1 - A) / (1 - mean(A))) %*% Xm / N + }) + Y0_gamma <- as.matrix(Matrix::bdiag(Y0_gamma_list)) + + A45 <- matrix(0, nrow = n_ps, ncol = n_outcome) + A51 <- matrix(0, nrow = n_outcome, ncol = n_time + n_time + n_time + n_ps) + + A_left <- rbind(A0, A51) + A_right <- rbind(Phi1_gamma, Phi2_gamma, Phi3_gamma, A45, Y0_gamma) + A_mat <- cbind(A_left, A_right) + + # meat + phi1 <- S * A * sweep(Yr, 2, mu1) / pi_A / pi_S + phi2 <- S * (1 - A) * sweep(Yr, 2, mu10) / (1 - pi_A) / pi_S + phi3 <- (1 - S) * sweep(Yr, 2, mu00) * w00 / (1 - pi_S) + phi_ps <- (S - pi_SX) * X_ps + phi_Y0 <- do.call(cbind, lapply(seq_len(n_time), \(t) { + Xm <- model.matrix(Y0_models_full[[t]]) + ((1 - A) / (1 - mean(A))) * (Yr[, t] * Xm) + })) + + Phi <- cbind(phi1, phi2, phi3, phi_ps, phi_Y0) + B <- crossprod(Phi) / N + + A_inv <- solve(A_mat) + sigma <- A_inv %*% B %*% t(A_inv) + + coef_mat <- cbind( + diag(n_time), + -(1 - borrow_weight) * diag(n_time), + -borrow_weight * diag(n_time), + matrix(0, nrow = n_time, ncol = n_ps + n_outcome) + ) + sd_tau <- sqrt(diag(coef_mat %*% sigma %*% t(coef_mat) / N)) + + list(tau = tau, sd_tau = sd_tau, borrow_weight = borrow_weight) +} + +#' @noRd +.ec_aipw_statistic <- function(data, indices, outcomes, covariates, + ps_formula, outcome_formula, borrow_wt) { + d <- data[indices, , drop = FALSE] + Y <- as.matrix(d[, outcomes, drop = FALSE]) + S <- d$S + A <- d$A + n <- sum(S) + N <- nrow(d) + n_time <- ncol(Y) + pi_S <- n / N + + ps_model <- glm(as.formula(ps_formula), data = d, family = "binomial") + pi_SX <- predict(ps_model, newdata = d, type = "response") + pi_A <- sum(A[S == 1]) / n + rx <- (pi_SX / (1 - pi_SX)) * ((1 - pi_S) / pi_S) + + Y0_models <- lapply(outcome_formula, \(f) { + lm(as.formula(f), data = d[A == 0, , drop = FALSE]) + }) + Y0 <- vapply(seq_len(n_time), \(t) { + predict(Y0_models[[t]], newdata = d) + }, numeric(N)) + Yr <- Y - Y0 + + potential <- (S * A / pi_A + S * (1 - A) / (1 - pi_A) + (1 - S) * rx) * Yr + mu1 <- colSums(potential[S == 1 & A == 1, , drop = FALSE]) / n + mu10 <- colSums(potential[S == 1 & A == 0, , drop = FALSE]) / n + mu00 <- colSums(potential[S == 0, , drop = FALSE]) / sum((1 - S) * rx) + + mu0 <- (1 - borrow_wt) * mu10 + borrow_wt * mu00 + mu1 - mu0 +} diff --git a/R/run_analysis.R b/R/run_analysis.R index 5290ee3..13222be 100644 --- a/R/run_analysis.R +++ b/R/run_analysis.R @@ -184,7 +184,7 @@ run_analysis <- function(analysis_obj, quiet = TRUE) { } else { stop("No such method is defined!") } - } else if (is(method, "ec_ipw_method")) { + } else if (is(method, "ec_ipw_method") || is(method, "ec_aipw_method")) { res <- estimate(method, data = data, outcomes = outcome_col_name, diff --git a/_pkgdown.yml b/_pkgdown.yml index 4e880dd..5431c13 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -51,6 +51,7 @@ reference: desc: Method constructors for estimation contents: - ec_ipw + - ec_aipw - estimate - title: Method Configuration (legacy) diff --git a/man/ec_aipw.Rd b/man/ec_aipw.Rd new file mode 100644 index 0000000..a7a9fae --- /dev/null +++ b/man/ec_aipw.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ec_aipw.R +\name{ec_aipw} +\alias{ec_aipw} +\title{EC-AIPW method constructor} +\usage{ +ec_aipw( + ps_formula, + outcome_formula, + weight = NULL, + bootstrap = NULL, + bootstrap_ci_type = NULL +) +} +\arguments{ +\item{ps_formula}{Formula string for the propensity score model +predicting trial participation.} + +\item{outcome_formula}{Character vector of outcome model formulas, +one per time point (e.g., \code{c("y1 ~ x1 + x2", "y2 ~ x1 + x2")}).} + +\item{weight}{Borrowing weight. \code{NULL} (default) for data-adaptive +optimal weight, \code{0} for RCT-only, or a value in (0, 1].} + +\item{bootstrap}{Number of bootstrap replicates, or \code{NULL} +(default) for sandwich variance with normal CIs.} + +\item{bootstrap_ci_type}{Bootstrap CI type. Defaults to \code{"perc"}.} +} +\value{ +An S4 object of class \code{ec_aipw_method}. +} +\description{ +Creates a method object for augmented IPW estimation with external +control borrowing (Zhou et al., 2024). Augments the IPW estimator +with an outcome regression model for improved efficiency. Pass to +\code{\link{setup_analysis_primary}} and \code{\link{run_analysis}}. +} +\examples{ +ec_aipw( + ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", + outcome_formula = c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5" + ) +) +} +\references{ +Zhou et al. (2024). Causal estimators for incorporating external +controls in randomized trials with longitudinal outcomes. +\emph{JRSS-A}. \doi{10.1093/jrsssa/qnae075} +} diff --git a/man/estimate.Rd b/man/estimate.Rd index d6e76d7..abb18c5 100644 --- a/man/estimate.Rd +++ b/man/estimate.Rd @@ -1,8 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ec_ipw.R +% Please edit documentation in R/ec_ipw.R, R/ec_aipw.R \name{estimate} \alias{estimate} \alias{estimate,ec_ipw_method-method} +\alias{estimate,ec_aipw_method-method} \title{Run estimation for a method object} \usage{ estimate( @@ -26,6 +27,17 @@ estimate( alpha = 0.05, quiet = TRUE ) + +\S4method{estimate}{ec_aipw_method}( + method, + data, + outcomes, + treatment, + trial_status, + covariates, + alpha = 0.05, + quiet = TRUE +) } \arguments{ \item{method}{An S4 method object (e.g., from \code{\link{ec_ipw}}).} diff --git a/tests/testthat/test-analysis_OLE_class.R b/tests/testthat/test-analysis_OLE_class.R index f257cb3..ce8e2aa 100644 --- a/tests/testthat/test-analysis_OLE_class.R +++ b/tests/testthat/test-analysis_OLE_class.R @@ -42,7 +42,7 @@ test_that("setup_analysis_OLE validates method type", { treatment_col_name = "A", outcome_col_name = "y1", covariates_col_name = "x1", - method_OLE_obj = setup_method_weighting(), + method_OLE_obj = suppressWarnings(setup_method_weighting()), T_cross = 2 )) }) diff --git a/tests/testthat/test-analysis_primary_class.R b/tests/testthat/test-analysis_primary_class.R index fc53c2e..0e08e37 100644 --- a/tests/testthat/test-analysis_primary_class.R +++ b/tests/testthat/test-analysis_primary_class.R @@ -5,7 +5,7 @@ test_that("setup_analysis_primary returns valid object", { treatment_col_name = "A", outcome_col_name = c("y1", "y2"), covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), - method_weighting_obj = setup_method_weighting(method_name = "IPW") + method_weighting_obj = suppressWarnings(setup_method_weighting(method_name = "IPW")) ) expect_s4_class(obj, "analysis_primary_obj") expect_identical(obj@method_obj@method_name, "IPW") @@ -19,7 +19,7 @@ test_that("setup_analysis_primary inherits base validation", { treatment_col_name = "A", outcome_col_name = "y1", covariates_col_name = "x1", - method_weighting_obj = setup_method_weighting() + method_weighting_obj = suppressWarnings(setup_method_weighting()) )) expect_error(setup_analysis_primary( data = SyntheticData, @@ -27,7 +27,7 @@ test_that("setup_analysis_primary inherits base validation", { treatment_col_name = "A", outcome_col_name = "y1", covariates_col_name = "x1", - method_weighting_obj = setup_method_weighting() + method_weighting_obj = suppressWarnings(setup_method_weighting()) )) }) @@ -49,7 +49,7 @@ test_that("show method prints without error", { treatment_col_name = "A", outcome_col_name = c("y1", "y2"), covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), - method_weighting_obj = setup_method_weighting(method_name = "IPW") + method_weighting_obj = suppressWarnings(setup_method_weighting(method_name = "IPW")) ) expect_output(show(obj), "analysis_primary_obj") expect_output(show(obj), "IPW") diff --git a/tests/testthat/test-full_pipeline_ec_aipw.R b/tests/testthat/test-full_pipeline_ec_aipw.R index 119cda0..2fc5a19 100644 --- a/tests/testthat/test-full_pipeline_ec_aipw.R +++ b/tests/testthat/test-full_pipeline_ec_aipw.R @@ -131,3 +131,109 @@ test_that("EC-AIPW bootstrap preserves point estimates", { c("point_estimates", "standard_deviation", "lower_CI_boot", "upper_CI_boot") ) }) + +# new API---- + +test_that("ec_aipw() optimal weight matches old API", { + method <- ec_aipw( + ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", + outcome_formula = c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5" + ) + ) + analysis <- setup_analysis_primary( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + method_weighting_obj = method + ) + res <- run_analysis(analysis) + + expect_equal(res$borrow_weight, 0.1475196487, tolerance = tol) + expect_equal(res$results$point_estimates[1], -0.5463256250, tolerance = tol) + expect_equal(res$results$point_estimates[2], 0.5401749583, tolerance = tol) + expect_equal(res$results$standard_deviation[1], 0.5305614087, tolerance = tol) + expect_equal(res$results$standard_deviation[2], 0.5543275065, tolerance = tol) +}) + +test_that("ec_aipw() weight=0 matches old API", { + method <- ec_aipw( + ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", + outcome_formula = c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5" + ), + weight = 0 + ) + analysis <- setup_analysis_primary( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + method_weighting_obj = method + ) + res <- run_analysis(analysis) + + expect_equal(res$borrow_weight, 0) + expect_equal(res$results$point_estimates[1], -0.4361151144, tolerance = tol) + expect_equal(res$results$point_estimates[2], 0.4422248202, tolerance = tol) +}) + +test_that("ec_aipw() weight=0.3 matches old API", { + method <- ec_aipw( + ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", + outcome_formula = c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5" + ), + weight = 0.3 + ) + analysis <- setup_analysis_primary( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + method_weighting_obj = method + ) + res <- run_analysis(analysis) + + expect_equal(res$borrow_weight, 0.3) + expect_equal(res$results$point_estimates[1], -0.6602422289, tolerance = tol) + expect_equal(res$results$point_estimates[2], 0.6414189052, tolerance = tol) +}) + +test_that("ec_aipw() with bootstrap", { + method <- ec_aipw( + ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", + outcome_formula = c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5" + ), + bootstrap = 50, + bootstrap_ci_type = "perc" + ) + analysis <- setup_analysis_primary( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + method_weighting_obj = method + ) + + set.seed(42) + res <- run_analysis(analysis) + + expect_equal(res$results$point_estimates[1], -0.5463256250, tolerance = tol) + expect_equal(res$results$point_estimates[2], 0.5401749583, tolerance = tol) + expect_equal(res$borrow_weight, 0.1475196487, tolerance = tol) + expect_named( + res$results, + c("point_estimates", "standard_deviation", "lower_CI_boot", "upper_CI_boot") + ) +}) diff --git a/tests/testthat/test-full_pipeline_simulation_primary.R b/tests/testthat/test-full_pipeline_simulation_primary.R index 065ca4d..e494017 100644 --- a/tests/testthat/test-full_pipeline_simulation_primary.R +++ b/tests/testthat/test-full_pipeline_simulation_primary.R @@ -121,7 +121,7 @@ generate_primary_sim_data <- function() { test_that("Primary simulation (parametric) report matches vignette", { sim_data <- generate_primary_sim_data() - method_obj_list <- list( + method_obj_list <- suppressWarnings(list( setup_method_weighting( method_name = "IPW", optimal_weight_flag = TRUE, model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5" @@ -146,7 +146,7 @@ test_that("Primary simulation (parametric) report matches vignette", { "y2 ~ x1 + x2 + x3 + x4 + x5" ) ) - ) + )) sim_obj <- setup_simulation_primary( data_matrix_list_null = sim_data$null, diff --git a/tests/testthat/test-method_weighting_class.R b/tests/testthat/test-method_weighting_class.R index 8167124..16dabe1 100644 --- a/tests/testthat/test-method_weighting_class.R +++ b/tests/testthat/test-method_weighting_class.R @@ -1,5 +1,5 @@ test_that("setup_method_weighting returns default object", { - obj <- setup_method_weighting() + obj <- suppressWarnings(setup_method_weighting()) expect_s4_class(obj, "method_weighting_obj") expect_identical(obj@method_name, "IPW") expect_identical(obj@optimal_weight_flag, FALSE) @@ -8,14 +8,14 @@ test_that("setup_method_weighting returns default object", { }) test_that("setup_method_weighting accepts valid arguments", { - obj <- setup_method_weighting( + obj <- suppressWarnings(setup_method_weighting( method_name = "AIPW", optimal_weight_flag = TRUE, wt = 0.5, bootstrap_flag = TRUE, bootstrap_obj = setup_bootstrap(), model_form_piS = "S ~ x1 + x2" - ) + )) expect_identical(obj@method_name, "AIPW") expect_identical(obj@optimal_weight_flag, TRUE) expect_identical(obj@wt, 0.5) @@ -23,13 +23,13 @@ test_that("setup_method_weighting accepts valid arguments", { }) test_that("setup_method_weighting inherits base validation", { - expect_error(setup_method_weighting(method_name = 123)) - expect_error(setup_method_weighting(bootstrap_flag = "yes")) - expect_error(setup_method_weighting(bootstrap_obj = list())) + expect_warning(expect_error(setup_method_weighting(method_name = 123))) + expect_warning(expect_error(setup_method_weighting(bootstrap_flag = "yes"))) + expect_warning(expect_error(setup_method_weighting(bootstrap_obj = list()))) }) test_that("setup_method_weighting validates weighting-specific args", { - expect_error(setup_method_weighting(optimal_weight_flag = "yes")) - expect_error(setup_method_weighting(wt = "heavy")) - expect_error(setup_method_weighting(model_form_piS = 123)) + expect_warning(expect_error(setup_method_weighting(optimal_weight_flag = "yes"))) + expect_warning(expect_error(setup_method_weighting(wt = "heavy"))) + expect_warning(expect_error(setup_method_weighting(model_form_piS = 123))) }) From 227a2ec2d93fda3ee709729ec505928e0785c216 Mon Sep 17 00:00:00 2001 From: Matt Secrest Date: Fri, 15 May 2026 05:32:33 -0700 Subject: [PATCH 10/16] wip updating methods --- DESCRIPTION | 4 + NAMESPACE | 4 + R/did_ec_aipw.R | 286 ++++++++++++++++++ R/did_ec_ipw.R | 256 ++++++++++++++++ R/did_ec_or.R | 262 ++++++++++++++++ R/ec_ipw.R | 17 +- R/run_analysis.R | 14 + R/scm_method.R | 124 ++++++++ _pkgdown.yml | 4 + man/did_ec_aipw.Rd | 55 ++++ man/did_ec_ipw.Rd | 46 +++ man/did_ec_or.Rd | 56 ++++ man/estimate.Rd | 69 +++-- man/scm.Rd | 51 ++++ .../testthat/test-full_pipeline_did_ec_aipw.R | 33 ++ .../testthat/test-full_pipeline_did_ec_ipw.R | 27 ++ tests/testthat/test-full_pipeline_did_ec_or.R | 34 +++ tests/testthat/test-full_pipeline_scm.R | 31 ++ 18 files changed, 1343 insertions(+), 30 deletions(-) create mode 100644 R/did_ec_aipw.R create mode 100644 R/did_ec_ipw.R create mode 100644 R/did_ec_or.R create mode 100644 R/scm_method.R create mode 100644 man/did_ec_aipw.Rd create mode 100644 man/did_ec_ipw.Rd create mode 100644 man/did_ec_or.Rd create mode 100644 man/scm.Rd diff --git a/DESCRIPTION b/DESCRIPTION index a1abad4..69a579b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -95,11 +95,15 @@ Collate: 'analysis_primary_class.R' 'data.R' 'ec_ipw.R' + 'did_ec_ipw.R' + 'did_ec_aipw.R' + 'did_ec_or.R' 'ec_aipw.R' 'package.R' 'rdborrow-package.R' 'run_analysis.R' 'run_simulation.R' + 'scm_method.R' 'simulate_X_copula.R' 'simulate_X_dct_mvnorm.R' 'simulate_X_mixture.R' diff --git a/NAMESPACE b/NAMESPACE index 4919df1..cbbe409 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,10 +1,14 @@ # Generated by roxygen2: do not edit by hand +export(did_ec_aipw) +export(did_ec_ipw) +export(did_ec_or) export(ec_aipw) export(ec_ipw) export(estimate) export(run_analysis) export(run_simulation) +export(scm) export(setup_analysis_OLE) export(setup_analysis_primary) export(setup_bootstrap) diff --git a/R/did_ec_aipw.R b/R/did_ec_aipw.R new file mode 100644 index 0000000..45b1434 --- /dev/null +++ b/R/did_ec_aipw.R @@ -0,0 +1,286 @@ +#' @include did_ec_ipw.R +NULL + +# s4 class definition---- +.did_ec_aipw_method <- setClass( + "did_ec_aipw_method", + contains = "method_OLE_obj", + slots = c( + ps_formula = "character", + trt_formula = "character", + outcome_formula = "character", + bootstrap = "numericOrNULL", + bootstrap_ci_type = "character" + ), + prototype = list( + method_name = "DID-EC-AIPW", + ps_formula = "", + trt_formula = "", + outcome_formula = "", + bootstrap = NULL, + bootstrap_ci_type = "perc" + ) +) + +# constructor---- + +#' DID-EC-AIPW method constructor +#' +#' Creates a method object for difference-in-differences augmented IPW +#' estimation with external control borrowing for the open-label +#' extension phase (Zhou et al., 2024). +#' +#' @param ps_formula Formula string for the propensity score model +#' predicting trial participation. +#' @param trt_formula Formula string for the treatment assignment model, +#' or \code{""} for marginal probability. +#' @param outcome_formula Character vector of outcome model formulas, +#' one per time point. +#' @param bootstrap Number of bootstrap replicates. Defaults to 500. +#' @param bootstrap_ci_type Bootstrap CI type. Defaults to \code{"perc"}. +#' +#' @return An S4 object of class \code{did_ec_aipw_method}. +#' +#' @references +#' Zhou et al. (2024). Estimating treatment effect in randomized trial +#' after control to treatment crossover using external controls. +#' \emph{Journal of Biopharmaceutical Statistics}. +#' \doi{10.1080/10543406.2024.2444222} +#' +#' @export +#' +#' @examples +#' did_ec_aipw( +#' ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", +#' trt_formula = "A ~ x1 + x2 + x3 + x4 + x5", +#' outcome_formula = c( +#' "y1 ~ x1 + x2 + x3 + x4 + x5", +#' "y2 ~ x1 + x2 + x3 + x4 + x5", +#' "y3 ~ x1 + x2 + x3 + x4 + x5", +#' "y4 ~ x1 + x2 + x3 + x4 + x5" +#' ), +#' bootstrap = 500 +#' ) +did_ec_aipw <- function(ps_formula, + trt_formula = "", + outcome_formula, + bootstrap = 500L, + bootstrap_ci_type = NULL) { + checkmate::assert_string(ps_formula) + checkmate::assert_string(trt_formula) + checkmate::assert_character(outcome_formula, min.len = 1) + checkmate::assert_count(bootstrap, positive = TRUE) + + if (is.null(bootstrap_ci_type)) { + bootstrap_ci_type <- "perc" + } + checkmate::assert_choice( + bootstrap_ci_type, c("perc", "bca", "norm", "basic", "stud") + ) + + .did_ec_aipw_method( + ps_formula = ps_formula, + trt_formula = trt_formula, + outcome_formula = outcome_formula, + bootstrap = bootstrap, + bootstrap_ci_type = bootstrap_ci_type, + method_name = "DID-EC-AIPW", + bootstrap_flag = TRUE, + bootstrap_obj = .bootstrap_obj( + replicates = bootstrap, + bootstrap_CI_type = bootstrap_ci_type + ) + ) +} + +# estimate() method---- + +#' @rdname estimate +setMethod("estimate", "did_ec_aipw_method", function(method, data, outcomes, + treatment, trial_status, + covariates, alpha = 0.05, + quiet = TRUE, + T_cross) { + Y <- as.matrix(data[, outcomes, drop = FALSE]) + S <- data[[trial_status]] + A <- data[[treatment]] + n_time <- ncol(Y) + N <- nrow(data) + n <- sum(S) + pi_S <- n / N + + ps_formula <- sub("^[^~]*~", paste0(trial_status, " ~"), method@ps_formula) + trt_formula <- method@trt_formula + if (trt_formula != "") { + trt_formula <- sub("^[^~]*~", paste0(treatment, " ~"), trt_formula) + } + + df <- data.frame(Y, S = S, A = A, data[, covariates, drop = FALSE]) + + if (!quiet) cat("Running DID-EC-AIPW estimator...\n") + + result <- .did_ec_aipw_estimate( + df, Y, S, A, n, N, pi_S, n_time, + T_cross, ps_formula, trt_formula, + method@outcome_formula + ) + tau <- result$tau + + if (!quiet) cat("Running bootstrap inference...\n") + group_id <- as.integer(interaction(df$S, df$A, drop = TRUE)) + + boot_ci_type_long <- switch(method@bootstrap_ci_type, + norm = "normal", + bca = "bca", + stud = "student", + perc = "percent", + basic = "basic" + ) + + boot_out <- boot::boot( + data = df, + statistic = .did_ec_aipw_statistic, + outcomes = outcomes, + ps_formula = ps_formula, + trt_formula = trt_formula, + outcome_formula = method@outcome_formula, + T_cross = T_cross, + R = method@bootstrap, + strata = group_id + ) + + n_ole <- n_time - T_cross + + lower_ci <- vapply(seq_len(n_ole), \(i) { + ci <- boot::boot.ci(boot_out, + conf = 1 - alpha, + type = method@bootstrap_ci_type, index = i + ) + ci[[boot_ci_type_long]][4] + }, numeric(1)) + + upper_ci <- vapply(seq_len(n_ole), \(i) { + ci <- boot::boot.ci(boot_out, + conf = 1 - alpha, + type = method@bootstrap_ci_type, index = i + ) + ci[[boot_ci_type_long]][5] + }, numeric(1)) + + data.frame( + point_estimates = tau, + lower_CI_boot = lower_ci, + upper_CI_boot = upper_ci, + row.names = paste0("tau", (T_cross + 1):n_time) + ) +}) + +# internal helpers---- + +#' @noRd +.did_ec_aipw_estimate <- function(df, Y, S, A, n, N, pi_S, n_time, + T_cross, ps_formula, trt_formula, + outcome_formula) { + ps_model <- glm(as.formula(ps_formula), data = df, family = "binomial") + pi_SX <- predict(ps_model, newdata = df, type = "response") + + if (trt_formula == "") { + pi_AX <- sum(A[S == 1]) / n + } else { + trt_model <- glm(as.formula(trt_formula), + data = df[S == 1, , drop = FALSE], family = "binomial" + ) + pi_AX <- predict(trt_model, newdata = df, type = "response") + } + + # outcome models on external controls + Y0_models <- lapply(outcome_formula, \(f) { + lm(as.formula(f), data = df[S == 0, , drop = FALSE]) + }) + Y0 <- vapply(seq_len(n_time), \(t) { + predict(Y0_models[[t]], newdata = df) + }, numeric(N)) + Yr <- Y - Y0 + + rx <- pi_SX * (1 - pi_S) / (1 - pi_SX) / pi_S + w11 <- 1 / pi_AX + w10 <- 1 / (1 - pi_AX) + w00 <- rx + + potential <- (S * A * w11 / sum(S * A * w11) + + S * (1 - A) * w10 / sum(S * (1 - A) * w10) + + (1 - S) * w00 / sum((1 - S) * w00)) * Yr + + T_pc <- T_cross + mu_S1A1 <- colSums( + potential[S == 1 & A == 1, (T_pc + 1):n_time, drop = FALSE] + ) + mu_S0A0 <- colSums( + potential[S == 0, (T_pc + 1):n_time, drop = FALSE] + ) + bias <- sum(rowMeans( + potential[S == 1 & A == 0, 1:T_pc, drop = FALSE] + )) - sum(rowMeans( + potential[S == 0, 1:T_pc, drop = FALSE] + )) + + tau <- mu_S1A1 - mu_S0A0 - bias + list(tau = tau) +} + +#' @noRd +.did_ec_aipw_statistic <- function(data, indices, outcomes, ps_formula, + trt_formula, outcome_formula, T_cross) { + d <- data[indices, , drop = FALSE] + Y <- as.matrix(d[, outcomes, drop = FALSE]) + S <- d$S + A <- d$A + n <- sum(S) + N <- nrow(d) + n_time <- ncol(Y) + pi_S <- n / N + + ps_model <- glm(as.formula(ps_formula), data = d, family = "binomial") + pi_SX <- predict(ps_model, newdata = d, type = "response") + + if (trt_formula == "") { + pi_AX <- sum(A[S == 1]) / n + } else { + trt_model <- glm(as.formula(trt_formula), + data = d[S == 1, , drop = FALSE], family = "binomial" + ) + pi_AX <- predict(trt_model, newdata = d, type = "response") + } + + Y0_models <- lapply(outcome_formula, \(f) { + lm(as.formula(f), data = d[S == 0, , drop = FALSE]) + }) + Y0 <- vapply(seq_len(n_time), \(t) { + predict(Y0_models[[t]], newdata = d) + }, numeric(N)) + Yr <- Y - Y0 + + rx <- pi_SX * (1 - pi_S) / (1 - pi_SX) / pi_S + w11 <- 1 / pi_AX + w10 <- 1 / (1 - pi_AX) + w00 <- rx + + potential <- (S * A * w11 / sum(S * A * w11) + + S * (1 - A) * w10 / sum(S * (1 - A) * w10) + + (1 - S) * w00 / sum((1 - S) * w00)) * Yr + + T_pc <- T_cross + mu_S1A1 <- colSums( + potential[S == 1 & A == 1, (T_pc + 1):n_time, drop = FALSE] + ) + mu_S0A0 <- colSums( + potential[S == 0, (T_pc + 1):n_time, drop = FALSE] + ) + bias <- sum(rowMeans( + potential[S == 1 & A == 0, 1:T_pc, drop = FALSE] + )) - sum(rowMeans( + potential[S == 0, 1:T_pc, drop = FALSE] + )) + + mu_S1A1 - mu_S0A0 - bias +} diff --git a/R/did_ec_ipw.R b/R/did_ec_ipw.R new file mode 100644 index 0000000..570135f --- /dev/null +++ b/R/did_ec_ipw.R @@ -0,0 +1,256 @@ +#' @include ec_ipw.R +NULL + +# s4 class definition---- +.did_ec_ipw_method <- setClass( + "did_ec_ipw_method", + contains = "method_OLE_obj", + slots = c( + ps_formula = "character", + trt_formula = "character", + bootstrap = "numericOrNULL", + bootstrap_ci_type = "character" + ), + prototype = list( + method_name = "DID-EC-IPW", + ps_formula = "", + trt_formula = "", + bootstrap = NULL, + bootstrap_ci_type = "perc" + ) +) + +# constructor---- + +#' DID-EC-IPW method constructor +#' +#' Creates a method object for difference-in-differences IPW estimation +#' with external control borrowing for the open-label extension phase +#' (Zhou et al., 2024). +#' +#' @param ps_formula Formula string for the propensity score model +#' predicting trial participation. +#' @param trt_formula Formula string for the treatment assignment model, +#' or \code{""} for marginal probability. +#' @param bootstrap Number of bootstrap replicates (required for DID +#' methods). Defaults to 500. +#' @param bootstrap_ci_type Bootstrap CI type. Defaults to \code{"perc"}. +#' +#' @return An S4 object of class \code{did_ec_ipw_method}. +#' +#' @references +#' Zhou et al. (2024). Estimating treatment effect in randomized trial +#' after control to treatment crossover using external controls. +#' \emph{Journal of Biopharmaceutical Statistics}. +#' \doi{10.1080/10543406.2024.2444222} +#' +#' @export +#' +#' @examples +#' did_ec_ipw( +#' ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", +#' trt_formula = "A ~ x1 + x2 + x3 + x4 + x5", +#' bootstrap = 500 +#' ) +did_ec_ipw <- function(ps_formula, + trt_formula = "", + bootstrap = 500L, + bootstrap_ci_type = NULL) { + checkmate::assert_string(ps_formula) + checkmate::assert_string(trt_formula) + checkmate::assert_count(bootstrap, positive = TRUE) + + if (is.null(bootstrap_ci_type)) { + bootstrap_ci_type <- "perc" + } + checkmate::assert_choice( + bootstrap_ci_type, c("perc", "bca", "norm", "basic", "stud") + ) + + .did_ec_ipw_method( + ps_formula = ps_formula, + trt_formula = trt_formula, + bootstrap = bootstrap, + bootstrap_ci_type = bootstrap_ci_type, + method_name = "DID-EC-IPW", + bootstrap_flag = TRUE, + bootstrap_obj = .bootstrap_obj( + replicates = bootstrap, + bootstrap_CI_type = bootstrap_ci_type + ) + ) +} + +# estimate() method---- + +#' @rdname estimate +setMethod("estimate", "did_ec_ipw_method", function(method, data, outcomes, + treatment, trial_status, + covariates, alpha = 0.05, + quiet = TRUE, + T_cross) { + Y <- as.matrix(data[, outcomes, drop = FALSE]) + S <- data[[trial_status]] + A <- data[[treatment]] + n_time <- ncol(Y) + N <- nrow(data) + n <- sum(S) + pi_S <- n / N + + ps_formula <- sub("^[^~]*~", paste0(trial_status, " ~"), method@ps_formula) + trt_formula <- method@trt_formula + if (trt_formula != "") { + trt_formula <- sub("^[^~]*~", paste0(treatment, " ~"), trt_formula) + } + + df <- data.frame(Y, S = S, A = A, data[, covariates, drop = FALSE]) + + if (!quiet) cat("Running DID-EC-IPW estimator...\n") + + result <- .did_ec_ipw_estimate( + df, Y, S, A, n, N, pi_S, n_time, + T_cross, ps_formula, trt_formula + ) + tau <- result$tau + + if (!quiet) cat("Running bootstrap inference...\n") + group_id <- as.integer(interaction(df$S, df$A, drop = TRUE)) + + boot_ci_type_long <- switch(method@bootstrap_ci_type, + norm = "normal", + bca = "bca", + stud = "student", + perc = "percent", + basic = "basic" + ) + + boot_out <- boot::boot( + data = df, + statistic = .did_ec_ipw_statistic, + outcomes = outcomes, + ps_formula = ps_formula, + trt_formula = trt_formula, + T_cross = T_cross, + R = method@bootstrap, + strata = group_id + ) + + n_ole <- n_time - T_cross + + lower_ci <- vapply(seq_len(n_ole), \(i) { + ci <- boot::boot.ci(boot_out, + conf = 1 - alpha, + type = method@bootstrap_ci_type, index = i + ) + ci[[boot_ci_type_long]][4] + }, numeric(1)) + + upper_ci <- vapply(seq_len(n_ole), \(i) { + ci <- boot::boot.ci(boot_out, + conf = 1 - alpha, + type = method@bootstrap_ci_type, index = i + ) + ci[[boot_ci_type_long]][5] + }, numeric(1)) + + data.frame( + point_estimates = tau, + lower_CI_boot = lower_ci, + upper_CI_boot = upper_ci, + row.names = paste0("tau", (T_cross + 1):n_time) + ) +}) + +# internal helpers---- + +#' @noRd +.did_ec_ipw_estimate <- function(df, Y, S, A, n, N, pi_S, n_time, + T_cross, ps_formula, trt_formula) { + ps_model <- glm(as.formula(ps_formula), data = df, family = "binomial") + pi_SX <- predict(ps_model, newdata = df, type = "response") + + if (trt_formula == "") { + pi_AX <- sum(A[S == 1]) / n + } else { + trt_model <- glm(as.formula(trt_formula), + data = df[S == 1, , drop = FALSE], family = "binomial" + ) + pi_AX <- predict(trt_model, newdata = df, type = "response") + } + + rx <- pi_SX * (1 - pi_S) / (1 - pi_SX) / pi_S + + w11 <- 1 / pi_AX + w10 <- 1 / (1 - pi_AX) + w00 <- rx + + Ys <- as.matrix(Y) + potential <- (S * A * w11 / sum(S * A * w11) + + S * (1 - A) * w10 / sum(S * (1 - A) * w10) + + (1 - S) * w00 / sum((1 - S) * w00)) * Ys + + T_pc <- T_cross + mu_S1A1 <- colSums( + potential[S == 1 & A == 1, (T_pc + 1):n_time, drop = FALSE] + ) + mu_S0A0 <- colSums( + potential[S == 0, (T_pc + 1):n_time, drop = FALSE] + ) + bias <- sum(rowMeans( + potential[S == 1 & A == 0, 1:T_pc, drop = FALSE] + )) - sum(rowMeans( + potential[S == 0, 1:T_pc, drop = FALSE] + )) + + tau <- mu_S1A1 - mu_S0A0 - bias + list(tau = tau) +} + +#' @noRd +.did_ec_ipw_statistic <- function(data, indices, outcomes, ps_formula, + trt_formula, T_cross) { + d <- data[indices, , drop = FALSE] + Y <- as.matrix(d[, outcomes, drop = FALSE]) + S <- d$S + A <- d$A + n <- sum(S) + N <- nrow(d) + n_time <- ncol(Y) + pi_S <- n / N + + ps_model <- glm(as.formula(ps_formula), data = d, family = "binomial") + pi_SX <- predict(ps_model, newdata = d, type = "response") + + if (trt_formula == "") { + pi_AX <- sum(A[S == 1]) / n + } else { + trt_model <- glm(as.formula(trt_formula), + data = d[S == 1, , drop = FALSE], family = "binomial" + ) + pi_AX <- predict(trt_model, newdata = d, type = "response") + } + + rx <- pi_SX * (1 - pi_S) / (1 - pi_SX) / pi_S + w11 <- 1 / pi_AX + w10 <- 1 / (1 - pi_AX) + w00 <- rx + + potential <- (S * A * w11 / sum(S * A * w11) + + S * (1 - A) * w10 / sum(S * (1 - A) * w10) + + (1 - S) * w00 / sum((1 - S) * w00)) * Y + + T_pc <- T_cross + mu_S1A1 <- colSums( + potential[S == 1 & A == 1, (T_pc + 1):n_time, drop = FALSE] + ) + mu_S0A0 <- colSums( + potential[S == 0, (T_pc + 1):n_time, drop = FALSE] + ) + bias <- sum(rowMeans( + potential[S == 1 & A == 0, 1:T_pc, drop = FALSE] + )) - sum(rowMeans( + potential[S == 0, 1:T_pc, drop = FALSE] + )) + + mu_S1A1 - mu_S0A0 - bias +} diff --git a/R/did_ec_or.R b/R/did_ec_or.R new file mode 100644 index 0000000..aa5c2b4 --- /dev/null +++ b/R/did_ec_or.R @@ -0,0 +1,262 @@ +#' @include did_ec_ipw.R +NULL + +# s4 class definition---- +.did_ec_or_method <- setClass( + "did_ec_or_method", + contains = "method_OLE_obj", + slots = c( + outcome_formula_ext = "character", + outcome_formula_rct_ctrl = "character", + outcome_formula_rct_trt = "character", + bootstrap = "numericOrNULL", + bootstrap_ci_type = "character" + ), + prototype = list( + method_name = "DID-EC-OR", + outcome_formula_ext = "", + outcome_formula_rct_ctrl = "", + outcome_formula_rct_trt = "", + bootstrap = NULL, + bootstrap_ci_type = "perc" + ) +) + +# constructor---- + +#' DID-EC-OR method constructor +#' +#' Creates a method object for difference-in-differences outcome +#' regression estimation with external control borrowing for the +#' open-label extension phase (Zhou et al., 2024). +#' +#' @param outcome_formula_ext Character vector of outcome model formulas +#' for external controls, one per time point. +#' @param outcome_formula_rct_ctrl Character vector of outcome model +#' formulas for RCT control subjects, one per time point. +#' @param outcome_formula_rct_trt Character vector of outcome model +#' formulas for RCT treated subjects, one per time point. +#' @param bootstrap Number of bootstrap replicates. Defaults to 500. +#' @param bootstrap_ci_type Bootstrap CI type. Defaults to \code{"perc"}. +#' +#' @return An S4 object of class \code{did_ec_or_method}. +#' +#' @references +#' Zhou et al. (2024). Estimating treatment effect in randomized trial +#' after control to treatment crossover using external controls. +#' \emph{Journal of Biopharmaceutical Statistics}. +#' \doi{10.1080/10543406.2024.2444222} +#' +#' @export +#' +#' @examples +#' model_forms <- c( +#' "y1 ~ x1 + x2 + x3 + x4 + x5", +#' "y2 ~ x1 + x2 + x3 + x4 + x5", +#' "y3 ~ x1 + x2 + x3 + x4 + x5", +#' "y4 ~ x1 + x2 + x3 + x4 + x5" +#' ) +#' did_ec_or( +#' outcome_formula_ext = model_forms, +#' outcome_formula_rct_ctrl = model_forms, +#' outcome_formula_rct_trt = model_forms, +#' bootstrap = 500 +#' ) +did_ec_or <- function(outcome_formula_ext, + outcome_formula_rct_ctrl, + outcome_formula_rct_trt, + bootstrap = 500L, + bootstrap_ci_type = NULL) { + checkmate::assert_character(outcome_formula_ext, min.len = 1) + checkmate::assert_character(outcome_formula_rct_ctrl, min.len = 1) + checkmate::assert_character(outcome_formula_rct_trt, min.len = 1) + checkmate::assert_count(bootstrap, positive = TRUE) + + if (is.null(bootstrap_ci_type)) { + bootstrap_ci_type <- "perc" + } + checkmate::assert_choice( + bootstrap_ci_type, c("perc", "bca", "norm", "basic", "stud") + ) + + .did_ec_or_method( + outcome_formula_ext = outcome_formula_ext, + outcome_formula_rct_ctrl = outcome_formula_rct_ctrl, + outcome_formula_rct_trt = outcome_formula_rct_trt, + bootstrap = bootstrap, + bootstrap_ci_type = bootstrap_ci_type, + method_name = "DID-EC-OR", + bootstrap_flag = TRUE, + bootstrap_obj = .bootstrap_obj( + replicates = bootstrap, + bootstrap_CI_type = bootstrap_ci_type + ) + ) +} + +# estimate() method---- + +#' @rdname estimate +setMethod("estimate", "did_ec_or_method", function(method, data, outcomes, + treatment, trial_status, + covariates, alpha = 0.05, + quiet = TRUE, + T_cross) { + Y <- as.matrix(data[, outcomes, drop = FALSE]) + S <- data[[trial_status]] + A <- data[[treatment]] + n_time <- ncol(Y) + N <- nrow(data) + n <- sum(S) + + df <- data.frame(Y, S = S, A = A, data[, covariates, drop = FALSE]) + + if (!quiet) cat("Running DID-EC-OR estimator...\n") + + result <- .did_ec_or_estimate( + df, S, A, n, n_time, T_cross, + method@outcome_formula_ext, + method@outcome_formula_rct_ctrl, + method@outcome_formula_rct_trt + ) + tau <- result$tau + + if (!quiet) cat("Running bootstrap inference...\n") + group_id <- as.integer(interaction(df$S, df$A, drop = TRUE)) + + boot_ci_type_long <- switch(method@bootstrap_ci_type, + norm = "normal", + bca = "bca", + stud = "student", + perc = "percent", + basic = "basic" + ) + + boot_out <- boot::boot( + data = df, + statistic = .did_ec_or_statistic, + outcomes = outcomes, + outcome_formula_ext = method@outcome_formula_ext, + outcome_formula_rct_ctrl = method@outcome_formula_rct_ctrl, + outcome_formula_rct_trt = method@outcome_formula_rct_trt, + T_cross = T_cross, + R = method@bootstrap, + strata = group_id + ) + + n_ole <- n_time - T_cross + + lower_ci <- vapply(seq_len(n_ole), \(i) { + ci <- boot::boot.ci(boot_out, + conf = 1 - alpha, + type = method@bootstrap_ci_type, index = i + ) + ci[[boot_ci_type_long]][4] + }, numeric(1)) + + upper_ci <- vapply(seq_len(n_ole), \(i) { + ci <- boot::boot.ci(boot_out, + conf = 1 - alpha, + type = method@bootstrap_ci_type, index = i + ) + ci[[boot_ci_type_long]][5] + }, numeric(1)) + + data.frame( + point_estimates = tau, + lower_CI_boot = lower_ci, + upper_CI_boot = upper_ci, + row.names = paste0("tau", (T_cross + 1):n_time) + ) +}) + +# internal helpers---- + +#' @noRd +.did_ec_or_estimate <- function(df, S, A, n, n_time, T_cross, + outcome_formula_ext, + outcome_formula_rct_ctrl, + outcome_formula_rct_trt) { + T_pc <- T_cross + + # external outcome models + model_list_ext <- lapply(seq_len(n_time), \(t) { + lm(as.formula(outcome_formula_ext[t]), data = df[S == 0, , drop = FALSE]) + }) + + # rct outcome models: control for placebo period, treated for OLE period + model_list_rct_pc <- lapply(seq_len(T_pc), \(t) { + lm(as.formula(outcome_formula_rct_ctrl[t]), + data = df[S == 1 & A == 0, , drop = FALSE] + ) + }) + model_list_rct_cr <- lapply((T_pc + 1):n_time, \(t) { + lm(as.formula(outcome_formula_rct_trt[t]), + data = df[S == 1 & A == 1, , drop = FALSE] + ) + }) + model_list_rct <- c(model_list_rct_pc, model_list_rct_cr) + + rct_data <- df[S == 1, , drop = FALSE] + + # predicted outcomes for RCT subjects from external models + mu_S0A0 <- vapply(seq_len(n_time), \(t) { + predict(model_list_ext[[t]], newdata = rct_data) + }, numeric(n)) + avg_S0A0 <- colMeans(mu_S0A0) + + # predicted outcomes for RCT subjects from RCT models + mu_S1 <- vapply(seq_len(n_time), \(t) { + predict(model_list_rct[[t]], newdata = rct_data) + }, numeric(n)) + avg_S1 <- colMeans(mu_S1) + + tau <- (avg_S1[(T_pc + 1):n_time] - mean(avg_S1[1:T_pc])) - + (avg_S0A0[(T_pc + 1):n_time] - mean(avg_S0A0[1:T_pc])) + + list(tau = tau) +} + +#' @noRd +.did_ec_or_statistic <- function(data, indices, outcomes, + outcome_formula_ext, + outcome_formula_rct_ctrl, + outcome_formula_rct_trt, + T_cross) { + d <- data[indices, , drop = FALSE] + S <- d$S + A <- d$A + n <- sum(S) + n_time <- length(outcomes) + T_pc <- T_cross + + model_list_ext <- lapply(seq_len(n_time), \(t) { + lm(as.formula(outcome_formula_ext[t]), data = d[S == 0, , drop = FALSE]) + }) + model_list_rct_pc <- lapply(seq_len(T_pc), \(t) { + lm(as.formula(outcome_formula_rct_ctrl[t]), + data = d[S == 1 & A == 0, , drop = FALSE] + ) + }) + model_list_rct_cr <- lapply((T_pc + 1):n_time, \(t) { + lm(as.formula(outcome_formula_rct_trt[t]), + data = d[S == 1 & A == 1, , drop = FALSE] + ) + }) + model_list_rct <- c(model_list_rct_pc, model_list_rct_cr) + + rct_data <- d[S == 1, , drop = FALSE] + + mu_S0A0 <- vapply(seq_len(n_time), \(t) { + predict(model_list_ext[[t]], newdata = rct_data) + }, numeric(n)) + avg_S0A0 <- colMeans(mu_S0A0) + + mu_S1 <- vapply(seq_len(n_time), \(t) { + predict(model_list_rct[[t]], newdata = rct_data) + }, numeric(n)) + avg_S1 <- colMeans(mu_S1) + + (avg_S1[(T_pc + 1):n_time] - mean(avg_S1[1:T_pc])) - + (avg_S0A0[(T_pc + 1):n_time] - mean(avg_S0A0[1:T_pc])) +} diff --git a/R/ec_ipw.R b/R/ec_ipw.R index d2fe3a8..82e149f 100644 --- a/R/ec_ipw.R +++ b/R/ec_ipw.R @@ -97,22 +97,15 @@ ec_ipw <- function(ps_formula, #' Run estimation for a method object #' +#' S4 generic that dispatches to the appropriate estimation logic +#' based on the method class. Each method defines its own arguments. +#' #' @param method An S4 method object (e.g., from \code{\link{ec_ipw}}). -#' @param data Data frame with all subjects. -#' @param outcomes Character vector of outcome column names. -#' @param treatment Name of the treatment column. -#' @param trial_status Name of the trial participation column. -#' @param covariates Character vector of covariate column names. -#' @param alpha Significance level. -#' @param quiet Logical. Suppress output. +#' @param ... Method-specific arguments (data, outcomes, etc.). #' #' @return A list with estimation results. #' @export -setGeneric("estimate", function(method, data, outcomes, treatment, - trial_status, covariates, alpha = 0.05, - quiet = TRUE) { - standardGeneric("estimate") -}) +setGeneric("estimate", function(method, ...) standardGeneric("estimate")) #' @rdname estimate setMethod("estimate", "ec_ipw_method", function(method, data, outcomes, diff --git a/R/run_analysis.R b/R/run_analysis.R index 13222be..178e4ab 100644 --- a/R/run_analysis.R +++ b/R/run_analysis.R @@ -194,6 +194,20 @@ run_analysis <- function(analysis_obj, quiet = TRUE) { alpha = alpha, quiet = quiet ) + } else if (is(method, "did_ec_ipw_method") || + is(method, "did_ec_aipw_method") || + is(method, "did_ec_or_method") || + is(method, "scm_method")) { + res <- estimate(method, + data = data, + outcomes = outcome_col_name, + treatment = treatment_col_name, + trial_status = trial_status_col_name, + covariates = covariates_col_name, + alpha = alpha, + quiet = quiet, + T_cross = T_cross + ) } else { stop("No such method type is defined!") } diff --git a/R/scm_method.R b/R/scm_method.R new file mode 100644 index 0000000..c59a5d6 --- /dev/null +++ b/R/scm_method.R @@ -0,0 +1,124 @@ +#' @include did_ec_ipw.R +NULL + +# s4 class definition---- +.scm_method <- setClass( + "scm_method", + contains = "method_OLE_obj", + slots = c( + lambda_min = "numeric", + lambda_max = "numeric", + nlambda = "integer", + parallel = "character", + ncpus = "integer", + bootstrap = "numeric", + bootstrap_ci_type = "character" + ), + prototype = list( + method_name = "SCM", + lambda_min = 0, + lambda_max = 0.1, + nlambda = 2L, + parallel = "no", + ncpus = 1L, + bootstrap = 100L, + bootstrap_ci_type = "perc" + ) +) + +# constructor---- + +#' SCM method constructor +#' +#' Creates a method object for synthetic control estimation with +#' external control borrowing for the open-label extension phase +#' (Zhou et al., 2024). The SCM method constructs a weighted +#' combination of external controls that matches each RCT control +#' subject on covariates and pre-crossover outcomes. +#' +#' @param lambda_min Minimum penalty parameter for LOOCV. +#' @param lambda_max Maximum penalty parameter for LOOCV. +#' @param nlambda Number of lambda values to evaluate in LOOCV. +#' @param parallel Parallelization type for bootstrap (\code{"no"}, +#' \code{"multicore"}, or \code{"snow"}). +#' @param ncpus Number of CPUs for parallel bootstrap. +#' @param bootstrap Number of bootstrap replicates. Defaults to 200. +#' @param bootstrap_ci_type Bootstrap CI type. Defaults to \code{"perc"}. +#' +#' @return An S4 object of class \code{scm_method}. +#' +#' @references +#' Zhou et al. (2024). Estimating treatment effect in randomized trial +#' after control to treatment crossover using external controls. +#' \emph{Journal of Biopharmaceutical Statistics}. +#' \doi{10.1080/10543406.2024.2444222} +#' +#' @export +#' +#' @examples +#' scm(lambda_min = 0, lambda_max = 0.001, nlambda = 2, bootstrap = 50) +scm <- function(lambda_min = 0, + lambda_max = 0.1, + nlambda = 2L, + parallel = "no", + ncpus = 1L, + bootstrap = 200L, + bootstrap_ci_type = NULL) { + checkmate::assert_number(lambda_min, lower = 0) + checkmate::assert_number(lambda_max, lower = lambda_min) + checkmate::assert_count(nlambda, positive = TRUE) + checkmate::assert_choice(parallel, c("no", "multicore", "snow")) + checkmate::assert_count(ncpus, positive = TRUE) + checkmate::assert_count(bootstrap, positive = TRUE) + + if (is.null(bootstrap_ci_type)) { + bootstrap_ci_type <- "perc" + } + checkmate::assert_choice( + bootstrap_ci_type, c("perc", "bca", "norm", "basic", "stud") + ) + + .scm_method( + lambda_min = lambda_min, + lambda_max = lambda_max, + nlambda = as.integer(nlambda), + parallel = parallel, + ncpus = as.integer(ncpus), + bootstrap = as.integer(bootstrap), + bootstrap_ci_type = bootstrap_ci_type, + method_name = "SCM", + bootstrap_flag = TRUE, + bootstrap_obj = .bootstrap_obj( + replicates = as.integer(bootstrap), + bootstrap_CI_type = bootstrap_ci_type + ) + ) +} + +# estimate() method---- + +#' @rdname estimate +setMethod("estimate", "scm_method", function(method, data, outcomes, + treatment, trial_status, + covariates, alpha = 0.05, + quiet = TRUE, + T_cross) { + SCM( + data = data, + outcome_col_name = outcomes, + trial_status_col_name = trial_status, + treatment_col_name = treatment, + covariates_col_name = covariates, + T_cross = T_cross, + Bootstrap = TRUE, + R = method@bootstrap, + bootstrap_CI_type = method@bootstrap_ci_type, + alpha = alpha, + lambda.min = method@lambda_min, + lambda.max = method@lambda_max, + nlambda = method@nlambda, + parallel = method@parallel, + ncpus = method@ncpus, + quiet = quiet + ) +}) diff --git a/_pkgdown.yml b/_pkgdown.yml index 5431c13..4de3680 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -52,6 +52,10 @@ reference: contents: - ec_ipw - ec_aipw + - did_ec_ipw + - did_ec_aipw + - did_ec_or + - scm - estimate - title: Method Configuration (legacy) diff --git a/man/did_ec_aipw.Rd b/man/did_ec_aipw.Rd new file mode 100644 index 0000000..4f7e886 --- /dev/null +++ b/man/did_ec_aipw.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/did_ec_aipw.R +\name{did_ec_aipw} +\alias{did_ec_aipw} +\title{DID-EC-AIPW method constructor} +\usage{ +did_ec_aipw( + ps_formula, + trt_formula = "", + outcome_formula, + bootstrap = 500L, + bootstrap_ci_type = NULL +) +} +\arguments{ +\item{ps_formula}{Formula string for the propensity score model +predicting trial participation.} + +\item{trt_formula}{Formula string for the treatment assignment model, +or \code{""} for marginal probability.} + +\item{outcome_formula}{Character vector of outcome model formulas, +one per time point.} + +\item{bootstrap}{Number of bootstrap replicates. Defaults to 500.} + +\item{bootstrap_ci_type}{Bootstrap CI type. Defaults to \code{"perc"}.} +} +\value{ +An S4 object of class \code{did_ec_aipw_method}. +} +\description{ +Creates a method object for difference-in-differences augmented IPW +estimation with external control borrowing for the open-label +extension phase (Zhou et al., 2024). +} +\examples{ +did_ec_aipw( + ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", + trt_formula = "A ~ x1 + x2 + x3 + x4 + x5", + outcome_formula = c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5", + "y3 ~ x1 + x2 + x3 + x4 + x5", + "y4 ~ x1 + x2 + x3 + x4 + x5" + ), + bootstrap = 500 +) +} +\references{ +Zhou et al. (2024). Estimating treatment effect in randomized trial +after control to treatment crossover using external controls. +\emph{Journal of Biopharmaceutical Statistics}. +\doi{10.1080/10543406.2024.2444222} +} diff --git a/man/did_ec_ipw.Rd b/man/did_ec_ipw.Rd new file mode 100644 index 0000000..c2d369c --- /dev/null +++ b/man/did_ec_ipw.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/did_ec_ipw.R +\name{did_ec_ipw} +\alias{did_ec_ipw} +\title{DID-EC-IPW method constructor} +\usage{ +did_ec_ipw( + ps_formula, + trt_formula = "", + bootstrap = 500L, + bootstrap_ci_type = NULL +) +} +\arguments{ +\item{ps_formula}{Formula string for the propensity score model +predicting trial participation.} + +\item{trt_formula}{Formula string for the treatment assignment model, +or \code{""} for marginal probability.} + +\item{bootstrap}{Number of bootstrap replicates (required for DID +methods). Defaults to 500.} + +\item{bootstrap_ci_type}{Bootstrap CI type. Defaults to \code{"perc"}.} +} +\value{ +An S4 object of class \code{did_ec_ipw_method}. +} +\description{ +Creates a method object for difference-in-differences IPW estimation +with external control borrowing for the open-label extension phase +(Zhou et al., 2024). +} +\examples{ +did_ec_ipw( + ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", + trt_formula = "A ~ x1 + x2 + x3 + x4 + x5", + bootstrap = 500 +) +} +\references{ +Zhou et al. (2024). Estimating treatment effect in randomized trial +after control to treatment crossover using external controls. +\emph{Journal of Biopharmaceutical Statistics}. +\doi{10.1080/10543406.2024.2444222} +} diff --git a/man/did_ec_or.Rd b/man/did_ec_or.Rd new file mode 100644 index 0000000..0aef013 --- /dev/null +++ b/man/did_ec_or.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/did_ec_or.R +\name{did_ec_or} +\alias{did_ec_or} +\title{DID-EC-OR method constructor} +\usage{ +did_ec_or( + outcome_formula_ext, + outcome_formula_rct_ctrl, + outcome_formula_rct_trt, + bootstrap = 500L, + bootstrap_ci_type = NULL +) +} +\arguments{ +\item{outcome_formula_ext}{Character vector of outcome model formulas +for external controls, one per time point.} + +\item{outcome_formula_rct_ctrl}{Character vector of outcome model +formulas for RCT control subjects, one per time point.} + +\item{outcome_formula_rct_trt}{Character vector of outcome model +formulas for RCT treated subjects, one per time point.} + +\item{bootstrap}{Number of bootstrap replicates. Defaults to 500.} + +\item{bootstrap_ci_type}{Bootstrap CI type. Defaults to \code{"perc"}.} +} +\value{ +An S4 object of class \code{did_ec_or_method}. +} +\description{ +Creates a method object for difference-in-differences outcome +regression estimation with external control borrowing for the +open-label extension phase (Zhou et al., 2024). +} +\examples{ +model_forms <- c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5", + "y3 ~ x1 + x2 + x3 + x4 + x5", + "y4 ~ x1 + x2 + x3 + x4 + x5" +) +did_ec_or( + outcome_formula_ext = model_forms, + outcome_formula_rct_ctrl = model_forms, + outcome_formula_rct_trt = model_forms, + bootstrap = 500 +) +} +\references{ +Zhou et al. (2024). Estimating treatment effect in randomized trial +after control to treatment crossover using external controls. +\emph{Journal of Biopharmaceutical Statistics}. +\doi{10.1080/10543406.2024.2444222} +} diff --git a/man/estimate.Rd b/man/estimate.Rd index abb18c5..3fbd2fe 100644 --- a/man/estimate.Rd +++ b/man/estimate.Rd @@ -1,12 +1,19 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ec_ipw.R, R/ec_aipw.R +% Please edit documentation in R/ec_ipw.R, R/did_ec_ipw.R, R/did_ec_aipw.R, +% R/did_ec_or.R, R/ec_aipw.R, R/scm_method.R \name{estimate} \alias{estimate} \alias{estimate,ec_ipw_method-method} +\alias{estimate,did_ec_ipw_method-method} +\alias{estimate,did_ec_aipw_method-method} +\alias{estimate,did_ec_or_method-method} \alias{estimate,ec_aipw_method-method} +\alias{estimate,scm_method-method} \title{Run estimation for a method object} \usage{ -estimate( +estimate(method, ...) + +\S4method{estimate}{ec_ipw_method}( method, data, outcomes, @@ -17,7 +24,7 @@ estimate( quiet = TRUE ) -\S4method{estimate}{ec_ipw_method}( +\S4method{estimate}{did_ec_ipw_method}( method, data, outcomes, @@ -25,7 +32,32 @@ estimate( trial_status, covariates, alpha = 0.05, - quiet = TRUE + quiet = TRUE, + T_cross +) + +\S4method{estimate}{did_ec_aipw_method}( + method, + data, + outcomes, + treatment, + trial_status, + covariates, + alpha = 0.05, + quiet = TRUE, + T_cross +) + +\S4method{estimate}{did_ec_or_method}( + method, + data, + outcomes, + treatment, + trial_status, + covariates, + alpha = 0.05, + quiet = TRUE, + T_cross ) \S4method{estimate}{ec_aipw_method}( @@ -38,27 +70,28 @@ estimate( alpha = 0.05, quiet = TRUE ) + +\S4method{estimate}{scm_method}( + method, + data, + outcomes, + treatment, + trial_status, + covariates, + alpha = 0.05, + quiet = TRUE, + T_cross +) } \arguments{ \item{method}{An S4 method object (e.g., from \code{\link{ec_ipw}}).} -\item{data}{Data frame with all subjects.} - -\item{outcomes}{Character vector of outcome column names.} - -\item{treatment}{Name of the treatment column.} - -\item{trial_status}{Name of the trial participation column.} - -\item{covariates}{Character vector of covariate column names.} - -\item{alpha}{Significance level.} - -\item{quiet}{Logical. Suppress output.} +\item{...}{Method-specific arguments (data, outcomes, etc.).} } \value{ A list with estimation results. } \description{ -Run estimation for a method object +S4 generic that dispatches to the appropriate estimation logic +based on the method class. Each method defines its own arguments. } diff --git a/man/scm.Rd b/man/scm.Rd new file mode 100644 index 0000000..a234f59 --- /dev/null +++ b/man/scm.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/scm_method.R +\name{scm} +\alias{scm} +\title{SCM method constructor} +\usage{ +scm( + lambda_min = 0, + lambda_max = 0.1, + nlambda = 2L, + parallel = "no", + ncpus = 1L, + bootstrap = 200L, + bootstrap_ci_type = NULL +) +} +\arguments{ +\item{lambda_min}{Minimum penalty parameter for LOOCV.} + +\item{lambda_max}{Maximum penalty parameter for LOOCV.} + +\item{nlambda}{Number of lambda values to evaluate in LOOCV.} + +\item{parallel}{Parallelization type for bootstrap (\code{"no"}, +\code{"multicore"}, or \code{"snow"}).} + +\item{ncpus}{Number of CPUs for parallel bootstrap.} + +\item{bootstrap}{Number of bootstrap replicates. Defaults to 200.} + +\item{bootstrap_ci_type}{Bootstrap CI type. Defaults to \code{"perc"}.} +} +\value{ +An S4 object of class \code{scm_method}. +} +\description{ +Creates a method object for synthetic control estimation with +external control borrowing for the open-label extension phase +(Zhou et al., 2024). The SCM method constructs a weighted +combination of external controls that matches each RCT control +subject on covariates and pre-crossover outcomes. +} +\examples{ +scm(lambda_min = 0, lambda_max = 0.001, nlambda = 2, bootstrap = 50) +} +\references{ +Zhou et al. (2024). Estimating treatment effect in randomized trial +after control to treatment crossover using external controls. +\emph{Journal of Biopharmaceutical Statistics}. +\doi{10.1080/10543406.2024.2444222} +} diff --git a/tests/testthat/test-full_pipeline_did_ec_aipw.R b/tests/testthat/test-full_pipeline_did_ec_aipw.R index 4217772..25b196a 100644 --- a/tests/testthat/test-full_pipeline_did_ec_aipw.R +++ b/tests/testthat/test-full_pipeline_did_ec_aipw.R @@ -33,3 +33,36 @@ test_that("DID-EC-AIPW point estimates and bootstrap CIs", { expect_equal(res$point_estimates[1], 2.0417274627, tolerance = tol) expect_equal(res$point_estimates[2], 4.0361177594, tolerance = tol) }) + +# new API---- + +test_that("did_ec_aipw() matches old API", { + method <- did_ec_aipw( + ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", + trt_formula = "A ~ x1 + x2 + x3 + x4 + x5", + outcome_formula = c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5", + "y3 ~ x1 + x2 + x3 + x4 + x5", + "y4 ~ x1 + x2 + x3 + x4 + x5" + ), + bootstrap = 50 + ) + analysis <- setup_analysis_OLE( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2", "y3", "y4"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + T_cross = 2, + method_OLE_obj = method + ) + + set.seed(42) + res <- run_analysis(analysis) + + expect_s3_class(res, "data.frame") + expect_equal(nrow(res), 2) + expect_equal(res$point_estimates[1], 2.0417274627, tolerance = tol) + expect_equal(res$point_estimates[2], 4.0361177594, tolerance = tol) +}) diff --git a/tests/testthat/test-full_pipeline_did_ec_ipw.R b/tests/testthat/test-full_pipeline_did_ec_ipw.R index a4cd5a3..99f2b01 100644 --- a/tests/testthat/test-full_pipeline_did_ec_ipw.R +++ b/tests/testthat/test-full_pipeline_did_ec_ipw.R @@ -27,3 +27,30 @@ test_that("DID-EC-IPW point estimates and bootstrap CIs", { expect_equal(res$point_estimates[1], 2.0759256334, tolerance = tol) expect_equal(res$point_estimates[2], 4.3894380397, tolerance = tol) }) + +# new API---- + +test_that("did_ec_ipw() matches old API", { + method <- did_ec_ipw( + ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", + trt_formula = "A ~ x1 + x2 + x3 + x4 + x5", + bootstrap = 50 + ) + analysis <- setup_analysis_OLE( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2", "y3", "y4"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + T_cross = 2, + method_OLE_obj = method + ) + + set.seed(42) + res <- run_analysis(analysis) + + expect_s3_class(res, "data.frame") + expect_equal(nrow(res), 2) + expect_equal(res$point_estimates[1], 2.0759256334, tolerance = tol) + expect_equal(res$point_estimates[2], 4.3894380397, tolerance = tol) +}) diff --git a/tests/testthat/test-full_pipeline_did_ec_or.R b/tests/testthat/test-full_pipeline_did_ec_or.R index 14cf520..7caf3d2 100644 --- a/tests/testthat/test-full_pipeline_did_ec_or.R +++ b/tests/testthat/test-full_pipeline_did_ec_or.R @@ -34,3 +34,37 @@ test_that("DID-EC-OR point estimates and bootstrap CIs", { expect_equal(res$point_estimates[1], 1.5689465845, tolerance = tol) expect_equal(res$point_estimates[2], 4.4078336543, tolerance = tol) }) + +# new API---- + +test_that("did_ec_or() matches old API", { + model_forms <- c( + "y1 ~ x1 + x2 + x3 + x4 + x5", + "y2 ~ x1 + x2 + x3 + x4 + x5", + "y3 ~ x1 + x2 + x3 + x4 + x5", + "y4 ~ x1 + x2 + x3 + x4 + x5" + ) + method <- did_ec_or( + outcome_formula_ext = model_forms, + outcome_formula_rct_ctrl = model_forms, + outcome_formula_rct_trt = model_forms, + bootstrap = 50 + ) + analysis <- setup_analysis_OLE( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2", "y3", "y4"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + T_cross = 2, + method_OLE_obj = method + ) + + set.seed(42) + res <- run_analysis(analysis) + + expect_s3_class(res, "data.frame") + expect_equal(nrow(res), 2) + expect_equal(res$point_estimates[1], 1.5689465845, tolerance = tol) + expect_equal(res$point_estimates[2], 4.4078336543, tolerance = tol) +}) diff --git a/tests/testthat/test-full_pipeline_scm.R b/tests/testthat/test-full_pipeline_scm.R index 411234f..f5cb5c0 100644 --- a/tests/testthat/test-full_pipeline_scm.R +++ b/tests/testthat/test-full_pipeline_scm.R @@ -33,3 +33,34 @@ test_that("SCM runs and returns valid structure", { expect_all_true(is.finite(res$point_estimates)) expect_all_true(res$lower_CI_boot <= res$upper_CI_boot) }) + +# new API---- + +test_that("scm() matches old API structure", { + skip_on_cran() + + method <- scm( + lambda_min = 0.0005, + lambda_max = 0.0005, + nlambda = 1, + bootstrap = 5, + bootstrap_ci_type = "perc" + ) + analysis <- setup_analysis_OLE( + data = SyntheticData, + trial_status_col_name = "S", + treatment_col_name = "A", + outcome_col_name = c("y1", "y2", "y3", "y4"), + covariates_col_name = c("x1", "x2", "x3", "x4", "x5"), + T_cross = 2, + method_OLE_obj = method + ) + + set.seed(42) + res <- suppressWarnings(run_analysis(analysis)) + + expect_s3_class(res, "data.frame") + expect_equal(nrow(res), 2) + expect_all_true(is.finite(res$point_estimates)) + expect_all_true(res$lower_CI_boot <= res$upper_CI_boot) +}) From 566561b650b3ef81afea6aa2e9348fe7371cc369 Mon Sep 17 00:00:00 2001 From: Matt Secrest Date: Fri, 15 May 2026 05:54:49 -0700 Subject: [PATCH 11/16] run_bootstrap --- R/did_ec_aipw.R | 49 ++++++++++-------------------------------------- R/did_ec_ipw.R | 47 +++++++++------------------------------------- R/did_ec_or.R | 43 ++++++++---------------------------------- R/ec_aipw.R | 49 ++++++++++-------------------------------------- R/ec_ipw.R | 47 +++++++++------------------------------------- R/method_class.R | 43 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 89 insertions(+), 189 deletions(-) diff --git a/R/did_ec_aipw.R b/R/did_ec_aipw.R index 45b1434..0041242 100644 --- a/R/did_ec_aipw.R +++ b/R/did_ec_aipw.R @@ -127,50 +127,21 @@ setMethod("estimate", "did_ec_aipw_method", function(method, data, outcomes, tau <- result$tau if (!quiet) cat("Running bootstrap inference...\n") - group_id <- as.integer(interaction(df$S, df$A, drop = TRUE)) - - boot_ci_type_long <- switch(method@bootstrap_ci_type, - norm = "normal", - bca = "bca", - stud = "student", - perc = "percent", - basic = "basic" - ) - - boot_out <- boot::boot( - data = df, - statistic = .did_ec_aipw_statistic, - outcomes = outcomes, - ps_formula = ps_formula, - trt_formula = trt_formula, - outcome_formula = method@outcome_formula, - T_cross = T_cross, - R = method@bootstrap, - strata = group_id - ) n_ole <- n_time - T_cross - - lower_ci <- vapply(seq_len(n_ole), \(i) { - ci <- boot::boot.ci(boot_out, - conf = 1 - alpha, - type = method@bootstrap_ci_type, index = i - ) - ci[[boot_ci_type_long]][4] - }, numeric(1)) - - upper_ci <- vapply(seq_len(n_ole), \(i) { - ci <- boot::boot.ci(boot_out, - conf = 1 - alpha, - type = method@bootstrap_ci_type, index = i - ) - ci[[boot_ci_type_long]][5] - }, numeric(1)) + boot_res <- .run_bootstrap( + df = df, statistic = .did_ec_aipw_statistic, + n_estimates = n_ole, bootstrap = method@bootstrap, + bootstrap_ci_type = method@bootstrap_ci_type, alpha = alpha, + outcomes = outcomes, ps_formula = ps_formula, + trt_formula = trt_formula, outcome_formula = method@outcome_formula, + T_cross = T_cross + ) data.frame( point_estimates = tau, - lower_CI_boot = lower_ci, - upper_CI_boot = upper_ci, + lower_CI_boot = boot_res$lower_ci, + upper_CI_boot = boot_res$upper_ci, row.names = paste0("tau", (T_cross + 1):n_time) ) }) diff --git a/R/did_ec_ipw.R b/R/did_ec_ipw.R index 570135f..c5074f3 100644 --- a/R/did_ec_ipw.R +++ b/R/did_ec_ipw.R @@ -114,49 +114,20 @@ setMethod("estimate", "did_ec_ipw_method", function(method, data, outcomes, tau <- result$tau if (!quiet) cat("Running bootstrap inference...\n") - group_id <- as.integer(interaction(df$S, df$A, drop = TRUE)) - - boot_ci_type_long <- switch(method@bootstrap_ci_type, - norm = "normal", - bca = "bca", - stud = "student", - perc = "percent", - basic = "basic" - ) - - boot_out <- boot::boot( - data = df, - statistic = .did_ec_ipw_statistic, - outcomes = outcomes, - ps_formula = ps_formula, - trt_formula = trt_formula, - T_cross = T_cross, - R = method@bootstrap, - strata = group_id - ) n_ole <- n_time - T_cross - - lower_ci <- vapply(seq_len(n_ole), \(i) { - ci <- boot::boot.ci(boot_out, - conf = 1 - alpha, - type = method@bootstrap_ci_type, index = i - ) - ci[[boot_ci_type_long]][4] - }, numeric(1)) - - upper_ci <- vapply(seq_len(n_ole), \(i) { - ci <- boot::boot.ci(boot_out, - conf = 1 - alpha, - type = method@bootstrap_ci_type, index = i - ) - ci[[boot_ci_type_long]][5] - }, numeric(1)) + boot_res <- .run_bootstrap( + df = df, statistic = .did_ec_ipw_statistic, + n_estimates = n_ole, bootstrap = method@bootstrap, + bootstrap_ci_type = method@bootstrap_ci_type, alpha = alpha, + outcomes = outcomes, ps_formula = ps_formula, + trt_formula = trt_formula, T_cross = T_cross + ) data.frame( point_estimates = tau, - lower_CI_boot = lower_ci, - upper_CI_boot = upper_ci, + lower_CI_boot = boot_res$lower_ci, + upper_CI_boot = boot_res$upper_ci, row.names = paste0("tau", (T_cross + 1):n_time) ) }) diff --git a/R/did_ec_or.R b/R/did_ec_or.R index aa5c2b4..72df0a8 100644 --- a/R/did_ec_or.R +++ b/R/did_ec_or.R @@ -122,50 +122,23 @@ setMethod("estimate", "did_ec_or_method", function(method, data, outcomes, tau <- result$tau if (!quiet) cat("Running bootstrap inference...\n") - group_id <- as.integer(interaction(df$S, df$A, drop = TRUE)) - boot_ci_type_long <- switch(method@bootstrap_ci_type, - norm = "normal", - bca = "bca", - stud = "student", - perc = "percent", - basic = "basic" - ) - - boot_out <- boot::boot( - data = df, - statistic = .did_ec_or_statistic, + n_ole <- n_time - T_cross + boot_res <- .run_bootstrap( + df = df, statistic = .did_ec_or_statistic, + n_estimates = n_ole, bootstrap = method@bootstrap, + bootstrap_ci_type = method@bootstrap_ci_type, alpha = alpha, outcomes = outcomes, outcome_formula_ext = method@outcome_formula_ext, outcome_formula_rct_ctrl = method@outcome_formula_rct_ctrl, outcome_formula_rct_trt = method@outcome_formula_rct_trt, - T_cross = T_cross, - R = method@bootstrap, - strata = group_id + T_cross = T_cross ) - n_ole <- n_time - T_cross - - lower_ci <- vapply(seq_len(n_ole), \(i) { - ci <- boot::boot.ci(boot_out, - conf = 1 - alpha, - type = method@bootstrap_ci_type, index = i - ) - ci[[boot_ci_type_long]][4] - }, numeric(1)) - - upper_ci <- vapply(seq_len(n_ole), \(i) { - ci <- boot::boot.ci(boot_out, - conf = 1 - alpha, - type = method@bootstrap_ci_type, index = i - ) - ci[[boot_ci_type_long]][5] - }, numeric(1)) - data.frame( point_estimates = tau, - lower_CI_boot = lower_ci, - upper_CI_boot = upper_ci, + lower_CI_boot = boot_res$lower_ci, + upper_CI_boot = boot_res$upper_ci, row.names = paste0("tau", (T_cross + 1):n_time) ) }) diff --git a/R/ec_aipw.R b/R/ec_aipw.R index 05efe44..70892a6 100644 --- a/R/ec_aipw.R +++ b/R/ec_aipw.R @@ -123,51 +123,22 @@ setMethod("estimate", "ec_aipw_method", function(method, data, outcomes, if (!is.null(method@bootstrap)) { if (!quiet) cat("Running bootstrap inference...\n") - group_id <- as.integer(interaction(df$S, df$A, drop = TRUE)) - - boot_ci_type_long <- switch(method@bootstrap_ci_type, - norm = "normal", - bca = "bca", - stud = "student", - perc = "percent", - basic = "basic" - ) - boot_out <- boot::boot( - data = df, - statistic = .ec_aipw_statistic, - outcomes = outcomes, - covariates = covariates, - ps_formula = ps_formula, - outcome_formula = method@outcome_formula, - borrow_wt = borrow_weight, - R = method@bootstrap, - strata = group_id + boot_res <- .run_bootstrap( + df = df, statistic = .ec_aipw_statistic, + n_estimates = n_time, bootstrap = method@bootstrap, + bootstrap_ci_type = method@bootstrap_ci_type, alpha = alpha, + outcomes = outcomes, covariates = covariates, + ps_formula = ps_formula, outcome_formula = method@outcome_formula, + borrow_wt = borrow_weight ) - - lower_ci <- vapply(seq_len(n_time), \(i) { - ci <- boot::boot.ci(boot_out, - conf = 1 - alpha, - type = method@bootstrap_ci_type, index = i - ) - ci[[boot_ci_type_long]][4] - }, numeric(1)) - - upper_ci <- vapply(seq_len(n_time), \(i) { - ci <- boot::boot.ci(boot_out, - conf = 1 - alpha, - type = method@bootstrap_ci_type, index = i - ) - ci[[boot_ci_type_long]][5] - }, numeric(1)) - - sd_tau <- sqrt(diag(var(boot_out$t))) + sd_tau <- boot_res$sd_boot results <- data.frame( point_estimates = tau, standard_deviation = sd_tau, - lower_CI_boot = lower_ci, - upper_CI_boot = upper_ci, + lower_CI_boot = boot_res$lower_ci, + upper_CI_boot = boot_res$upper_ci, row.names = paste0("tau", seq_len(n_time)) ) } else { diff --git a/R/ec_ipw.R b/R/ec_ipw.R index 82e149f..e7267c7 100644 --- a/R/ec_ipw.R +++ b/R/ec_ipw.R @@ -146,50 +146,21 @@ setMethod("estimate", "ec_ipw_method", function(method, data, outcomes, if (!is.null(method@bootstrap)) { if (!quiet) cat("Running bootstrap inference...\n") - group_id <- as.integer(interaction(df$S, df$A, drop = TRUE)) - - boot_ci_type_long <- switch(method@bootstrap_ci_type, - norm = "normal", - bca = "bca", - stud = "student", - perc = "percent", - basic = "basic" - ) - boot_out <- boot::boot( - data = df, - statistic = .ec_ipw_statistic, - outcomes = outcomes, - covariates = covariates, - ps_formula = ps_formula, - borrow_wt = borrow_weight, - R = method@bootstrap, - strata = group_id + boot_res <- .run_bootstrap( + df = df, statistic = .ec_ipw_statistic, + n_estimates = n_time, bootstrap = method@bootstrap, + bootstrap_ci_type = method@bootstrap_ci_type, alpha = alpha, + outcomes = outcomes, covariates = covariates, + ps_formula = ps_formula, borrow_wt = borrow_weight ) - - lower_ci <- vapply(seq_len(n_time), \(i) { - ci <- boot::boot.ci(boot_out, - conf = 1 - alpha, - type = method@bootstrap_ci_type, index = i - ) - ci[[boot_ci_type_long]][4] - }, numeric(1)) - - upper_ci <- vapply(seq_len(n_time), \(i) { - ci <- boot::boot.ci(boot_out, - conf = 1 - alpha, - type = method@bootstrap_ci_type, index = i - ) - ci[[boot_ci_type_long]][5] - }, numeric(1)) - - sd_tau <- sqrt(diag(var(boot_out$t))) + sd_tau <- boot_res$sd_boot results <- data.frame( point_estimates = tau, standard_deviation = sd_tau, - lower_CI_boot = lower_ci, - upper_CI_boot = upper_ci, + lower_CI_boot = boot_res$lower_ci, + upper_CI_boot = boot_res$upper_ci, row.names = paste0("tau", seq_len(n_time)) ) } else { diff --git a/R/method_class.R b/R/method_class.R index b1aaaa3..90edf8b 100644 --- a/R/method_class.R +++ b/R/method_class.R @@ -24,6 +24,49 @@ contains = "method_obj" ) +# bootstrap helpers---- + +#' @noRd +.boot_ci_type_long <- function(short) { + switch(short, + norm = "normal", bca = "bca", stud = "student", + perc = "percent", basic = "basic" + ) +} + +#' @noRd +.run_bootstrap <- function(df, statistic, n_estimates, bootstrap, + bootstrap_ci_type, alpha, ...) { + group_id <- as.integer(interaction(df$S, df$A, drop = TRUE)) + ci_type_long <- .boot_ci_type_long(bootstrap_ci_type) + + boot_out <- boot::boot( + data = df, + statistic = statistic, + R = bootstrap, + strata = group_id, + ... + ) + + lower_ci <- vapply(seq_len(n_estimates), \(i) { + ci <- boot::boot.ci(boot_out, conf = 1 - alpha, + type = bootstrap_ci_type, index = i) + ci[[ci_type_long]][4] + }, numeric(1)) + + upper_ci <- vapply(seq_len(n_estimates), \(i) { + ci <- boot::boot.ci(boot_out, conf = 1 - alpha, + type = bootstrap_ci_type, index = i) + ci[[ci_type_long]][5] + }, numeric(1)) + + sd_boot <- sqrt(diag(var(boot_out$t))) + + list(lower_ci = lower_ci, upper_ci = upper_ci, sd_boot = sd_boot) +} + +# validation---- + .validate_method_base <- function(method_name, bootstrap_flag, bootstrap_obj) { checkmate::assert_string(method_name) checkmate::assert_flag(bootstrap_flag) From 3a068e926564145914b91ea515f07b75e057ec4b Mon Sep 17 00:00:00 2001 From: Matt Secrest Date: Fri, 15 May 2026 06:08:31 -0700 Subject: [PATCH 12/16] separate internal logic fore c --- R/ec_aipw.R | 111 ++++++++++++++++++++++--------------------------- R/ec_ipw.R | 116 +++++++++++++++++++++++++--------------------------- 2 files changed, 104 insertions(+), 123 deletions(-) diff --git a/R/ec_aipw.R b/R/ec_aipw.R index 70892a6..87d1510 100644 --- a/R/ec_aipw.R +++ b/R/ec_aipw.R @@ -1,7 +1,7 @@ #' @include ec_ipw.R NULL -# s4 class definition---- +# S4 class definition---- .ec_aipw_method <- setClass( "ec_aipw_method", contains = "method_primary_obj", @@ -24,7 +24,7 @@ NULL # constructor---- -#' EC-AIPW method constructor +#' EC-AIPW method #' #' Creates a method object for augmented IPW estimation with external #' control borrowing (Zhou et al., 2024). Augments the IPW estimator @@ -156,25 +156,21 @@ setMethod("estimate", "ec_aipw_method", function(method, data, outcomes, # internal helpers---- +# point estimate (shared by estimate and bootstrap)---- #' @noRd -.ec_aipw_estimate <- function(df, Y, S, A, n, m, N, pi_S, n_time, - ps_formula, outcome_formula, weight) { +.ec_aipw_core <- function(df, Y, S, A, n, N, pi_S, n_time, + ps_formula, outcome_formula, weight) { ps_model <- glm(as.formula(ps_formula), data = df, family = "binomial") pi_SX <- predict(ps_model, newdata = df, type = "response") pi_A <- sum(A[S == 1]) / n rx <- (pi_SX / (1 - pi_SX)) * ((1 - pi_S) / pi_S) - # outcome regression models fitted on controls only Y0_models <- lapply(outcome_formula, \(f) { lm(as.formula(f), data = df[A == 0, , drop = FALSE]) }) - - # predicted Y0 for all subjects Y0 <- vapply(seq_len(n_time), \(t) { predict(Y0_models[[t]], newdata = df) }, numeric(N)) - - # residuals Yr <- Y - Y0 w11 <- pi_A @@ -195,8 +191,19 @@ setMethod("estimate", "ec_aipw_method", function(method, data, outcomes, mu0 <- (1 - borrow_weight) * mu10 + borrow_weight * mu00 tau <- mu1 - mu0 - # sandwich variance - X_ps <- model.matrix(ps_model) + list(tau = tau, borrow_weight = borrow_weight, ps_model = ps_model, + pi_SX = pi_SX, pi_A = pi_A, rx = rx, w00 = w00, w10 = w10, + Yr = Yr, mu1 = mu1, mu10 = mu10, mu00 = mu00) +} + +# sandwich variance (uses intermediates from _core)---- +#' @noRd +.ec_aipw_estimate <- function(df, Y, S, A, n, m, N, pi_S, n_time, + ps_formula, outcome_formula, weight) { + core <- .ec_aipw_core(df, Y, S, A, n, N, pi_S, n_time, + ps_formula, outcome_formula, weight) + + X_ps <- model.matrix(core$ps_model) n_ps <- ncol(X_ps) # outcome model matrices (fit on full data for sandwich) @@ -204,78 +211,73 @@ setMethod("estimate", "ec_aipw_method", function(method, data, outcomes, Y0_model_dims <- vapply(Y0_models_full, \(m) ncol(model.matrix(m)), integer(1)) n_outcome <- sum(Y0_model_dims) - A33 <- diag(rep(-mean((1 - S) * w00 / (1 - pi_S)), n_time), nrow = n_time) - A34 <- t((1 - S) * pi_SX / (pi_S * (1 - pi_SX)) * - sweep(Yr, 2, mu00)) %*% X_ps / N - A44 <- t(X_ps) %*% diag(-pi_SX * (1 - pi_SX)) %*% X_ps / N + A33 <- diag(rep(-mean((1 - S) * core$w00 / (1 - pi_S)), n_time), nrow = n_time) + A34 <- t((1 - S) * core$pi_SX / (pi_S * (1 - core$pi_SX)) * + sweep(core$Yr, 2, core$mu00)) %*% X_ps / N + A44 <- t(X_ps) %*% diag(-core$pi_SX * (1 - core$pi_SX)) %*% X_ps / N A0 <- as.matrix(Matrix::bdiag( diag(-1, n_time), diag(-1, n_time), A33, A44 )) A0[ - (n_time + n_time + 1):(n_time + n_time + n_time), - (n_time + n_time + n_time + 1):(n_time + n_time + n_time + n_ps) + (2 * n_time + 1):(3 * n_time), + (3 * n_time + 1):(3 * n_time + n_ps) ] <- A34 - # outcome model blocks Phi1_gamma_list <- lapply(seq_len(n_time), \(t) { Xm <- model.matrix(Y0_models_full[[t]]) - as.vector(-S * A / (pi_S * pi_A)) %*% Xm / N + as.vector(-S * A / (pi_S * core$pi_A)) %*% Xm / N }) - Phi1_gamma <- as.matrix(Matrix::bdiag(Phi1_gamma_list)) - Phi2_gamma_list <- lapply(seq_len(n_time), \(t) { Xm <- model.matrix(Y0_models_full[[t]]) - as.vector(-S * (1 - A) / (pi_S * (1 - pi_A))) %*% Xm / N + as.vector(-S * (1 - A) / (pi_S * (1 - core$pi_A))) %*% Xm / N }) - Phi2_gamma <- as.matrix(Matrix::bdiag(Phi2_gamma_list)) - Phi3_gamma_list <- lapply(seq_len(n_time), \(t) { Xm <- model.matrix(Y0_models_full[[t]]) - as.vector(-(1 - S) * rx / (1 - pi_S)) %*% Xm / N + as.vector(-(1 - S) * core$rx / (1 - pi_S)) %*% Xm / N }) - Phi3_gamma <- as.matrix(Matrix::bdiag(Phi3_gamma_list)) - Y0_gamma_list <- lapply(seq_len(n_time), \(t) { Xm <- model.matrix(Y0_models_full[[t]]) -t(Xm) %*% diag((1 - A) / (1 - mean(A))) %*% Xm / N }) - Y0_gamma <- as.matrix(Matrix::bdiag(Y0_gamma_list)) A45 <- matrix(0, nrow = n_ps, ncol = n_outcome) - A51 <- matrix(0, nrow = n_outcome, ncol = n_time + n_time + n_time + n_ps) - + A51 <- matrix(0, nrow = n_outcome, ncol = 3 * n_time + n_ps) A_left <- rbind(A0, A51) - A_right <- rbind(Phi1_gamma, Phi2_gamma, Phi3_gamma, A45, Y0_gamma) + A_right <- rbind( + as.matrix(Matrix::bdiag(Phi1_gamma_list)), + as.matrix(Matrix::bdiag(Phi2_gamma_list)), + as.matrix(Matrix::bdiag(Phi3_gamma_list)), + A45, + as.matrix(Matrix::bdiag(Y0_gamma_list)) + ) A_mat <- cbind(A_left, A_right) - # meat - phi1 <- S * A * sweep(Yr, 2, mu1) / pi_A / pi_S - phi2 <- S * (1 - A) * sweep(Yr, 2, mu10) / (1 - pi_A) / pi_S - phi3 <- (1 - S) * sweep(Yr, 2, mu00) * w00 / (1 - pi_S) - phi_ps <- (S - pi_SX) * X_ps + phi1 <- S * A * sweep(core$Yr, 2, core$mu1) / core$pi_A / pi_S + phi2 <- S * (1 - A) * sweep(core$Yr, 2, core$mu10) / (1 - core$pi_A) / pi_S + phi3 <- (1 - S) * sweep(core$Yr, 2, core$mu00) * core$w00 / (1 - pi_S) + phi_ps <- (S - core$pi_SX) * X_ps phi_Y0 <- do.call(cbind, lapply(seq_len(n_time), \(t) { Xm <- model.matrix(Y0_models_full[[t]]) - ((1 - A) / (1 - mean(A))) * (Yr[, t] * Xm) + ((1 - A) / (1 - mean(A))) * (core$Yr[, t] * Xm) })) - Phi <- cbind(phi1, phi2, phi3, phi_ps, phi_Y0) - B <- crossprod(Phi) / N - + B <- crossprod(cbind(phi1, phi2, phi3, phi_ps, phi_Y0)) / N A_inv <- solve(A_mat) sigma <- A_inv %*% B %*% t(A_inv) coef_mat <- cbind( diag(n_time), - -(1 - borrow_weight) * diag(n_time), - -borrow_weight * diag(n_time), + -(1 - core$borrow_weight) * diag(n_time), + -core$borrow_weight * diag(n_time), matrix(0, nrow = n_time, ncol = n_ps + n_outcome) ) sd_tau <- sqrt(diag(coef_mat %*% sigma %*% t(coef_mat) / N)) - list(tau = tau, sd_tau = sd_tau, borrow_weight = borrow_weight) + list(tau = core$tau, sd_tau = sd_tau, borrow_weight = core$borrow_weight) } +# bootstrap statistic (calls _core, returns tau only)---- #' @noRd .ec_aipw_statistic <- function(data, indices, outcomes, covariates, ps_formula, outcome_formula, borrow_wt) { @@ -288,24 +290,7 @@ setMethod("estimate", "ec_aipw_method", function(method, data, outcomes, n_time <- ncol(Y) pi_S <- n / N - ps_model <- glm(as.formula(ps_formula), data = d, family = "binomial") - pi_SX <- predict(ps_model, newdata = d, type = "response") - pi_A <- sum(A[S == 1]) / n - rx <- (pi_SX / (1 - pi_SX)) * ((1 - pi_S) / pi_S) - - Y0_models <- lapply(outcome_formula, \(f) { - lm(as.formula(f), data = d[A == 0, , drop = FALSE]) - }) - Y0 <- vapply(seq_len(n_time), \(t) { - predict(Y0_models[[t]], newdata = d) - }, numeric(N)) - Yr <- Y - Y0 - - potential <- (S * A / pi_A + S * (1 - A) / (1 - pi_A) + (1 - S) * rx) * Yr - mu1 <- colSums(potential[S == 1 & A == 1, , drop = FALSE]) / n - mu10 <- colSums(potential[S == 1 & A == 0, , drop = FALSE]) / n - mu00 <- colSums(potential[S == 0, , drop = FALSE]) / sum((1 - S) * rx) - - mu0 <- (1 - borrow_wt) * mu10 + borrow_wt * mu00 - mu1 - mu0 + core <- .ec_aipw_core(d, Y, S, A, n, N, pi_S, n_time, + ps_formula, outcome_formula, borrow_wt) + core$tau } diff --git a/R/ec_ipw.R b/R/ec_ipw.R index e7267c7..48feea9 100644 --- a/R/ec_ipw.R +++ b/R/ec_ipw.R @@ -178,8 +178,9 @@ setMethod("estimate", "ec_ipw_method", function(method, data, outcomes, # internal helpers---- +# point estimate for weight=0 (rct-only, no PS model)---- #' @noRd -.ec_ipw_no_borrow <- function(df, Y, S, A, n, n_time) { +.ec_ipw_no_borrow_core <- function(df, Y, S, A, n, n_time) { rct <- df[df$S == 1, , drop = FALSE] Y_rct <- Y[S == 1, , drop = FALSE] pi_A <- sum(rct$A) / n @@ -187,22 +188,30 @@ setMethod("estimate", "ec_ipw_method", function(method, data, outcomes, potential <- (rct$A / pi_A + (1 - rct$A) / (1 - pi_A)) * Y_rct mu1 <- colSums(potential[rct$A == 1, , drop = FALSE]) / n mu0 <- colSums(potential[rct$A == 0, , drop = FALSE]) / n - tau <- mu1 - mu0 - phi1 <- rct$A * sweep(Y_rct, 2, mu1) / pi_A - phi0 <- (1 - rct$A) * sweep(Y_rct, 2, mu0) / (1 - pi_A) - Phi <- cbind(phi1, phi0) - B <- crossprod(Phi) / n + list(tau = mu1 - mu0, pi_A = pi_A, mu1 = mu1, mu0 = mu0, + Y_rct = Y_rct, rct = rct) +} + +# sandwich variance for weight=0 case---- +#' @noRd +.ec_ipw_no_borrow <- function(df, Y, S, A, n, n_time) { + core <- .ec_ipw_no_borrow_core(df, Y, S, A, n, n_time) + + phi1 <- core$rct$A * sweep(core$Y_rct, 2, core$mu1) / core$pi_A + phi0 <- (1 - core$rct$A) * sweep(core$Y_rct, 2, core$mu0) / (1 - core$pi_A) + B <- crossprod(cbind(phi1, phi0)) / n coef_mat <- cbind(diag(n_time), -diag(n_time)) sd_tau <- sqrt(diag(coef_mat %*% B %*% t(coef_mat) / n)) - list(tau = tau, sd_tau = sd_tau) + list(tau = core$tau, sd_tau = sd_tau) } +# point estimate with borrowing (shared by estimate and bootstrap)---- #' @noRd -.ec_ipw_borrow <- function(df, Y, S, A, n, m, N, pi_S, n_time, - ps_formula, weight) { +.ec_ipw_borrow_core <- function(df, Y, S, A, n, N, pi_S, n_time, + ps_formula, weight) { ps_model <- glm(as.formula(ps_formula), data = df, family = "binomial") pi_SX <- predict(ps_model, newdata = df, type = "response") pi_A <- sum(A[S == 1]) / n @@ -213,34 +222,39 @@ setMethod("estimate", "ec_ipw_method", function(method, data, outcomes, w00 <- rx potential <- (S * A / w11 + S * (1 - A) / w10 + (1 - S) * w00) * Y - mu1 <- colSums(potential[S == 1 & A == 1, , drop = FALSE]) / - sum(S * A / w11) - mu10 <- colSums(potential[S == 1 & A == 0, , drop = FALSE]) / - sum(S * (1 - A) / w10) - mu00 <- colSums(potential[S == 0, , drop = FALSE]) / - sum((1 - S) * w00) - - # Optimal weight (Eq. 11 from Zhou et al. 2024) + mu1 <- colSums(potential[S == 1 & A == 1, , drop = FALSE]) / sum(S * A / w11) + mu10 <- colSums(potential[S == 1 & A == 0, , drop = FALSE]) / sum(S * (1 - A) / w10) + mu00 <- colSums(potential[S == 0, , drop = FALSE]) / sum((1 - S) * w00) + + # optimal weight (Eq. 11 from Zhou et al. 2024) num <- sum(S * (1 - A) / w10^2 / (sum(S * (1 - A) / w10))^2) denom <- sum((1 - S) * w00^2 / (sum((1 - S) * w00))^2) w_opt <- num / (num + denom) - borrow_weight <- if (is.null(weight)) w_opt else weight mu0 <- (1 - borrow_weight) * mu10 + borrow_weight * mu00 tau <- mu1 - mu0 - # Sandwich variance - X_model <- model.matrix(ps_model) - A33 <- diag( - rep(-mean((1 - S) * w00 / (1 - pi_S)), n_time), - nrow = n_time - ) - A34 <- t((1 - S) * pi_SX / (pi_S * (1 - pi_SX)) * - sweep(Y, 2, mu00)) %*% X_model / N - A44 <- t(X_model) %*% diag(-pi_SX * (1 - pi_SX)) %*% X_model / N + list(tau = tau, borrow_weight = borrow_weight, ps_model = ps_model, + pi_SX = pi_SX, pi_A = pi_A, w00 = w00, w10 = w10, + mu1 = mu1, mu10 = mu10, mu00 = mu00) +} +# sandwich variance with borrowing---- +#' @noRd +.ec_ipw_borrow <- function(df, Y, S, A, n, m, N, pi_S, n_time, + ps_formula, weight) { + core <- .ec_ipw_borrow_core(df, Y, S, A, n, N, pi_S, n_time, + ps_formula, weight) + + X_model <- model.matrix(core$ps_model) n_ps <- ncol(X_model) + + A33 <- diag(rep(-mean((1 - S) * core$w00 / (1 - pi_S)), n_time), nrow = n_time) + A34 <- t((1 - S) * core$pi_SX / (pi_S * (1 - core$pi_SX)) * + sweep(Y, 2, core$mu00)) %*% X_model / N + A44 <- t(X_model) %*% diag(-core$pi_SX * (1 - core$pi_SX)) %*% X_model / N + block_dim <- n_time + n_time + n_time + n_ps A_mat <- matrix(0, nrow = block_dim, ncol = block_dim) A_mat[1:n_time, 1:n_time] <- diag(-1, n_time) @@ -252,27 +266,27 @@ setMethod("estimate", "ec_ipw_method", function(method, data, outcomes, A_mat[idx4, idx4] <- A44 A_mat[idx3, idx4] <- A34 - phi1 <- S * A * sweep(Y, 2, mu1) / pi_A / pi_S - phi2 <- S * (1 - A) * sweep(Y, 2, mu10) / (1 - pi_A) / pi_S - phi3 <- (1 - S) * sweep(Y, 2, mu00) * w00 / (1 - pi_S) - phi_ps <- (S - pi_SX) * X_model - Phi <- cbind(phi1, phi2, phi3, phi_ps) - B <- crossprod(Phi) / N + phi1 <- S * A * sweep(Y, 2, core$mu1) / core$pi_A / pi_S + phi2 <- S * (1 - A) * sweep(Y, 2, core$mu10) / (1 - core$pi_A) / pi_S + phi3 <- (1 - S) * sweep(Y, 2, core$mu00) * core$w00 / (1 - pi_S) + phi_ps <- (S - core$pi_SX) * X_model + B <- crossprod(cbind(phi1, phi2, phi3, phi_ps)) / N A_inv <- solve(A_mat) sigma <- A_inv %*% B %*% t(A_inv) coef_mat <- cbind( diag(n_time), - -(1 - borrow_weight) * diag(n_time), - -borrow_weight * diag(n_time), + -(1 - core$borrow_weight) * diag(n_time), + -core$borrow_weight * diag(n_time), matrix(0, nrow = n_time, ncol = n_ps) ) sd_tau <- sqrt(diag(coef_mat %*% sigma %*% t(coef_mat) / N)) - list(tau = tau, sd_tau = sd_tau, borrow_weight = borrow_weight) + list(tau = core$tau, sd_tau = sd_tau, borrow_weight = core$borrow_weight) } +# bootstrap statistic (calls _core, returns tau only)---- #' @noRd .ec_ipw_statistic <- function(data, indices, outcomes, covariates, ps_formula, borrow_wt) { @@ -282,32 +296,14 @@ setMethod("estimate", "ec_ipw_method", function(method, data, outcomes, A <- d$A n <- sum(S) N <- nrow(d) - pi_S <- n / N + n_time <- ncol(Y) if (borrow_wt == 0) { - Y_rct <- Y[S == 1, , drop = FALSE] - A_rct <- A[S == 1] - pi_A <- sum(A_rct) / n - potential <- (A_rct / pi_A + (1 - A_rct) / (1 - pi_A)) * Y_rct - mu1 <- colSums(potential[A_rct == 1, , drop = FALSE]) / n - mu0 <- colSums(potential[A_rct == 0, , drop = FALSE]) / n - return(mu1 - mu0) + return(.ec_ipw_no_borrow_core(d, Y, S, A, n, n_time)$tau) } - ps_model <- glm(as.formula(ps_formula), data = d, family = "binomial") - pi_SX <- predict(ps_model, newdata = d, type = "response") - pi_A <- sum(A[S == 1]) / n - rx <- (pi_SX / (1 - pi_SX)) * ((1 - pi_S) / pi_S) - - potential <- (S * A / pi_A + S * (1 - A) / (1 - pi_A) + - (1 - S) * rx) * Y - mu1 <- colSums(potential[S == 1 & A == 1, , drop = FALSE]) / - sum(S * A / pi_A) - mu10 <- colSums(potential[S == 1 & A == 0, , drop = FALSE]) / - sum(S * (1 - A) / (1 - pi_A)) - mu00 <- colSums(potential[S == 0, , drop = FALSE]) / - sum((1 - S) * rx) - - mu0 <- (1 - borrow_wt) * mu10 + borrow_wt * mu00 - mu1 - mu0 + pi_S <- n / N + core <- .ec_ipw_borrow_core(d, Y, S, A, n, N, pi_S, n_time, + ps_formula, borrow_wt) + core$tau } From 37e9af3a4a88c827805b0c4d306ad441e5c15ad5 Mon Sep 17 00:00:00 2001 From: Matt Secrest Date: Fri, 15 May 2026 07:25:26 -0700 Subject: [PATCH 13/16] redocument --- man/ec_aipw.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/ec_aipw.Rd b/man/ec_aipw.Rd index a7a9fae..607e57f 100644 --- a/man/ec_aipw.Rd +++ b/man/ec_aipw.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ec_aipw.R \name{ec_aipw} \alias{ec_aipw} -\title{EC-AIPW method constructor} +\title{EC-AIPW method} \usage{ ec_aipw( ps_formula, From bdcbcae4bce23dd2f1294d4abb81b2209a7bea7b Mon Sep 17 00:00:00 2001 From: Matt Secrest Date: Fri, 15 May 2026 09:08:52 -0700 Subject: [PATCH 14/16] reclass methods --- R/did_ec_aipw.R | 2 +- R/did_ec_ipw.R | 2 +- R/did_ec_or.R | 2 +- R/ec_aipw.R | 223 ++++++++++++++++--------------------- R/ec_ipw.R | 103 ++++++----------- R/method_weighting_class.R | 46 ++++++++ R/scm_method.R | 2 +- 7 files changed, 186 insertions(+), 194 deletions(-) diff --git a/R/did_ec_aipw.R b/R/did_ec_aipw.R index 0041242..afa353f 100644 --- a/R/did_ec_aipw.R +++ b/R/did_ec_aipw.R @@ -4,7 +4,7 @@ NULL # s4 class definition---- .did_ec_aipw_method <- setClass( "did_ec_aipw_method", - contains = "method_OLE_obj", + contains = "method_DID_obj", slots = c( ps_formula = "character", trt_formula = "character", diff --git a/R/did_ec_ipw.R b/R/did_ec_ipw.R index c5074f3..3c104a9 100644 --- a/R/did_ec_ipw.R +++ b/R/did_ec_ipw.R @@ -4,7 +4,7 @@ NULL # s4 class definition---- .did_ec_ipw_method <- setClass( "did_ec_ipw_method", - contains = "method_OLE_obj", + contains = "method_DID_obj", slots = c( ps_formula = "character", trt_formula = "character", diff --git a/R/did_ec_or.R b/R/did_ec_or.R index 72df0a8..37db463 100644 --- a/R/did_ec_or.R +++ b/R/did_ec_or.R @@ -4,7 +4,7 @@ NULL # s4 class definition---- .did_ec_or_method <- setClass( "did_ec_or_method", - contains = "method_OLE_obj", + contains = "method_DID_obj", slots = c( outcome_formula_ext = "character", outcome_formula_rct_ctrl = "character", diff --git a/R/ec_aipw.R b/R/ec_aipw.R index 87d1510..3a7d697 100644 --- a/R/ec_aipw.R +++ b/R/ec_aipw.R @@ -1,10 +1,10 @@ #' @include ec_ipw.R NULL -# S4 class definition---- +# s4 class definition---- .ec_aipw_method <- setClass( "ec_aipw_method", - contains = "method_primary_obj", + contains = "method_weighting_obj", slots = c( ps_formula = "character", outcome_formula = "character", @@ -90,81 +90,56 @@ ec_aipw <- function(ps_formula, ) } -# estimate() method---- +# estimate() dispatch---- #' @rdname estimate setMethod("estimate", "ec_aipw_method", function(method, data, outcomes, treatment, trial_status, covariates, alpha = 0.05, quiet = TRUE) { - Y <- as.matrix(data[, outcomes, drop = FALSE]) - S <- data[[trial_status]] - A <- data[[treatment]] - n_time <- ncol(Y) - N <- nrow(data) - n <- sum(S) - m <- N - n - pi_S <- n / N - ps_formula <- sub("^[^~]*~", paste0(trial_status, " ~"), method@ps_formula) - df <- data.frame(Y, S = S, A = A, data[, covariates, drop = FALSE]) + df <- .build_analysis_df(data, outcomes, treatment, trial_status, covariates) if (!quiet) cat("Running EC-AIPW estimator...\n") - result <- .ec_aipw_estimate( - df, Y, S, A, n, m, N, pi_S, n_time, - ps_formula, method@outcome_formula, method@weight + core <- .ec_aipw_core(df, outcomes, ps_formula, + method@outcome_formula, method@weight) + sd_tau <- .ec_aipw_sandwich(df, core, length(outcomes), + method@outcome_formula) + + .format_primary_results( + tau = core$tau, sd_tau = sd_tau, + borrow_weight = core$borrow_weight, + n_time = length(core$tau), alpha = alpha, + method = method, df = df, quiet = quiet, + statistic = .ec_aipw_statistic, + outcomes = outcomes, covariates = covariates, + ps_formula = ps_formula, + outcome_formula = method@outcome_formula ) - - tau <- result$tau - sd_tau <- result$sd_tau - borrow_weight <- result$borrow_weight - cutoff <- qnorm(1 - alpha / 2) - - if (!is.null(method@bootstrap)) { - if (!quiet) cat("Running bootstrap inference...\n") - - boot_res <- .run_bootstrap( - df = df, statistic = .ec_aipw_statistic, - n_estimates = n_time, bootstrap = method@bootstrap, - bootstrap_ci_type = method@bootstrap_ci_type, alpha = alpha, - outcomes = outcomes, covariates = covariates, - ps_formula = ps_formula, outcome_formula = method@outcome_formula, - borrow_wt = borrow_weight - ) - sd_tau <- boot_res$sd_boot - - results <- data.frame( - point_estimates = tau, - standard_deviation = sd_tau, - lower_CI_boot = boot_res$lower_ci, - upper_CI_boot = boot_res$upper_ci, - row.names = paste0("tau", seq_len(n_time)) - ) - } else { - results <- data.frame( - point_estimates = tau, - standard_deviation = sd_tau, - lower_CI_normal = tau - sd_tau * cutoff, - upper_CI_normal = tau + sd_tau * cutoff, - row.names = paste0("tau", seq_len(n_time)) - ) - } - - list(results = results, borrow_weight = borrow_weight) }) -# internal helpers---- +# core estimation---- -# point estimate (shared by estimate and bootstrap)---- +# fits ps + outcome models, computes weighted residual potentials, returns +# point estimate tau and all intermediates needed for sandwich variance---- #' @noRd -.ec_aipw_core <- function(df, Y, S, A, n, N, pi_S, n_time, - ps_formula, outcome_formula, weight) { +.ec_aipw_core <- function(df, outcomes, ps_formula, outcome_formula, weight) { + Y <- as.matrix(df[, outcomes, drop = FALSE]) + S <- df$S + A <- df$A + N <- nrow(df) + n <- sum(S) + n_time <- ncol(Y) + pi_S <- n / N + + # propensity score model ps_model <- glm(as.formula(ps_formula), data = df, family = "binomial") pi_SX <- predict(ps_model, newdata = df, type = "response") pi_A <- sum(A[S == 1]) / n rx <- (pi_SX / (1 - pi_SX)) * ((1 - pi_S) / pi_S) + # outcome regression on controls, predict for all subjects Y0_models <- lapply(outcome_formula, \(f) { lm(as.formula(f), data = df[A == 0, , drop = FALSE]) }) @@ -173,96 +148,107 @@ setMethod("estimate", "ec_aipw_method", function(method, data, outcomes, }, numeric(N)) Yr <- Y - Y0 - w11 <- pi_A - w10 <- 1 - pi_A + # weighted potential outcomes using residuals w00 <- rx - - potential <- (S * A / w11 + S * (1 - A) / w10 + (1 - S) * w00) * Yr + potential <- (S * A / pi_A + S * (1 - A) / (1 - pi_A) + + (1 - S) * w00) * Yr mu1 <- colSums(potential[S == 1 & A == 1, , drop = FALSE]) / n mu10 <- colSums(potential[S == 1 & A == 0, , drop = FALSE]) / n mu00 <- colSums(potential[S == 0, , drop = FALSE]) / sum((1 - S) * w00) - # optimal weight - num <- sum(S * (1 - A) / w10^2 / (sum(S * (1 - A) / w10))^2) + # optimal borrowing weight + num <- sum(S * (1 - A) / (1 - pi_A)^2 / (sum(S * (1 - A) / (1 - pi_A)))^2) denom <- sum((1 - S) * w00^2 / (sum((1 - S) * w00))^2) w_opt <- num / (num + denom) borrow_weight <- if (is.null(weight)) w_opt else weight + # combine rct control and external control mu0 <- (1 - borrow_weight) * mu10 + borrow_weight * mu00 tau <- mu1 - mu0 - list(tau = tau, borrow_weight = borrow_weight, ps_model = ps_model, - pi_SX = pi_SX, pi_A = pi_A, rx = rx, w00 = w00, w10 = w10, + list(tau = tau, borrow_weight = borrow_weight, + ps_model = ps_model, pi_SX = pi_SX, pi_A = pi_A, + pi_S = pi_S, rx = rx, w00 = w00, Yr = Yr, mu1 = mu1, mu10 = mu10, mu00 = mu00) } -# sandwich variance (uses intermediates from _core)---- +# sandwich variance---- + +# constructs the M-estimator sandwich variance from the core intermediates. +# the bread matrix has blocks for: mu1, mu10, mu00, ps params, outcome params. +# the meat is the outer product of the stacked influence functions.---- #' @noRd -.ec_aipw_estimate <- function(df, Y, S, A, n, m, N, pi_S, n_time, - ps_formula, outcome_formula, weight) { - core <- .ec_aipw_core(df, Y, S, A, n, N, pi_S, n_time, - ps_formula, outcome_formula, weight) +.ec_aipw_sandwich <- function(df, core, n_time, outcome_formula) { + S <- df$S + A <- df$A + N <- nrow(df) X_ps <- model.matrix(core$ps_model) n_ps <- ncol(X_ps) - # outcome model matrices (fit on full data for sandwich) - Y0_models_full <- lapply(outcome_formula, \(f) lm(as.formula(f), data = df)) - Y0_model_dims <- vapply(Y0_models_full, \(m) ncol(model.matrix(m)), integer(1)) - n_outcome <- sum(Y0_model_dims) + # refit outcome models on full data (needed for sandwich, not for tau) + Y0_models_full <- lapply(outcome_formula, \(f) { + lm(as.formula(f), data = df) + }) - A33 <- diag(rep(-mean((1 - S) * core$w00 / (1 - pi_S)), n_time), nrow = n_time) - A34 <- t((1 - S) * core$pi_SX / (pi_S * (1 - core$pi_SX)) * + # bread: ps block---- + A33 <- diag( + rep(-mean((1 - S) * core$w00 / (1 - core$pi_S)), n_time), + nrow = n_time + ) + A34 <- t((1 - S) * core$pi_SX / (core$pi_S * (1 - core$pi_SX)) * sweep(core$Yr, 2, core$mu00)) %*% X_ps / N A44 <- t(X_ps) %*% diag(-core$pi_SX * (1 - core$pi_SX)) %*% X_ps / N A0 <- as.matrix(Matrix::bdiag( diag(-1, n_time), diag(-1, n_time), A33, A44 )) - A0[ - (2 * n_time + 1):(3 * n_time), - (3 * n_time + 1):(3 * n_time + n_ps) - ] <- A34 - - Phi1_gamma_list <- lapply(seq_len(n_time), \(t) { - Xm <- model.matrix(Y0_models_full[[t]]) - as.vector(-S * A / (pi_S * core$pi_A)) %*% Xm / N - }) - Phi2_gamma_list <- lapply(seq_len(n_time), \(t) { - Xm <- model.matrix(Y0_models_full[[t]]) - as.vector(-S * (1 - A) / (pi_S * (1 - core$pi_A))) %*% Xm / N - }) - Phi3_gamma_list <- lapply(seq_len(n_time), \(t) { - Xm <- model.matrix(Y0_models_full[[t]]) - as.vector(-(1 - S) * core$rx / (1 - pi_S)) %*% Xm / N - }) - Y0_gamma_list <- lapply(seq_len(n_time), \(t) { - Xm <- model.matrix(Y0_models_full[[t]]) - -t(Xm) %*% diag((1 - A) / (1 - mean(A))) %*% Xm / N - }) - - A45 <- matrix(0, nrow = n_ps, ncol = n_outcome) - A51 <- matrix(0, nrow = n_outcome, ncol = 3 * n_time + n_ps) - A_left <- rbind(A0, A51) + A0[(2 * n_time + 1):(3 * n_time), + (3 * n_time + 1):(3 * n_time + n_ps)] <- A34 + + # bread: outcome model blocks---- + Y0_model_mats <- lapply(Y0_models_full, model.matrix) + n_outcome <- sum(vapply(Y0_model_mats, ncol, integer(1))) + + Phi1_gamma <- as.matrix(Matrix::bdiag(lapply(seq_len(n_time), \(t) { + as.vector(-S * A / (core$pi_S * core$pi_A)) %*% Y0_model_mats[[t]] / N + }))) + Phi2_gamma <- as.matrix(Matrix::bdiag(lapply(seq_len(n_time), \(t) { + as.vector(-S * (1 - A) / (core$pi_S * (1 - core$pi_A))) %*% + Y0_model_mats[[t]] / N + }))) + Phi3_gamma <- as.matrix(Matrix::bdiag(lapply(seq_len(n_time), \(t) { + as.vector(-(1 - S) * core$rx / (1 - core$pi_S)) %*% + Y0_model_mats[[t]] / N + }))) + Y0_gamma <- as.matrix(Matrix::bdiag(lapply(seq_len(n_time), \(t) { + -t(Y0_model_mats[[t]]) %*% + diag((1 - A) / (1 - mean(A))) %*% Y0_model_mats[[t]] / N + }))) + + # assemble full bread matrix---- + A_left <- rbind(A0, matrix(0, nrow = n_outcome, ncol = 3 * n_time + n_ps)) A_right <- rbind( - as.matrix(Matrix::bdiag(Phi1_gamma_list)), - as.matrix(Matrix::bdiag(Phi2_gamma_list)), - as.matrix(Matrix::bdiag(Phi3_gamma_list)), - A45, - as.matrix(Matrix::bdiag(Y0_gamma_list)) + Phi1_gamma, Phi2_gamma, Phi3_gamma, + matrix(0, nrow = n_ps, ncol = n_outcome), + Y0_gamma ) A_mat <- cbind(A_left, A_right) - phi1 <- S * A * sweep(core$Yr, 2, core$mu1) / core$pi_A / pi_S - phi2 <- S * (1 - A) * sweep(core$Yr, 2, core$mu10) / (1 - core$pi_A) / pi_S - phi3 <- (1 - S) * sweep(core$Yr, 2, core$mu00) * core$w00 / (1 - pi_S) + # meat: influence functions---- + phi1 <- S * A * sweep(core$Yr, 2, core$mu1) / core$pi_A / core$pi_S + phi2 <- S * (1 - A) * sweep(core$Yr, 2, core$mu10) / + (1 - core$pi_A) / core$pi_S + phi3 <- (1 - S) * sweep(core$Yr, 2, core$mu00) * + core$w00 / (1 - core$pi_S) phi_ps <- (S - core$pi_SX) * X_ps phi_Y0 <- do.call(cbind, lapply(seq_len(n_time), \(t) { - Xm <- model.matrix(Y0_models_full[[t]]) - ((1 - A) / (1 - mean(A))) * (core$Yr[, t] * Xm) + ((1 - A) / (1 - mean(A))) * (core$Yr[, t] * Y0_model_mats[[t]]) })) B <- crossprod(cbind(phi1, phi2, phi3, phi_ps, phi_Y0)) / N + + # sandwich: A^{-1} B A^{-T}, then extract tau variance---- A_inv <- solve(A_mat) sigma <- A_inv %*% B %*% t(A_inv) @@ -272,9 +258,7 @@ setMethod("estimate", "ec_aipw_method", function(method, data, outcomes, -core$borrow_weight * diag(n_time), matrix(0, nrow = n_time, ncol = n_ps + n_outcome) ) - sd_tau <- sqrt(diag(coef_mat %*% sigma %*% t(coef_mat) / N)) - - list(tau = core$tau, sd_tau = sd_tau, borrow_weight = core$borrow_weight) + sqrt(diag(coef_mat %*% sigma %*% t(coef_mat) / N)) } # bootstrap statistic (calls _core, returns tau only)---- @@ -282,15 +266,6 @@ setMethod("estimate", "ec_aipw_method", function(method, data, outcomes, .ec_aipw_statistic <- function(data, indices, outcomes, covariates, ps_formula, outcome_formula, borrow_wt) { d <- data[indices, , drop = FALSE] - Y <- as.matrix(d[, outcomes, drop = FALSE]) - S <- d$S - A <- d$A - n <- sum(S) - N <- nrow(d) - n_time <- ncol(Y) - pi_S <- n / N - - core <- .ec_aipw_core(d, Y, S, A, n, N, pi_S, n_time, - ps_formula, outcome_formula, borrow_wt) + core <- .ec_aipw_core(d, outcomes, ps_formula, outcome_formula, borrow_wt) core$tau } diff --git a/R/ec_ipw.R b/R/ec_ipw.R index 48feea9..6cc8d24 100644 --- a/R/ec_ipw.R +++ b/R/ec_ipw.R @@ -4,10 +4,10 @@ NULL # class unions---- setClassUnion("numericOrNULL", c("numeric", "NULL")) -# s4 class definition---- +# S4 class definition---- .ec_ipw_method <- setClass( "ec_ipw_method", - contains = "method_primary_obj", + contains = "method_weighting_obj", slots = c( ps_formula = "character", weight = "numericOrNULL", @@ -112,17 +112,9 @@ setMethod("estimate", "ec_ipw_method", function(method, data, outcomes, treatment, trial_status, covariates, alpha = 0.05, quiet = TRUE) { - Y <- as.matrix(data[, outcomes, drop = FALSE]) - S <- data[[trial_status]] - A <- data[[treatment]] - n_time <- ncol(Y) - N <- nrow(data) - n <- sum(S) - m <- N - n - pi_S <- n / N - ps_formula <- sub("^[^~]*~", paste0(trial_status, " ~"), method@ps_formula) - df <- data.frame(Y, S = S, A = A, data[, covariates, drop = FALSE]) + df <- .build_analysis_df(data, outcomes, treatment, trial_status, covariates) + n_time <- length(outcomes) if (!quiet) cat("Running EC-IPW estimator...\n") @@ -130,50 +122,27 @@ setMethod("estimate", "ec_ipw_method", function(method, data, outcomes, use_borrowing <- is.null(weight) || weight > 0 if (!use_borrowing) { - result <- .ec_ipw_no_borrow(df, Y, S, A, n, n_time) + Y <- as.matrix(df[, outcomes, drop = FALSE]) + result <- .ec_ipw_no_borrow(df, Y, df$S, df$A, sum(df$S), n_time) borrow_weight <- 0 } else { - result <- .ec_ipw_borrow( - df, Y, S, A, n, m, N, pi_S, n_time, - ps_formula, weight - ) - borrow_weight <- result$borrow_weight + core <- .ec_ipw_borrow_core(df, as.matrix(df[, outcomes, drop = FALSE]), + df$S, df$A, sum(df$S), nrow(df), + sum(df$S) / nrow(df), n_time, + ps_formula, weight) + result <- .ec_ipw_sandwich(df, core, n_time) + borrow_weight <- core$borrow_weight } - tau <- result$tau - sd_tau <- result$sd_tau - cutoff <- qnorm(1 - alpha / 2) - - if (!is.null(method@bootstrap)) { - if (!quiet) cat("Running bootstrap inference...\n") - - boot_res <- .run_bootstrap( - df = df, statistic = .ec_ipw_statistic, - n_estimates = n_time, bootstrap = method@bootstrap, - bootstrap_ci_type = method@bootstrap_ci_type, alpha = alpha, - outcomes = outcomes, covariates = covariates, - ps_formula = ps_formula, borrow_wt = borrow_weight - ) - sd_tau <- boot_res$sd_boot - - results <- data.frame( - point_estimates = tau, - standard_deviation = sd_tau, - lower_CI_boot = boot_res$lower_ci, - upper_CI_boot = boot_res$upper_ci, - row.names = paste0("tau", seq_len(n_time)) - ) - } else { - results <- data.frame( - point_estimates = tau, - standard_deviation = sd_tau, - lower_CI_normal = tau - sd_tau * cutoff, - upper_CI_normal = tau + sd_tau * cutoff, - row.names = paste0("tau", seq_len(n_time)) - ) - } - - list(results = results, borrow_weight = borrow_weight) + .format_primary_results( + tau = result$tau, sd_tau = result$sd_tau, + borrow_weight = borrow_weight, + n_time = n_time, alpha = alpha, + method = method, df = df, quiet = quiet, + statistic = .ec_ipw_statistic, + outcomes = outcomes, covariates = covariates, + ps_formula = ps_formula + ) }) # internal helpers---- @@ -240,38 +209,40 @@ setMethod("estimate", "ec_ipw_method", function(method, data, outcomes, mu1 = mu1, mu10 = mu10, mu00 = mu00) } -# sandwich variance with borrowing---- +# sandwich variance with borrowing (uses intermediates from _core)---- #' @noRd -.ec_ipw_borrow <- function(df, Y, S, A, n, m, N, pi_S, n_time, - ps_formula, weight) { - core <- .ec_ipw_borrow_core(df, Y, S, A, n, N, pi_S, n_time, - ps_formula, weight) +.ec_ipw_sandwich <- function(df, core, n_time) { + Y <- as.matrix(df[, seq_len(n_time), drop = FALSE]) + S <- df$S + A <- df$A + N <- nrow(df) + pi_S <- sum(S) / N X_model <- model.matrix(core$ps_model) n_ps <- ncol(X_model) + # bread matrix blocks A33 <- diag(rep(-mean((1 - S) * core$w00 / (1 - pi_S)), n_time), nrow = n_time) A34 <- t((1 - S) * core$pi_SX / (pi_S * (1 - core$pi_SX)) * sweep(Y, 2, core$mu00)) %*% X_model / N A44 <- t(X_model) %*% diag(-core$pi_SX * (1 - core$pi_SX)) %*% X_model / N - block_dim <- n_time + n_time + n_time + n_ps + block_dim <- 3 * n_time + n_ps A_mat <- matrix(0, nrow = block_dim, ncol = block_dim) A_mat[1:n_time, 1:n_time] <- diag(-1, n_time) - idx2 <- (n_time + 1):(2 * n_time) - A_mat[idx2, idx2] <- diag(-1, n_time) - idx3 <- (2 * n_time + 1):(3 * n_time) - A_mat[idx3, idx3] <- A33 - idx4 <- (3 * n_time + 1):block_dim - A_mat[idx4, idx4] <- A44 - A_mat[idx3, idx4] <- A34 + A_mat[(n_time + 1):(2 * n_time), (n_time + 1):(2 * n_time)] <- diag(-1, n_time) + A_mat[(2 * n_time + 1):(3 * n_time), (2 * n_time + 1):(3 * n_time)] <- A33 + A_mat[(3 * n_time + 1):block_dim, (3 * n_time + 1):block_dim] <- A44 + A_mat[(2 * n_time + 1):(3 * n_time), (3 * n_time + 1):block_dim] <- A34 + # meat: influence functions phi1 <- S * A * sweep(Y, 2, core$mu1) / core$pi_A / pi_S phi2 <- S * (1 - A) * sweep(Y, 2, core$mu10) / (1 - core$pi_A) / pi_S phi3 <- (1 - S) * sweep(Y, 2, core$mu00) * core$w00 / (1 - pi_S) phi_ps <- (S - core$pi_SX) * X_model B <- crossprod(cbind(phi1, phi2, phi3, phi_ps)) / N + # sandwich A_inv <- solve(A_mat) sigma <- A_inv %*% B %*% t(A_inv) @@ -283,7 +254,7 @@ setMethod("estimate", "ec_ipw_method", function(method, data, outcomes, ) sd_tau <- sqrt(diag(coef_mat %*% sigma %*% t(coef_mat) / N)) - list(tau = core$tau, sd_tau = sd_tau, borrow_weight = core$borrow_weight) + list(tau = core$tau, sd_tau = sd_tau) } # bootstrap statistic (calls _core, returns tau only)---- diff --git a/R/method_weighting_class.R b/R/method_weighting_class.R index fa89c8d..1a35b46 100644 --- a/R/method_weighting_class.R +++ b/R/method_weighting_class.R @@ -94,3 +94,49 @@ setup_method_weighting <- function(method_name = "IPW", model_form_mu1_rct = model_form_mu1_rct ) } + +# shared helpers for weighting methods---- + +# prepares the internal data frame used by weighting estimators---- +#' @noRd +.build_analysis_df <- function(data, outcomes, treatment, trial_status, + covariates) { + Y <- as.matrix(data[, outcomes, drop = FALSE]) + data.frame(Y, S = data[[trial_status]], A = data[[treatment]], + data[, covariates, drop = FALSE]) +} + +# formats results and optionally runs bootstrap for weighting methods---- +#' @noRd +.format_primary_results <- function(tau, sd_tau, borrow_weight, n_time, + alpha, method, df, quiet, statistic, + ...) { + cutoff <- qnorm(1 - alpha / 2) + + if (!is.null(method@bootstrap)) { + if (!quiet) cat("Running bootstrap inference...\n") + boot_res <- .run_bootstrap( + df = df, statistic = statistic, + n_estimates = n_time, bootstrap = method@bootstrap, + bootstrap_ci_type = method@bootstrap_ci_type, alpha = alpha, + borrow_wt = borrow_weight, ... + ) + results <- data.frame( + point_estimates = tau, + standard_deviation = boot_res$sd_boot, + lower_CI_boot = boot_res$lower_ci, + upper_CI_boot = boot_res$upper_ci, + row.names = paste0("tau", seq_len(n_time)) + ) + } else { + results <- data.frame( + point_estimates = tau, + standard_deviation = sd_tau, + lower_CI_normal = tau - sd_tau * cutoff, + upper_CI_normal = tau + sd_tau * cutoff, + row.names = paste0("tau", seq_len(n_time)) + ) + } + + list(results = results, borrow_weight = borrow_weight) +} diff --git a/R/scm_method.R b/R/scm_method.R index c59a5d6..ba35fee 100644 --- a/R/scm_method.R +++ b/R/scm_method.R @@ -4,7 +4,7 @@ NULL # s4 class definition---- .scm_method <- setClass( "scm_method", - contains = "method_OLE_obj", + contains = "method_SCM_obj", slots = c( lambda_min = "numeric", lambda_max = "numeric", From 12140bb892b009046dbee0d41f4344920324fb17 Mon Sep 17 00:00:00 2001 From: Matt Secrest Date: Fri, 15 May 2026 09:56:30 -0700 Subject: [PATCH 15/16] move estimate, add comments --- R/ec_aipw.R | 16 ++++--- R/ec_ipw.R | 112 ++++++++++++++++++++++++++++------------------- R/method_class.R | 14 ++++++ man/ec_ipw.Rd | 13 +++--- man/estimate.Rd | 6 +-- 5 files changed, 100 insertions(+), 61 deletions(-) diff --git a/R/ec_aipw.R b/R/ec_aipw.R index 3a7d697..bc0adcb 100644 --- a/R/ec_aipw.R +++ b/R/ec_aipw.R @@ -10,7 +10,7 @@ NULL outcome_formula = "character", weight = "numericOrNULL", bootstrap = "numericOrNULL", - bootstrap_ci_type = "character" + bootstrap_ci_type = "characterOrNULL" ), prototype = list( method_name = "EC-AIPW", @@ -18,7 +18,7 @@ NULL outcome_formula = "", weight = NULL, bootstrap = NULL, - bootstrap_ci_type = "perc" + bootstrap_ci_type = NULL ) ) @@ -68,12 +68,14 @@ ec_aipw <- function(ps_formula, checkmate::assert_number(weight, lower = 0, upper = 1, null.ok = TRUE) checkmate::assert_count(bootstrap, positive = TRUE, null.ok = TRUE) - if (is.null(bootstrap_ci_type)) { + if (!is.null(bootstrap) && is.null(bootstrap_ci_type)) { bootstrap_ci_type <- "perc" } - checkmate::assert_choice( - bootstrap_ci_type, c("perc", "bca", "norm", "basic", "stud") - ) + if (!is.null(bootstrap_ci_type)) { + checkmate::assert_choice( + bootstrap_ci_type, c("perc", "bca", "norm", "basic", "stud") + ) + } .ec_aipw_method( ps_formula = ps_formula, @@ -85,7 +87,7 @@ ec_aipw <- function(ps_formula, bootstrap_flag = !is.null(bootstrap), bootstrap_obj = .bootstrap_obj( replicates = bootstrap %||% 500L, - bootstrap_CI_type = bootstrap_ci_type + bootstrap_CI_type = bootstrap_ci_type %||% "perc" ) ) } diff --git a/R/ec_ipw.R b/R/ec_ipw.R index 6cc8d24..1526b52 100644 --- a/R/ec_ipw.R +++ b/R/ec_ipw.R @@ -3,8 +3,9 @@ NULL # class unions---- setClassUnion("numericOrNULL", c("numeric", "NULL")) +setClassUnion("characterOrNULL", c("character", "NULL")) -# S4 class definition---- +# s4 class definition---- .ec_ipw_method <- setClass( "ec_ipw_method", contains = "method_weighting_obj", @@ -12,14 +13,14 @@ setClassUnion("numericOrNULL", c("numeric", "NULL")) ps_formula = "character", weight = "numericOrNULL", bootstrap = "numericOrNULL", - bootstrap_ci_type = "character" + bootstrap_ci_type = "characterOrNULL" ), prototype = list( method_name = "EC-IPW", ps_formula = "", weight = NULL, bootstrap = NULL, - bootstrap_ci_type = "perc" + bootstrap_ci_type = NULL ) ) @@ -38,9 +39,10 @@ setClassUnion("numericOrNULL", c("numeric", "NULL")) #' optimal weight, \code{0} for RCT-only, or a value in (0, 1]. #' @param bootstrap Number of bootstrap replicates, or \code{NULL} #' (default) for sandwich variance with normal CIs. -#' @param bootstrap_ci_type Bootstrap CI type. Defaults to \code{"perc"} -#' when \code{bootstrap} is non-NULL. One of \code{"perc"}, \code{"bca"}, -#' \code{"norm"}, \code{"basic"}, or \code{"stud"}. +#' @param bootstrap_ci_type Bootstrap CI type, or \code{NULL} (default) +#' which resolves to \code{"perc"} when \code{bootstrap} is set. One of +#' \code{"perc"}, \code{"bca"}, \code{"norm"}, \code{"basic"}, or +#' \code{"stud"}. #' #' @return An S4 object of class \code{ec_ipw_method}. #' @@ -52,13 +54,13 @@ setClassUnion("numericOrNULL", c("numeric", "NULL")) #' @export #' #' @examples -#' # Optimal weight, sandwich SE +#' # optimal weight, sandwich SE #' ec_ipw(ps_formula = "S ~ x1 + x2 + x3 + x4 + x5") #' -#' # No borrowing +#' # no borrowing #' ec_ipw(ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", weight = 0) #' -#' # Fixed weight with bootstrap +#' # fixed weight with bootstrap #' ec_ipw( #' ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", #' weight = 0.3, @@ -72,12 +74,15 @@ ec_ipw <- function(ps_formula, checkmate::assert_number(weight, lower = 0, upper = 1, null.ok = TRUE) checkmate::assert_count(bootstrap, positive = TRUE, null.ok = TRUE) - if (is.null(bootstrap_ci_type)) { - bootstrap_ci_type <- if (!is.null(bootstrap)) "perc" else "perc" + # resolve ci type only when bootstrap is requested + if (!is.null(bootstrap) && is.null(bootstrap_ci_type)) { + bootstrap_ci_type <- "perc" + } + if (!is.null(bootstrap_ci_type)) { + checkmate::assert_choice( + bootstrap_ci_type, c("perc", "bca", "norm", "basic", "stud") + ) } - checkmate::assert_choice( - bootstrap_ci_type, c("perc", "bca", "norm", "basic", "stud") - ) .ec_ipw_method( ps_formula = ps_formula, @@ -88,30 +93,19 @@ ec_ipw <- function(ps_formula, bootstrap_flag = !is.null(bootstrap), bootstrap_obj = .bootstrap_obj( replicates = bootstrap %||% 500L, - bootstrap_CI_type = bootstrap_ci_type + bootstrap_CI_type = bootstrap_ci_type %||% "perc" ) ) } -# estimate() generic and method---- - -#' Run estimation for a method object -#' -#' S4 generic that dispatches to the appropriate estimation logic -#' based on the method class. Each method defines its own arguments. -#' -#' @param method An S4 method object (e.g., from \code{\link{ec_ipw}}). -#' @param ... Method-specific arguments (data, outcomes, etc.). -#' -#' @return A list with estimation results. -#' @export -setGeneric("estimate", function(method, ...) standardGeneric("estimate")) +# estimate() method---- #' @rdname estimate setMethod("estimate", "ec_ipw_method", function(method, data, outcomes, treatment, trial_status, covariates, alpha = 0.05, quiet = TRUE) { + # replace formula LHS with the actual trial status column name ps_formula <- sub("^[^~]*~", paste0(trial_status, " ~"), method@ps_formula) df <- .build_analysis_df(data, outcomes, treatment, trial_status, covariates) n_time <- length(outcomes) @@ -121,19 +115,24 @@ setMethod("estimate", "ec_ipw_method", function(method, data, outcomes, weight <- method@weight use_borrowing <- is.null(weight) || weight > 0 + # weight=0 uses rct-only path (no PS model needed) if (!use_borrowing) { Y <- as.matrix(df[, outcomes, drop = FALSE]) result <- .ec_ipw_no_borrow(df, Y, df$S, df$A, sum(df$S), n_time) borrow_weight <- 0 } else { - core <- .ec_ipw_borrow_core(df, as.matrix(df[, outcomes, drop = FALSE]), - df$S, df$A, sum(df$S), nrow(df), - sum(df$S) / nrow(df), n_time, - ps_formula, weight) + # weight>0 or NULL: fit PS model, compute borrowing weight, sandwich SE + core <- .ec_ipw_borrow_core( + df, as.matrix(df[, outcomes, drop = FALSE]), + df$S, df$A, sum(df$S), nrow(df), + sum(df$S) / nrow(df), n_time, + ps_formula, weight + ) result <- .ec_ipw_sandwich(df, core, n_time) borrow_weight <- core$borrow_weight } + # format output and optionally run bootstrap .format_primary_results( tau = result$tau, sd_tau = result$sd_tau, borrow_weight = borrow_weight, @@ -147,69 +146,87 @@ setMethod("estimate", "ec_ipw_method", function(method, data, outcomes, # internal helpers---- -# point estimate for weight=0 (rct-only, no PS model)---- +# rct-only point estimate (no PS model, Hajek estimator)---- #' @noRd .ec_ipw_no_borrow_core <- function(df, Y, S, A, n, n_time) { rct <- df[df$S == 1, , drop = FALSE] Y_rct <- Y[S == 1, , drop = FALSE] pi_A <- sum(rct$A) / n + # normalized IPTW (Hajek estimator) -- Def 1 with w=0 potential <- (rct$A / pi_A + (1 - rct$A) / (1 - pi_A)) * Y_rct mu1 <- colSums(potential[rct$A == 1, , drop = FALSE]) / n mu0 <- colSums(potential[rct$A == 0, , drop = FALSE]) / n - list(tau = mu1 - mu0, pi_A = pi_A, mu1 = mu1, mu0 = mu0, - Y_rct = Y_rct, rct = rct) + list( + tau = mu1 - mu0, pi_A = pi_A, + mu1 = mu1, mu0 = mu0, + Y_rct = Y_rct, rct = rct + ) } -# sandwich variance for weight=0 case---- +# rct-only sandwich variance (Theorem 3 with w=0)---- #' @noRd .ec_ipw_no_borrow <- function(df, Y, S, A, n, n_time) { core <- .ec_ipw_no_borrow_core(df, Y, S, A, n, n_time) + # influence functions for treated and control phi1 <- core$rct$A * sweep(core$Y_rct, 2, core$mu1) / core$pi_A phi0 <- (1 - core$rct$A) * sweep(core$Y_rct, 2, core$mu0) / (1 - core$pi_A) B <- crossprod(cbind(phi1, phi0)) / n + # tau = mu1 - mu0, so coef_mat = [I, -I] coef_mat <- cbind(diag(n_time), -diag(n_time)) sd_tau <- sqrt(diag(coef_mat %*% B %*% t(coef_mat) / n)) list(tau = core$tau, sd_tau = sd_tau) } -# point estimate with borrowing (shared by estimate and bootstrap)---- +# point estimate with borrowing (Def 1, shared by estimate and bootstrap)---- +# fits PS model, computes density ratio weights W00 (Eq 4), mu11/mu10/mu00, +# and optimal weight (Eq 11). returns intermediates for sandwich variance. #' @noRd .ec_ipw_borrow_core <- function(df, Y, S, A, n, N, pi_S, n_time, ps_formula, weight) { + # propensity score model for trial participation ps_model <- glm(as.formula(ps_formula), data = df, family = "binomial") pi_SX <- predict(ps_model, newdata = df, type = "response") pi_A <- sum(A[S == 1]) / n + + # density ratio weights W00 = pi_S(X)(1-pi_S) / (1-pi_S(X))pi_S (Eq 4) rx <- (pi_SX / (1 - pi_SX)) * ((1 - pi_S) / pi_S) w11 <- pi_A w10 <- 1 - pi_A w00 <- rx + # normalized weighted outcomes (Def 1, Eq 6) potential <- (S * A / w11 + S * (1 - A) / w10 + (1 - S) * w00) * Y mu1 <- colSums(potential[S == 1 & A == 1, , drop = FALSE]) / sum(S * A / w11) mu10 <- colSums(potential[S == 1 & A == 0, , drop = FALSE]) / sum(S * (1 - A) / w10) mu00 <- colSums(potential[S == 0, , drop = FALSE]) / sum((1 - S) * w00) - # optimal weight (Eq. 11 from Zhou et al. 2024) + # data-adaptive optimal weight (Eq 11, variance ratio approximation) num <- sum(S * (1 - A) / w10^2 / (sum(S * (1 - A) / w10))^2) denom <- sum((1 - S) * w00^2 / (sum((1 - S) * w00))^2) w_opt <- num / (num + denom) borrow_weight <- if (is.null(weight)) w_opt else weight + # hybrid control: convex combination of trial and external controls mu0 <- (1 - borrow_weight) * mu10 + borrow_weight * mu00 tau <- mu1 - mu0 - list(tau = tau, borrow_weight = borrow_weight, ps_model = ps_model, - pi_SX = pi_SX, pi_A = pi_A, w00 = w00, w10 = w10, - mu1 = mu1, mu10 = mu10, mu00 = mu00) + list( + tau = tau, borrow_weight = borrow_weight, + ps_model = ps_model, pi_SX = pi_SX, pi_A = pi_A, + w00 = w00, w10 = w10, + mu1 = mu1, mu10 = mu10, mu00 = mu00 + ) } -# sandwich variance with borrowing (uses intermediates from _core)---- +# sandwich variance with borrowing (Theorem 3, Eq 12-13)---- +# constructs the M-estimator bread matrix A (block diagonal with PS block) +# and the meat matrix B (outer product of influence functions). #' @noRd .ec_ipw_sandwich <- function(df, core, n_time) { Y <- as.matrix(df[, seq_len(n_time), drop = FALSE]) @@ -221,7 +238,11 @@ setMethod("estimate", "ec_ipw_method", function(method, data, outcomes, X_model <- model.matrix(core$ps_model) n_ps <- ncol(X_model) - # bread matrix blocks + # bread: A matrix blocks (Eq 12) + # A11 = -I_T, A22 = -I_T (for mu1, mu10) + # A33 = external control block + # A34 = cross-term linking mu00 to PS params + # A44 = PS model Hessian A33 <- diag(rep(-mean((1 - S) * core$w00 / (1 - pi_S)), n_time), nrow = n_time) A34 <- t((1 - S) * core$pi_SX / (pi_S * (1 - core$pi_SX)) * sweep(Y, 2, core$mu00)) %*% X_model / N @@ -235,17 +256,18 @@ setMethod("estimate", "ec_ipw_method", function(method, data, outcomes, A_mat[(3 * n_time + 1):block_dim, (3 * n_time + 1):block_dim] <- A44 A_mat[(2 * n_time + 1):(3 * n_time), (3 * n_time + 1):block_dim] <- A34 - # meat: influence functions + # meat: influence functions (Psi_1 through Psi_4 in Theorem 3) phi1 <- S * A * sweep(Y, 2, core$mu1) / core$pi_A / pi_S phi2 <- S * (1 - A) * sweep(Y, 2, core$mu10) / (1 - core$pi_A) / pi_S phi3 <- (1 - S) * sweep(Y, 2, core$mu00) * core$w00 / (1 - pi_S) phi_ps <- (S - core$pi_SX) * X_model B <- crossprod(cbind(phi1, phi2, phi3, phi_ps)) / N - # sandwich + # sandwich: Sigma = A^{-1} B A^{-T} (Eq 13) A_inv <- solve(A_mat) sigma <- A_inv %*% B %*% t(A_inv) + # tau = mu1 - (1-w)*mu10 - w*mu00, extract variance via linear combination coef_mat <- cbind( diag(n_time), -(1 - core$borrow_weight) * diag(n_time), diff --git a/R/method_class.R b/R/method_class.R index 90edf8b..3418f0d 100644 --- a/R/method_class.R +++ b/R/method_class.R @@ -24,6 +24,20 @@ contains = "method_obj" ) +# estimate() generic---- + +#' Run estimation for a method object +#' +#' S4 generic that dispatches to the appropriate estimation logic +#' based on the method class. +#' +#' @param method An S4 method object (e.g., from \code{\link{ec_ipw}}). +#' @param ... Method-specific arguments (data, outcomes, etc.). +#' +#' @return A list with estimation results. +#' @export +setGeneric("estimate", function(method, ...) standardGeneric("estimate")) + # bootstrap helpers---- #' @noRd diff --git a/man/ec_ipw.Rd b/man/ec_ipw.Rd index 076ad18..884980f 100644 --- a/man/ec_ipw.Rd +++ b/man/ec_ipw.Rd @@ -17,9 +17,10 @@ optimal weight, \code{0} for RCT-only, or a value in (0, 1].} \item{bootstrap}{Number of bootstrap replicates, or \code{NULL} (default) for sandwich variance with normal CIs.} -\item{bootstrap_ci_type}{Bootstrap CI type. Defaults to \code{"perc"} -when \code{bootstrap} is non-NULL. One of \code{"perc"}, \code{"bca"}, -\code{"norm"}, \code{"basic"}, or \code{"stud"}.} +\item{bootstrap_ci_type}{Bootstrap CI type, or \code{NULL} (default) +which resolves to \code{"perc"} when \code{bootstrap} is set. One of +\code{"perc"}, \code{"bca"}, \code{"norm"}, \code{"basic"}, or +\code{"stud"}.} } \value{ An S4 object of class \code{ec_ipw_method}. @@ -30,13 +31,13 @@ borrowing (Zhou et al., 2024). Pass to \code{\link{setup_analysis_primary}} and \code{\link{run_analysis}}. } \examples{ -# Optimal weight, sandwich SE +# optimal weight, sandwich SE ec_ipw(ps_formula = "S ~ x1 + x2 + x3 + x4 + x5") -# No borrowing +# no borrowing ec_ipw(ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", weight = 0) -# Fixed weight with bootstrap +# fixed weight with bootstrap ec_ipw( ps_formula = "S ~ x1 + x2 + x3 + x4 + x5", weight = 0.3, diff --git a/man/estimate.Rd b/man/estimate.Rd index 3fbd2fe..be8f251 100644 --- a/man/estimate.Rd +++ b/man/estimate.Rd @@ -1,6 +1,6 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ec_ipw.R, R/did_ec_ipw.R, R/did_ec_aipw.R, -% R/did_ec_or.R, R/ec_aipw.R, R/scm_method.R +% Please edit documentation in R/method_class.R, R/ec_ipw.R, R/did_ec_ipw.R, +% R/did_ec_aipw.R, R/did_ec_or.R, R/ec_aipw.R, R/scm_method.R \name{estimate} \alias{estimate} \alias{estimate,ec_ipw_method-method} @@ -93,5 +93,5 @@ A list with estimation results. } \description{ S4 generic that dispatches to the appropriate estimation logic -based on the method class. Each method defines its own arguments. +based on the method class. } From 20fb2f6addd25ddd03d82674432918367ad2713e Mon Sep 17 00:00:00 2001 From: Matt Secrest Date: Fri, 15 May 2026 10:47:17 -0700 Subject: [PATCH 16/16] rename --- DESCRIPTION | 12 ++++++------ R/{DID_EC_AIPW.R => legacy_DID_EC_AIPW.R} | 2 +- ...PW_bootstrap.R => legacy_DID_EC_AIPW_bootstrap.R} | 0 R/{DID_EC_IPW.R => legacy_DID_EC_IPW.R} | 2 +- ...IPW_bootstrap.R => legacy_DID_EC_IPW_bootstrap.R} | 0 R/{DID_EC_OR.R => legacy_DID_EC_OR.R} | 2 +- ...C_OR_bootstrap.R => legacy_DID_EC_OR_bootstrap.R} | 0 R/run_analysis.R | 6 +++--- 8 files changed, 12 insertions(+), 12 deletions(-) rename R/{DID_EC_AIPW.R => legacy_DID_EC_AIPW.R} (99%) rename R/{DID_EC_AIPW_bootstrap.R => legacy_DID_EC_AIPW_bootstrap.R} (100%) rename R/{DID_EC_IPW.R => legacy_DID_EC_IPW.R} (99%) rename R/{DID_EC_IPW_bootstrap.R => legacy_DID_EC_IPW_bootstrap.R} (100%) rename R/{DID_EC_OR.R => legacy_DID_EC_OR.R} (99%) rename R/{DID_EC_OR_bootstrap.R => legacy_DID_EC_OR_bootstrap.R} (100%) diff --git a/DESCRIPTION b/DESCRIPTION index 69a579b..e9e3c9a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,12 +73,6 @@ Suggests: testthat (>= 3.0.0) Config/testthat/edition: 3 Collate: - 'DID_EC_AIPW_bootstrap.R' - 'DID_EC_AIPW.R' - 'DID_EC_IPW_bootstrap.R' - 'DID_EC_IPW.R' - 'DID_EC_OR_bootstrap.R' - 'DID_EC_OR.R' 'EC_AIPW_OPT_bootstrap.R' 'EC_AIPW_OPT.R' 'EC_IPW_OPT_bootstrap.R' @@ -99,6 +93,12 @@ Collate: 'did_ec_aipw.R' 'did_ec_or.R' 'ec_aipw.R' + 'legacy_DID_EC_AIPW_bootstrap.R' + 'legacy_DID_EC_AIPW.R' + 'legacy_DID_EC_IPW_bootstrap.R' + 'legacy_DID_EC_IPW.R' + 'legacy_DID_EC_OR_bootstrap.R' + 'legacy_DID_EC_OR.R' 'package.R' 'rdborrow-package.R' 'run_analysis.R' diff --git a/R/DID_EC_AIPW.R b/R/legacy_DID_EC_AIPW.R similarity index 99% rename from R/DID_EC_AIPW.R rename to R/legacy_DID_EC_AIPW.R index 97c310c..d1f1b08 100644 --- a/R/DID_EC_AIPW.R +++ b/R/legacy_DID_EC_AIPW.R @@ -17,7 +17,7 @@ #' @param T_cross numeric. The time point that separates the placebo-control period and the follow-up period. #' @param quiet Logical. If \code{TRUE}, suppress printed output. #' -#' @include DID_EC_AIPW_bootstrap.R +#' @include legacy_DID_EC_AIPW_bootstrap.R #' @return tau and standard deviation #' @noRd #' diff --git a/R/DID_EC_AIPW_bootstrap.R b/R/legacy_DID_EC_AIPW_bootstrap.R similarity index 100% rename from R/DID_EC_AIPW_bootstrap.R rename to R/legacy_DID_EC_AIPW_bootstrap.R diff --git a/R/DID_EC_IPW.R b/R/legacy_DID_EC_IPW.R similarity index 99% rename from R/DID_EC_IPW.R rename to R/legacy_DID_EC_IPW.R index 5657811..845e40f 100644 --- a/R/DID_EC_IPW.R +++ b/R/legacy_DID_EC_IPW.R @@ -14,7 +14,7 @@ #' @param alpha Significance level. #' @param quiet Logical. If \code{TRUE}, suppress printed output. #' -#' @include DID_EC_IPW_bootstrap.R +#' @include legacy_DID_EC_IPW_bootstrap.R #' @return tau and standard deviation #' @noRd #' diff --git a/R/DID_EC_IPW_bootstrap.R b/R/legacy_DID_EC_IPW_bootstrap.R similarity index 100% rename from R/DID_EC_IPW_bootstrap.R rename to R/legacy_DID_EC_IPW_bootstrap.R diff --git a/R/DID_EC_OR.R b/R/legacy_DID_EC_OR.R similarity index 99% rename from R/DID_EC_OR.R rename to R/legacy_DID_EC_OR.R index 629343f..aabb244 100644 --- a/R/DID_EC_OR.R +++ b/R/legacy_DID_EC_OR.R @@ -15,7 +15,7 @@ #' @param alpha Significance level. #' @param quiet Logical. If \code{TRUE}, suppress printed output. #' -#' @include DID_EC_OR_bootstrap.R +#' @include legacy_DID_EC_OR_bootstrap.R #' @return tau and standard deviation #' @noRd #' diff --git a/R/DID_EC_OR_bootstrap.R b/R/legacy_DID_EC_OR_bootstrap.R similarity index 100% rename from R/DID_EC_OR_bootstrap.R rename to R/legacy_DID_EC_OR_bootstrap.R diff --git a/R/run_analysis.R b/R/run_analysis.R index 178e4ab..4c59ca2 100644 --- a/R/run_analysis.R +++ b/R/run_analysis.R @@ -7,9 +7,9 @@ #' @return a list containing: tau (effect size), sd.tau (standard deviation), wt (weight) #' @include EC_IPW_OPT.R #' @include EC_AIPW_OPT.R -#' @include DID_EC_IPW.R -#' @include DID_EC_AIPW.R -#' @include DID_EC_OR.R +#' @include legacy_DID_EC_IPW.R +#' @include legacy_DID_EC_AIPW.R +#' @include legacy_DID_EC_OR.R #' @include SCM.R #' #' @export