diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..00f60529 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ + 1 { + 2 "python.terminal.activateEnvironment": false + 3 } diff --git a/001_sampling_files/figure-pdf/cell-43-output-1.pdf b/001_sampling_files/figure-pdf/cell-43-output-1.pdf index a5775f5c..d2e9fd46 100644 Binary files a/001_sampling_files/figure-pdf/cell-43-output-1.pdf and b/001_sampling_files/figure-pdf/cell-43-output-1.pdf differ diff --git a/001_sampling_files/figure-pdf/cell-47-output-1.pdf b/001_sampling_files/figure-pdf/cell-47-output-1.pdf index 354f0ee1..47bfc76b 100644 Binary files a/001_sampling_files/figure-pdf/cell-47-output-1.pdf and b/001_sampling_files/figure-pdf/cell-47-output-1.pdf differ diff --git a/001_sampling_files/figure-pdf/fig-forre08a-1-2-output-1.pdf b/001_sampling_files/figure-pdf/fig-forre08a-1-2-output-1.pdf index 1783c85f..6f4cdffb 100644 Binary files a/001_sampling_files/figure-pdf/fig-forre08a-1-2-output-1.pdf and b/001_sampling_files/figure-pdf/fig-forre08a-1-2-output-1.pdf differ diff --git a/001_sampling_files/figure-pdf/fig-forre08a-3-output-1.pdf b/001_sampling_files/figure-pdf/fig-forre08a-3-output-1.pdf index 55d95e61..765eccc6 100644 Binary files a/001_sampling_files/figure-pdf/fig-forre08a-3-output-1.pdf and b/001_sampling_files/figure-pdf/fig-forre08a-3-output-1.pdf differ diff --git a/001_sampling_files/figure-pdf/fig-forre08a-4-output-1.pdf b/001_sampling_files/figure-pdf/fig-forre08a-4-output-1.pdf index 67716cac..7e940118 100644 Binary files a/001_sampling_files/figure-pdf/fig-forre08a-4-output-1.pdf and b/001_sampling_files/figure-pdf/fig-forre08a-4-output-1.pdf differ diff --git a/001_sampling_files/figure-pdf/fig-forre08a-6-output-2.pdf b/001_sampling_files/figure-pdf/fig-forre08a-6-output-2.pdf index 65443a10..17c49228 100644 Binary files a/001_sampling_files/figure-pdf/fig-forre08a-6-output-2.pdf and 
b/001_sampling_files/figure-pdf/fig-forre08a-6-output-2.pdf differ diff --git a/001_sampling_files/figure-pdf/fig-fullfactorial-2d-edges0-output-1.pdf b/001_sampling_files/figure-pdf/fig-fullfactorial-2d-edges0-output-1.pdf index 0aaa20ee..f5c3333c 100644 Binary files a/001_sampling_files/figure-pdf/fig-fullfactorial-2d-edges0-output-1.pdf and b/001_sampling_files/figure-pdf/fig-fullfactorial-2d-edges0-output-1.pdf differ diff --git a/001_sampling_files/figure-pdf/fig-fullfactorial-2d-edges1-output-1.pdf b/001_sampling_files/figure-pdf/fig-fullfactorial-2d-edges1-output-1.pdf index bb90eeb8..f61f7a51 100644 Binary files a/001_sampling_files/figure-pdf/fig-fullfactorial-2d-edges1-output-1.pdf and b/001_sampling_files/figure-pdf/fig-fullfactorial-2d-edges1-output-1.pdf differ diff --git a/001_sampling_files/figure-pdf/fig-jd-3points-output-1.pdf b/001_sampling_files/figure-pdf/fig-jd-3points-output-1.pdf index 5926879a..227c1b2d 100644 Binary files a/001_sampling_files/figure-pdf/fig-jd-3points-output-1.pdf and b/001_sampling_files/figure-pdf/fig-jd-3points-output-1.pdf differ diff --git a/001_sampling_files/figure-pdf/fig-mm-3points-output-1.pdf b/001_sampling_files/figure-pdf/fig-mm-3points-output-1.pdf index b731a837..0152ee70 100644 Binary files a/001_sampling_files/figure-pdf/fig-mm-3points-output-1.pdf and b/001_sampling_files/figure-pdf/fig-mm-3points-output-1.pdf differ diff --git a/001_sampling_files/figure-pdf/fig-randorient-2d-output-1.pdf b/001_sampling_files/figure-pdf/fig-randorient-2d-output-1.pdf index 5b607a53..53d7cf5c 100644 Binary files a/001_sampling_files/figure-pdf/fig-randorient-2d-output-1.pdf and b/001_sampling_files/figure-pdf/fig-randorient-2d-output-1.pdf differ diff --git a/001_sampling_files/figure-pdf/fig-rlh-edges0-output-1.pdf b/001_sampling_files/figure-pdf/fig-rlh-edges0-output-1.pdf index e8db66f0..05fb9783 100644 Binary files a/001_sampling_files/figure-pdf/fig-rlh-edges0-output-1.pdf and 
b/001_sampling_files/figure-pdf/fig-rlh-edges0-output-1.pdf differ diff --git a/001_sampling_files/figure-pdf/fig-rlh-edges1-output-1.pdf b/001_sampling_files/figure-pdf/fig-rlh-edges1-output-1.pdf index b34847cc..518bc957 100644 Binary files a/001_sampling_files/figure-pdf/fig-rlh-edges1-output-1.pdf and b/001_sampling_files/figure-pdf/fig-rlh-edges1-output-1.pdf differ diff --git a/002_awwe_files/figure-pdf/cell-12-output-1.pdf b/002_awwe_files/figure-pdf/cell-12-output-1.pdf index 2c8cf879..251a7c53 100644 Binary files a/002_awwe_files/figure-pdf/cell-12-output-1.pdf and b/002_awwe_files/figure-pdf/cell-12-output-1.pdf differ diff --git a/002_awwe_files/figure-pdf/cell-14-output-1.pdf b/002_awwe_files/figure-pdf/cell-14-output-1.pdf index ddf7c640..dbf0e20f 100644 Binary files a/002_awwe_files/figure-pdf/cell-14-output-1.pdf and b/002_awwe_files/figure-pdf/cell-14-output-1.pdf differ diff --git a/002_awwe_files/figure-pdf/cell-6-output-1.pdf b/002_awwe_files/figure-pdf/cell-6-output-1.pdf index 67939be4..6dc7e6eb 100644 Binary files a/002_awwe_files/figure-pdf/cell-6-output-1.pdf and b/002_awwe_files/figure-pdf/cell-6-output-1.pdf differ diff --git a/002_awwe_files/figure-pdf/cell-7-output-1.pdf b/002_awwe_files/figure-pdf/cell-7-output-1.pdf index ea51caf5..7b743c89 100644 Binary files a/002_awwe_files/figure-pdf/cell-7-output-1.pdf and b/002_awwe_files/figure-pdf/cell-7-output-1.pdf differ diff --git a/002_awwe_files/figure-pdf/cell-8-output-1.pdf b/002_awwe_files/figure-pdf/cell-8-output-1.pdf index fc73dd91..00453955 100644 Binary files a/002_awwe_files/figure-pdf/cell-8-output-1.pdf and b/002_awwe_files/figure-pdf/cell-8-output-1.pdf differ diff --git a/002_awwe_files/figure-pdf/fig-awwe-example-output-1.pdf b/002_awwe_files/figure-pdf/fig-awwe-example-output-1.pdf index 3e5b8d26..14f79e1b 100644 Binary files a/002_awwe_files/figure-pdf/fig-awwe-example-output-1.pdf and b/002_awwe_files/figure-pdf/fig-awwe-example-output-1.pdf differ diff --git 
a/005_num_rsm_files/figure-pdf/cell-11-output-2.pdf b/005_num_rsm_files/figure-pdf/cell-11-output-2.pdf index f6788a85..5bfc2549 100644 Binary files a/005_num_rsm_files/figure-pdf/cell-11-output-2.pdf and b/005_num_rsm_files/figure-pdf/cell-11-output-2.pdf differ diff --git a/005_num_rsm_files/figure-pdf/cell-13-output-2.pdf b/005_num_rsm_files/figure-pdf/cell-13-output-2.pdf index 58a152ed..7b72d8d2 100644 Binary files a/005_num_rsm_files/figure-pdf/cell-13-output-2.pdf and b/005_num_rsm_files/figure-pdf/cell-13-output-2.pdf differ diff --git a/005_num_rsm_files/figure-pdf/cell-15-output-2.pdf b/005_num_rsm_files/figure-pdf/cell-15-output-2.pdf index 1e10f1d0..f47e61d2 100644 Binary files a/005_num_rsm_files/figure-pdf/cell-15-output-2.pdf and b/005_num_rsm_files/figure-pdf/cell-15-output-2.pdf differ diff --git a/005_num_rsm_files/figure-pdf/cell-2-output-1.pdf b/005_num_rsm_files/figure-pdf/cell-2-output-1.pdf index e4f16d7a..0c7a2407 100644 Binary files a/005_num_rsm_files/figure-pdf/cell-2-output-1.pdf and b/005_num_rsm_files/figure-pdf/cell-2-output-1.pdf differ diff --git a/005_num_rsm_files/figure-pdf/cell-3-output-2.pdf b/005_num_rsm_files/figure-pdf/cell-3-output-2.pdf index d6540268..0d2c8256 100644 Binary files a/005_num_rsm_files/figure-pdf/cell-3-output-2.pdf and b/005_num_rsm_files/figure-pdf/cell-3-output-2.pdf differ diff --git a/005_num_rsm_files/figure-pdf/cell-5-output-2.pdf b/005_num_rsm_files/figure-pdf/cell-5-output-2.pdf index bbd03cbb..7658cdb9 100644 Binary files a/005_num_rsm_files/figure-pdf/cell-5-output-2.pdf and b/005_num_rsm_files/figure-pdf/cell-5-output-2.pdf differ diff --git a/005_num_rsm_files/figure-pdf/cell-7-output-2.pdf b/005_num_rsm_files/figure-pdf/cell-7-output-2.pdf index 1584ec03..987f7504 100644 Binary files a/005_num_rsm_files/figure-pdf/cell-7-output-2.pdf and b/005_num_rsm_files/figure-pdf/cell-7-output-2.pdf differ diff --git a/005_num_rsm_files/figure-pdf/cell-9-output-2.pdf 
b/005_num_rsm_files/figure-pdf/cell-9-output-2.pdf index 88ba88dd..c679b9a5 100644 Binary files a/005_num_rsm_files/figure-pdf/cell-9-output-2.pdf and b/005_num_rsm_files/figure-pdf/cell-9-output-2.pdf differ diff --git a/005_num_rsm_files/figure-pdf/linreg-example-output-2.pdf b/005_num_rsm_files/figure-pdf/linreg-example-output-2.pdf index 550726b9..965ad1eb 100644 Binary files a/005_num_rsm_files/figure-pdf/linreg-example-output-2.pdf and b/005_num_rsm_files/figure-pdf/linreg-example-output-2.pdf differ diff --git a/006_num_gp_files/figure-pdf/cell-21-output-1.pdf b/006_num_gp_files/figure-pdf/cell-21-output-1.pdf index ea1eb1da..06c17f19 100644 Binary files a/006_num_gp_files/figure-pdf/cell-21-output-1.pdf and b/006_num_gp_files/figure-pdf/cell-21-output-1.pdf differ diff --git a/006_num_gp_files/figure-pdf/cell-22-output-2.pdf b/006_num_gp_files/figure-pdf/cell-22-output-2.pdf index 0b0923e8..70982730 100644 Binary files a/006_num_gp_files/figure-pdf/cell-22-output-2.pdf and b/006_num_gp_files/figure-pdf/cell-22-output-2.pdf differ diff --git a/006_num_gp_files/figure-pdf/cell-22-output-4.pdf b/006_num_gp_files/figure-pdf/cell-22-output-4.pdf index b6e58516..e4bad20d 100644 Binary files a/006_num_gp_files/figure-pdf/cell-22-output-4.pdf and b/006_num_gp_files/figure-pdf/cell-22-output-4.pdf differ diff --git a/006_num_gp_files/figure-pdf/fig-corr-matrix-build_psi-output-1.pdf b/006_num_gp_files/figure-pdf/fig-corr-matrix-build_psi-output-1.pdf index bb92761d..9af0842b 100644 Binary files a/006_num_gp_files/figure-pdf/fig-corr-matrix-build_psi-output-1.pdf and b/006_num_gp_files/figure-pdf/fig-corr-matrix-build_psi-output-1.pdf differ diff --git a/006_num_gp_files/figure-pdf/fig-pval12-output-1.pdf b/006_num_gp_files/figure-pdf/fig-pval12-output-1.pdf index 4c5151bb..2d9853e6 100644 Binary files a/006_num_gp_files/figure-pdf/fig-pval12-output-1.pdf and b/006_num_gp_files/figure-pdf/fig-pval12-output-1.pdf differ diff --git 
a/006_num_gp_files/figure-pdf/fig-sin-corr-output-1.pdf b/006_num_gp_files/figure-pdf/fig-sin-corr-output-1.pdf index 3319ec14..82ab1f23 100644 Binary files a/006_num_gp_files/figure-pdf/fig-sin-corr-output-1.pdf and b/006_num_gp_files/figure-pdf/fig-sin-corr-output-1.pdf differ diff --git a/006_num_gp_files/figure-pdf/fig-sin-corr-pred-output-1.pdf b/006_num_gp_files/figure-pdf/fig-sin-corr-pred-output-1.pdf index ff357f1b..23a2cd4a 100644 Binary files a/006_num_gp_files/figure-pdf/fig-sin-corr-pred-output-1.pdf and b/006_num_gp_files/figure-pdf/fig-sin-corr-pred-output-1.pdf differ diff --git a/006_num_gp_files/figure-pdf/fig-sin-data-output-1.pdf b/006_num_gp_files/figure-pdf/fig-sin-data-output-1.pdf index e612fcbe..baa466d3 100644 Binary files a/006_num_gp_files/figure-pdf/fig-sin-data-output-1.pdf and b/006_num_gp_files/figure-pdf/fig-sin-data-output-1.pdf differ diff --git a/006_num_gp_files/figure-pdf/fig-theta12-output-1.pdf b/006_num_gp_files/figure-pdf/fig-theta12-output-1.pdf index 56bcf7b5..5ba30b24 100644 Binary files a/006_num_gp_files/figure-pdf/fig-theta12-output-1.pdf and b/006_num_gp_files/figure-pdf/fig-theta12-output-1.pdf differ diff --git a/006_num_poly_files/figure-pdf/fig-aerofoil-drag-output-1.pdf b/006_num_poly_files/figure-pdf/fig-aerofoil-drag-output-1.pdf index 2a0c5763..d0887bfa 100644 Binary files a/006_num_poly_files/figure-pdf/fig-aerofoil-drag-output-1.pdf and b/006_num_poly_files/figure-pdf/fig-aerofoil-drag-output-1.pdf differ diff --git a/006_num_poly_files/figure-pdf/fig-onevar-output-1.pdf b/006_num_poly_files/figure-pdf/fig-onevar-output-1.pdf index ba3f296b..e13409eb 100644 Binary files a/006_num_poly_files/figure-pdf/fig-onevar-output-1.pdf and b/006_num_poly_files/figure-pdf/fig-onevar-output-1.pdf differ diff --git a/006_num_rbf_files/figure-pdf/cell-7-output-2.pdf b/006_num_rbf_files/figure-pdf/cell-7-output-2.pdf index 523be94f..5f1515f5 100644 Binary files a/006_num_rbf_files/figure-pdf/cell-7-output-2.pdf and 
b/006_num_rbf_files/figure-pdf/cell-7-output-2.pdf differ diff --git a/006_num_rbf_files/figure-pdf/fig-rbf-approximation-output-1.pdf b/006_num_rbf_files/figure-pdf/fig-rbf-approximation-output-1.pdf index 4c6abff1..6a9e0b16 100644 Binary files a/006_num_rbf_files/figure-pdf/fig-rbf-approximation-output-1.pdf and b/006_num_rbf_files/figure-pdf/fig-rbf-approximation-output-1.pdf differ diff --git a/006_num_rbf_files/figure-pdf/fig-rbf-gaussian-k1-output-1.pdf b/006_num_rbf_files/figure-pdf/fig-rbf-gaussian-k1-output-1.pdf index 5384978f..494e4b79 100644 Binary files a/006_num_rbf_files/figure-pdf/fig-rbf-gaussian-k1-output-1.pdf and b/006_num_rbf_files/figure-pdf/fig-rbf-gaussian-k1-output-1.pdf differ diff --git a/006_num_rbf_files/figure-pdf/fig-rbf-gaussian-k2-output-1.pdf b/006_num_rbf_files/figure-pdf/fig-rbf-gaussian-k2-output-1.pdf index 83350310..8a85ea33 100644 Binary files a/006_num_rbf_files/figure-pdf/fig-rbf-gaussian-k2-output-1.pdf and b/006_num_rbf_files/figure-pdf/fig-rbf-gaussian-k2-output-1.pdf differ diff --git a/007_challenge_files/figure-pdf/fig-comparison-output-1.pdf b/007_challenge_files/figure-pdf/fig-comparison-output-1.pdf index a67b79f4..b07c5f69 100644 Binary files a/007_challenge_files/figure-pdf/fig-comparison-output-1.pdf and b/007_challenge_files/figure-pdf/fig-comparison-output-1.pdf differ diff --git a/007_challenge_files/figure-pdf/fig-convergence-output-1.pdf b/007_challenge_files/figure-pdf/fig-convergence-output-1.pdf index 8fc5d988..a7b66b09 100644 Binary files a/007_challenge_files/figure-pdf/fig-convergence-output-1.pdf and b/007_challenge_files/figure-pdf/fig-convergence-output-1.pdf differ diff --git a/007_challenge_files/figure-pdf/fig-landscapes-output-1.pdf b/007_challenge_files/figure-pdf/fig-landscapes-output-1.pdf index d5c422c9..350335c5 100644 Binary files a/007_challenge_files/figure-pdf/fig-landscapes-output-1.pdf and b/007_challenge_files/figure-pdf/fig-landscapes-output-1.pdf differ diff --git 
a/007_challenge_files/figure-pdf/fig-robot-arm-hard-output-1.pdf b/007_challenge_files/figure-pdf/fig-robot-arm-hard-output-1.pdf index de17ab86..6494a463 100644 Binary files a/007_challenge_files/figure-pdf/fig-robot-arm-hard-output-1.pdf and b/007_challenge_files/figure-pdf/fig-robot-arm-hard-output-1.pdf differ diff --git a/019_spotoptim_sk_matern_files/figure-pdf/cell-13-output-1.pdf b/019_spotoptim_sk_matern_files/figure-pdf/cell-13-output-1.pdf index 35153529..82e70e14 100644 Binary files a/019_spotoptim_sk_matern_files/figure-pdf/cell-13-output-1.pdf and b/019_spotoptim_sk_matern_files/figure-pdf/cell-13-output-1.pdf differ diff --git a/019_spotoptim_sk_matern_files/figure-pdf/cell-7-output-1.pdf b/019_spotoptim_sk_matern_files/figure-pdf/cell-7-output-1.pdf index 8fc32c21..a18825b6 100644 Binary files a/019_spotoptim_sk_matern_files/figure-pdf/cell-7-output-1.pdf and b/019_spotoptim_sk_matern_files/figure-pdf/cell-7-output-1.pdf differ diff --git a/100_ddmo_eda_files/figure-pdf/cell-12-output-1.pdf b/100_ddmo_eda_files/figure-pdf/cell-12-output-1.pdf index d325dbce..3b577756 100644 Binary files a/100_ddmo_eda_files/figure-pdf/cell-12-output-1.pdf and b/100_ddmo_eda_files/figure-pdf/cell-12-output-1.pdf differ diff --git a/100_ddmo_eda_files/figure-pdf/cell-17-output-1.pdf b/100_ddmo_eda_files/figure-pdf/cell-17-output-1.pdf index dbacc9ed..26b390d4 100644 Binary files a/100_ddmo_eda_files/figure-pdf/cell-17-output-1.pdf and b/100_ddmo_eda_files/figure-pdf/cell-17-output-1.pdf differ diff --git a/100_ddmo_eda_files/figure-pdf/cell-18-output-1.pdf b/100_ddmo_eda_files/figure-pdf/cell-18-output-1.pdf index 48928d12..353f18e2 100644 Binary files a/100_ddmo_eda_files/figure-pdf/cell-18-output-1.pdf and b/100_ddmo_eda_files/figure-pdf/cell-18-output-1.pdf differ diff --git a/100_ddmo_eda_files/figure-pdf/fig-bi9000-output-1.pdf b/100_ddmo_eda_files/figure-pdf/fig-bi9000-output-1.pdf index 09fe9e89..cee7b15e 100644 Binary files 
a/100_ddmo_eda_files/figure-pdf/fig-bi9000-output-1.pdf and b/100_ddmo_eda_files/figure-pdf/fig-bi9000-output-1.pdf differ diff --git a/100_ddmo_eda_files/figure-pdf/fig-bi9040-output-1.pdf b/100_ddmo_eda_files/figure-pdf/fig-bi9040-output-1.pdf index 7eae28db..dda3bf2b 100644 Binary files a/100_ddmo_eda_files/figure-pdf/fig-bi9040-output-1.pdf and b/100_ddmo_eda_files/figure-pdf/fig-bi9040-output-1.pdf differ diff --git a/100_ddmo_eda_files/figure-pdf/fig-bi90403d-output-1.pdf b/100_ddmo_eda_files/figure-pdf/fig-bi90403d-output-1.pdf index 37cf5905..5076aec5 100644 Binary files a/100_ddmo_eda_files/figure-pdf/fig-bi90403d-output-1.pdf and b/100_ddmo_eda_files/figure-pdf/fig-bi90403d-output-1.pdf differ diff --git a/100_ddmo_eda_files/figure-pdf/fig-bi9449-output-1.pdf b/100_ddmo_eda_files/figure-pdf/fig-bi9449-output-1.pdf index 2b205c45..e4e030b3 100644 Binary files a/100_ddmo_eda_files/figure-pdf/fig-bi9449-output-1.pdf and b/100_ddmo_eda_files/figure-pdf/fig-bi9449-output-1.pdf differ diff --git a/100_ddmo_eda_files/figure-pdf/fig-histogram-output-2.pdf b/100_ddmo_eda_files/figure-pdf/fig-histogram-output-2.pdf index 9fef2a1d..a69853dd 100644 Binary files a/100_ddmo_eda_files/figure-pdf/fig-histogram-output-2.pdf and b/100_ddmo_eda_files/figure-pdf/fig-histogram-output-2.pdf differ diff --git a/100_ddmo_eda_files/figure-pdf/fig-normal-cdf-output-1.pdf b/100_ddmo_eda_files/figure-pdf/fig-normal-cdf-output-1.pdf index 20dd7dd4..77ecb15a 100644 Binary files a/100_ddmo_eda_files/figure-pdf/fig-normal-cdf-output-1.pdf and b/100_ddmo_eda_files/figure-pdf/fig-normal-cdf-output-1.pdf differ diff --git a/100_ddmo_eda_files/figure-pdf/fig-normal-density-output-1.pdf b/100_ddmo_eda_files/figure-pdf/fig-normal-density-output-1.pdf index e81b1e5c..3417d452 100644 Binary files a/100_ddmo_eda_files/figure-pdf/fig-normal-density-output-1.pdf and b/100_ddmo_eda_files/figure-pdf/fig-normal-density-output-1.pdf differ diff --git 
a/100_ddmo_eda_files/figure-pdf/fig-normal-distribution-output-1.pdf b/100_ddmo_eda_files/figure-pdf/fig-normal-distribution-output-1.pdf index 743fbfc4..22044eff 100644 Binary files a/100_ddmo_eda_files/figure-pdf/fig-normal-distribution-output-1.pdf and b/100_ddmo_eda_files/figure-pdf/fig-normal-distribution-output-1.pdf differ diff --git a/100_ddmo_pca_files/figure-pdf/cell-29-output-2.pdf b/100_ddmo_pca_files/figure-pdf/cell-29-output-2.pdf index dec2c6df..5877b124 100644 Binary files a/100_ddmo_pca_files/figure-pdf/cell-29-output-2.pdf and b/100_ddmo_pca_files/figure-pdf/cell-29-output-2.pdf differ diff --git a/100_ddmo_pca_files/figure-pdf/fig-pca_loading_scores-10-output-1.pdf b/100_ddmo_pca_files/figure-pdf/fig-pca_loading_scores-10-output-1.pdf index 7dd301b3..42ef44af 100644 Binary files a/100_ddmo_pca_files/figure-pdf/fig-pca_loading_scores-10-output-1.pdf and b/100_ddmo_pca_files/figure-pdf/fig-pca_loading_scores-10-output-1.pdf differ diff --git a/100_ddmo_pca_files/figure-pdf/fig-pcvals2-output-1.pdf b/100_ddmo_pca_files/figure-pdf/fig-pcvals2-output-1.pdf index a8fb659a..4b429677 100644 Binary files a/100_ddmo_pca_files/figure-pdf/fig-pcvals2-output-1.pdf and b/100_ddmo_pca_files/figure-pdf/fig-pcvals2-output-1.pdf differ diff --git a/100_ddmo_pca_files/figure-pdf/fig-scree_plot_fa-output-1.pdf b/100_ddmo_pca_files/figure-pdf/fig-scree_plot_fa-output-1.pdf index bc74b508..8c9083cb 100644 Binary files a/100_ddmo_pca_files/figure-pdf/fig-scree_plot_fa-output-1.pdf and b/100_ddmo_pca_files/figure-pdf/fig-scree_plot_fa-output-1.pdf differ diff --git a/100_ddmo_pca_files/figure-pdf/fig-scree_plot_pca-1-output-1.pdf b/100_ddmo_pca_files/figure-pdf/fig-scree_plot_pca-1-output-1.pdf index 5d9510e6..4381eacd 100644 Binary files a/100_ddmo_pca_files/figure-pdf/fig-scree_plot_pca-1-output-1.pdf and b/100_ddmo_pca_files/figure-pdf/fig-scree_plot_pca-1-output-1.pdf differ diff --git a/100_ddmo_regression_files/figure-pdf/fig-plot-coeff-vs-pvals-output-1.pdf 
b/100_ddmo_regression_files/figure-pdf/fig-plot-coeff-vs-pvals-output-1.pdf index e394e278..6c8286f5 100644 Binary files a/100_ddmo_regression_files/figure-pdf/fig-plot-coeff-vs-pvals-output-1.pdf and b/100_ddmo_regression_files/figure-pdf/fig-plot-coeff-vs-pvals-output-1.pdf differ diff --git a/Sequential-Parameter-Optimization-Cookbook.tex b/Sequential-Parameter-Optimization-Cookbook.tex index 38ed63fd..cd768c7b 100644 --- a/Sequential-Parameter-Optimization-Cookbook.tex +++ b/Sequential-Parameter-Optimization-Cookbook.tex @@ -237,14 +237,14 @@ \floatname{codelisting}{Listing} \newcommand*\listoflistings{\listof{codelisting}{List of Listings}} \usepackage{amsthm} -\theoremstyle{plain} -\newtheorem{lemma}{Lemma}[chapter] \theoremstyle{definition} \newtheorem{example}{Example}[chapter] \theoremstyle{definition} -\newtheorem{definition}{Definition}[chapter] -\theoremstyle{definition} \newtheorem{exercise}{Exercise}[chapter] +\theoremstyle{plain} +\newtheorem{lemma}{Lemma}[chapter] +\theoremstyle{definition} +\newtheorem{definition}{Definition}[chapter] \theoremstyle{remark} \AtBeginDocument{\renewcommand*{\proofname}{Proof}} \newtheorem*{remark}{Remark} @@ -712,24 +712,24 @@ \section{\texorpdfstring{1. 
Main Optimization Method: \begin{verbatim} TensorBoard logging disabled -Initial best: f(x) = 6.298504 -Iter 1 | Best: 1.224940 | Rate: 1.00 | Evals: 30.0% -Iter 2 | Best: 1.224940 | Curr: 15.498947 | Rate: 0.50 | Evals: 35.0% -Iter 3 | Best: 1.101052 | Rate: 0.67 | Evals: 40.0% -Iter 4 | Best: 0.665611 | Rate: 0.75 | Evals: 45.0% -Iter 5 | Best: 0.108463 | Rate: 0.80 | Evals: 50.0% -Iter 6 | Best: 0.031757 | Rate: 0.83 | Evals: 55.0% -Iter 7 | Best: 0.000269 | Rate: 0.86 | Evals: 60.0% -Iter 8 | Best: 0.000013 | Rate: 0.88 | Evals: 65.0% -Iter 9 | Best: 0.000001 | Rate: 0.89 | Evals: 70.0% -Iter 10 | Best: 0.000001 | Curr: 0.000001 | Rate: 0.80 | Evals: 75.0% -Iter 11 | Best: 0.000001 | Rate: 0.82 | Evals: 80.0% -Iter 12 | Best: 0.000000 | Rate: 0.83 | Evals: 85.0% -Iter 13 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.77 | Evals: 90.0% -Iter 14 | Best: 0.000000 | Curr: 0.000001 | Rate: 0.71 | Evals: 95.0% -Iter 15 | Best: 0.000000 | Curr: 0.000001 | Rate: 0.67 | Evals: 100.0% - -Best point found: [0.00019864 0.00062741] +Initial best: f(x) = 0.118645 +Iter 1 | Best: 0.118645 | Curr: 7.148656 | Rate: 0.00 | Evals: 30.0% +Iter 2 | Best: 0.118645 | Curr: 0.184535 | Rate: 0.00 | Evals: 35.0% +Iter 3 | Best: 0.083203 | Rate: 0.33 | Evals: 40.0% +Iter 4 | Best: 0.038982 | Rate: 0.50 | Evals: 45.0% +Iter 5 | Best: 0.000644 | Rate: 0.60 | Evals: 50.0% +Iter 6 | Best: 0.000124 | Rate: 0.67 | Evals: 55.0% +Iter 7 | Best: 0.000076 | Rate: 0.71 | Evals: 60.0% +Iter 8 | Best: 0.000007 | Rate: 0.75 | Evals: 65.0% +Iter 9 | Best: 0.000000 | Rate: 0.78 | Evals: 70.0% +Iter 10 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.70 | Evals: 75.0% +Iter 11 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.64 | Evals: 80.0% +Iter 12 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.58 | Evals: 85.0% +Iter 13 | Best: 0.000000 | Rate: 0.62 | Evals: 90.0% +Iter 14 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.57 | Evals: 95.0% +Iter 15 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.53 | Evals: 100.0% + +Best 
point found: [-1.92657282e-05 -1.50558623e-04] Best value: 0.000000 Total evaluations: 20 Sequential iterations: 15 @@ -768,30 +768,30 @@ \subsection{Changing the Optimizer}\label{changing-the-optimizer} \begin{verbatim} TensorBoard logging disabled -Initial best: f(x) = 0.616062 -Iter 1 | Best: 0.616062 | Curr: 5.901289 | Rate: 0.00 | Evals: 30.0% -Iter 2 | Best: 0.616062 | Curr: 5.246832 | Rate: 0.00 | Evals: 35.0% -Iter 3 | Best: 0.616062 | Curr: 3.035174 | Rate: 0.00 | Evals: 40.0% -Iter 4 | Best: 0.616062 | Curr: 2.438754 | Rate: 0.00 | Evals: 45.0% -Iter 5 | Best: 0.616062 | Curr: 1.670579 | Rate: 0.00 | Evals: 50.0% -Iter 6 | Best: 0.616062 | Curr: 1.045781 | Rate: 0.00 | Evals: 55.0% -Iter 7 | Best: 0.107339 | Rate: 0.14 | Evals: 60.0% -Iter 8 | Best: 0.001791 | Rate: 0.25 | Evals: 65.0% -Iter 9 | Best: 0.001791 | Curr: 0.012779 | Rate: 0.22 | Evals: 70.0% -Iter 10 | Best: 0.001791 | Curr: 0.049731 | Rate: 0.20 | Evals: 75.0% -Iter 11 | Best: 0.001791 | Curr: 0.001893 | Rate: 0.18 | Evals: 80.0% -Iter 12 | Best: 0.001607 | Rate: 0.25 | Evals: 85.0% -Iter 13 | Best: 0.000250 | Rate: 0.31 | Evals: 90.0% -Iter 14 | Best: 0.000060 | Rate: 0.36 | Evals: 95.0% -Iter 15 | Best: 0.000060 | Curr: 0.000265 | Rate: 0.33 | Evals: 100.0% - -Best point found: [-0.00673006 -0.0038705 ] -Best value: 0.000060 +Initial best: f(x) = 12.545847 +Iter 1 | Best: 1.216945 | Rate: 1.00 | Evals: 30.0% +Iter 2 | Best: 1.216945 | Curr: 1.647816 | Rate: 0.50 | Evals: 35.0% +Iter 3 | Best: 1.216945 | Curr: 2.561889 | Rate: 0.33 | Evals: 40.0% +Iter 4 | Best: 1.216945 | Curr: 2.952185 | Rate: 0.25 | Evals: 45.0% +Iter 5 | Best: 0.909420 | Rate: 0.40 | Evals: 50.0% +Iter 6 | Best: 0.464324 | Rate: 0.50 | Evals: 55.0% +Iter 7 | Best: 0.232200 | Rate: 0.57 | Evals: 60.0% +Iter 8 | Best: 0.232200 | Curr: 0.315588 | Rate: 0.50 | Evals: 65.0% +Iter 9 | Best: 0.232200 | Curr: 0.306943 | Rate: 0.44 | Evals: 70.0% +Iter 10 | Best: 0.232200 | Curr: 0.266283 | Rate: 0.40 | Evals: 75.0% +Iter 11 | 
Best: 0.232200 | Curr: 0.254042 | Rate: 0.36 | Evals: 80.0% +Iter 12 | Best: 0.232200 | Curr: 0.245736 | Rate: 0.33 | Evals: 85.0% +Iter 13 | Best: 0.232200 | Curr: 0.239848 | Rate: 0.31 | Evals: 90.0% +Iter 14 | Best: 0.232200 | Curr: 0.245629 | Rate: 0.29 | Evals: 95.0% +Iter 15 | Best: 0.232200 | Curr: 0.243600 | Rate: 0.27 | Evals: 100.0% + +Best point found: [ 0.35250723 -0.32853986] +Best value: 0.232200 Total evaluations: 20 Sequential iterations: 15 Success: True Message: Optimization terminated: maximum evaluations (20) reached - Current function value: 0.000060 + Current function value: 0.232200 Iterations: 15 Function evaluations: 20 \end{verbatim} @@ -2098,9 +2098,9 @@ \section{Method 1: SpotOptim (Surrogate Model Based \end{Shaded} \begin{verbatim} -Removed old TensorBoard logs: runs/spotoptim_20260404_090741 +Removed old TensorBoard logs: runs/spotoptim_20260411_210457 Cleaned 1 old TensorBoard log directory -TensorBoard logging enabled: runs/spotoptim_20260404_090758 +TensorBoard logging enabled: runs/spotoptim_20260411_210514 \end{verbatim} \section{Design Table}\label{design-table} @@ -2156,7 +2156,7 @@ \section{Run optimization}\label{run-optimization} Iter 18 | Best: 119.503672 | Curr: 119.511109 | Rate: 0.61 | Evals: 93.3% Iter 19 | Best: 119.503672 | Rate: 0.63 | Evals: 96.7% Iter 20 | Best: 119.503672 | Curr: 119.522520 | Rate: 0.60 | Evals: 100.0% -TensorBoard writer closed. View logs with: tensorboard --logdir=runs/spotoptim_20260404_090758 +TensorBoard writer closed. 
View logs with: tensorboard --logdir=runs/spotoptim_20260411_210514 \end{verbatim} \begin{Shaded} @@ -2177,7 +2177,7 @@ \section{Run optimization}\label{run-optimization} SpotOptim Results: Best weight: 119.5037 lb Function evaluations: 30 - Time elapsed: 10.95 seconds + Time elapsed: 10.68 seconds Success: True \end{verbatim} @@ -2427,9 +2427,9 @@ \section{Comparison of Results}\label{comparison-of-results} ================================================================================ Method Best Weight (lb) Improvement (%) Function Evals Time (s) Success Baseline 233.908405 0.000000 1 0.000000 True - SpotOptim 119.503672 48.910057 30 10.948515 True -Nelder-Mead 220.544928 5.713124 30 0.000830 False - BFGS 119.503672 48.910057 60 0.001359 False + SpotOptim 119.503672 48.910057 30 10.675841 True +Nelder-Mead 220.544928 5.713124 30 0.000783 False + BFGS 119.503672 48.910057 60 0.001448 False ================================================================================ \end{verbatim} @@ -2661,9 +2661,9 @@ \section{Method Efficiency METHOD EFFICIENCY METRICS ================================================================================ Method Weight Reduction (lb) Evals to Best Time per Eval (ms) - SpotOptim 114.404734 29 364.950498 -Nelder-Mead 13.363478 30 0.027672 - BFGS 114.404734 60 0.022654 + SpotOptim 114.404734 29 355.861362 +Nelder-Mead 13.363478 30 0.026107 + BFGS 114.404734 60 0.024132 ================================================================================ \end{verbatim} @@ -3694,18 +3694,18 @@ \section{Second Example}\label{second-example} \begin{verbatim} TensorBoard logging disabled -Initial best: f(x) = 1.233040 -Iter 1 | Best: 0.720474 | Rate: 1.00 | Evals: 55.0% -Iter 2 | Best: 0.321391 | Rate: 1.00 | Evals: 60.0% -Iter 3 | Best: 0.060787 | Rate: 1.00 | Evals: 65.0% -Iter 4 | Best: 0.017868 | Rate: 1.00 | Evals: 70.0% -Iter 5 | Best: 0.004664 | Rate: 1.00 | Evals: 75.0% -Iter 6 | Best: 0.001700 | Rate: 1.00 | Evals: 80.0% -Iter 7 | 
Best: 0.001173 | Rate: 1.00 | Evals: 85.0% -Iter 8 | Best: 0.000017 | Rate: 1.00 | Evals: 90.0% -Iter 9 | Best: 0.000001 | Rate: 1.00 | Evals: 95.0% -Iter 10 | Best: 0.000001 | Curr: 0.000001 | Rate: 0.90 | Evals: 100.0% -Final success rate: 90.00% +Initial best: f(x) = 6.573621 +Iter 1 | Best: 6.573621 | Curr: 9.817400 | Rate: 0.00 | Evals: 55.0% +Iter 2 | Best: 5.184105 | Rate: 0.50 | Evals: 60.0% +Iter 3 | Best: 1.579794 | Rate: 0.67 | Evals: 65.0% +Iter 4 | Best: 0.076975 | Rate: 0.75 | Evals: 70.0% +Iter 5 | Best: 0.076975 | Curr: 0.313812 | Rate: 0.60 | Evals: 75.0% +Iter 6 | Best: 0.013770 | Rate: 0.67 | Evals: 80.0% +Iter 7 | Best: 0.005335 | Rate: 0.71 | Evals: 85.0% +Iter 8 | Best: 0.000539 | Rate: 0.75 | Evals: 90.0% +Iter 9 | Best: 0.000003 | Rate: 0.78 | Evals: 95.0% +Iter 10 | Best: 0.000002 | Rate: 0.80 | Evals: 100.0% +Final success rate: 80.00% Total evaluations: 20 \end{verbatim} @@ -3736,8 +3736,8 @@ \section{Accessing Success Rate}\label{accessing-success-rate} \end{Shaded} \begin{verbatim} -Success rate: 60.00% -Via getter method: 60.00% +Success rate: 90.00% +Via getter method: 90.00% \end{verbatim} \section{Interpreting Success Rate}\label{interpreting-success-rate} @@ -4325,12 +4325,12 @@ \subsection{Benchmark Script}\label{benchmark-script} Iters per run: 10 Starting Sequential Benchmark (n_jobs=1)... -Sequential Total Time: 8.91s +Sequential Total Time: 8.72s Starting Parallel Benchmark (n_jobs=4)... 
-Parallel Total Time: 5.74s +Parallel Total Time: 5.44s ------------------------------ -Speedup: 1.55x +Speedup: 1.60x \end{verbatim} \pandocbounded{\includegraphics[keepaspectratio]{spotoptim_parallel_files/figure-pdf/benchmark_script-output-2.pdf}} @@ -5335,8 +5335,8 @@ \subsection{Neural Network Activation Function \end{Shaded} \begin{verbatim} -Best activation function: ReLU -Best test MSE: 26475.2923 +Best activation function: Sigmoid +Best test MSE: 26512.0501 \end{verbatim} \section{Mixed Variable Types}\label{mixed-variable-types} @@ -5433,63 +5433,63 @@ \subsection{Combining Factor, Integer, and Continuous \end{Shaded} \begin{verbatim} -lr=0.002010, l1=85, layers=1, activation=ReLU -lr=0.007756, l1=78, layers=1, activation=LeakyReLU -lr=0.001115, l1=51, layers=3, activation=Sigmoid -lr=0.000461, l1=41, layers=4, activation=Tanh -lr=0.005460, l1=95, layers=0, activation=Tanh -lr=0.003141, l1=120, layers=2, activation=Sigmoid -lr=0.000189, l1=20, layers=2, activation=Tanh -lr=0.000384, l1=68, layers=2, activation=Sigmoid -lr=0.000988, l1=30, layers=1, activation=ReLU -lr=0.000151, l1=116, layers=3, activation=LeakyReLU -lr=0.002274, l1=51, layers=4, activation=Sigmoid -lr=0.000222, l1=32, layers=2, activation=LeakyReLU -lr=0.006930, l1=115, layers=2, activation=Tanh -lr=0.006619, l1=65, layers=4, activation=LeakyReLU -lr=0.004757, l1=60, layers=1, activation=LeakyReLU -lr=0.005282, l1=47, layers=4, activation=Sigmoid -lr=0.000125, l1=109, layers=3, activation=Tanh -lr=0.007019, l1=19, layers=1, activation=Sigmoid -lr=0.001478, l1=69, layers=1, activation=Sigmoid -lr=0.001111, l1=127, layers=2, activation=Tanh -lr=0.000298, l1=123, layers=2, activation=Tanh -lr=0.000207, l1=50, layers=2, activation=LeakyReLU -lr=0.000434, l1=128, layers=2, activation=Sigmoid -lr=0.004167, l1=76, layers=2, activation=Tanh -lr=0.002071, l1=121, layers=1, activation=Sigmoid -lr=0.000451, l1=26, layers=3, activation=Sigmoid -lr=0.000472, l1=84, layers=0, activation=LeakyReLU 
-lr=0.000199, l1=34, layers=0, activation=Sigmoid -lr=0.008507, l1=98, layers=0, activation=Tanh -lr=0.001866, l1=55, layers=3, activation=Sigmoid -lr=0.000405, l1=68, layers=4, activation=LeakyReLU -lr=0.000734, l1=56, layers=2, activation=Sigmoid -lr=0.009935, l1=39, layers=4, activation=Tanh -lr=0.002115, l1=29, layers=1, activation=ReLU -lr=0.006448, l1=18, layers=0, activation=Tanh -lr=0.007156, l1=84, layers=3, activation=Sigmoid -lr=0.000112, l1=82, layers=3, activation=Sigmoid -lr=0.000666, l1=69, layers=2, activation=Sigmoid -lr=0.006315, l1=120, layers=2, activation=Tanh -lr=0.006475, l1=95, layers=3, activation=Tanh -lr=0.001703, l1=23, layers=1, activation=Tanh -lr=0.000124, l1=96, layers=0, activation=Tanh -lr=0.000450, l1=57, layers=2, activation=Sigmoid -lr=0.000629, l1=116, layers=4, activation=LeakyReLU -lr=0.002053, l1=98, layers=1, activation=Sigmoid -lr=0.009235, l1=79, layers=2, activation=Tanh -lr=0.000127, l1=95, layers=3, activation=Tanh -lr=0.003554, l1=93, layers=3, activation=ReLU -lr=0.000159, l1=73, layers=2, activation=Tanh -lr=0.000458, l1=16, layers=1, activation=Sigmoid +lr=0.001244, l1=55, layers=4, activation=Sigmoid +lr=0.000319, l1=110, layers=2, activation=LeakyReLU +lr=0.004348, l1=47, layers=3, activation=ReLU +lr=0.001615, l1=62, layers=2, activation=Sigmoid +lr=0.007619, l1=118, layers=1, activation=Sigmoid +lr=0.000731, l1=91, layers=3, activation=LeakyReLU +lr=0.000104, l1=34, layers=0, activation=Tanh +lr=0.000183, l1=78, layers=1, activation=Tanh +lr=0.002909, l1=20, layers=2, activation=Tanh +lr=0.000411, l1=97, layers=2, activation=Sigmoid +lr=0.000820, l1=110, layers=1, activation=Tanh +lr=0.000104, l1=34, layers=0, activation=Tanh +lr=0.005292, l1=64, layers=1, activation=Sigmoid +lr=0.002111, l1=89, layers=3, activation=Tanh +lr=0.001275, l1=101, layers=3, activation=LeakyReLU +lr=0.004821, l1=44, layers=3, activation=Tanh +lr=0.000177, l1=76, layers=2, activation=Tanh +lr=0.001381, l1=115, layers=1, 
activation=Sigmoid +lr=0.000421, l1=48, layers=1, activation=Sigmoid +lr=0.008880, l1=93, layers=3, activation=Sigmoid +lr=0.003566, l1=114, layers=0, activation=Sigmoid +lr=0.008129, l1=89, layers=2, activation=Sigmoid +lr=0.001085, l1=103, layers=3, activation=Tanh +lr=0.001265, l1=120, layers=1, activation=Sigmoid +lr=0.003381, l1=99, layers=3, activation=Tanh +lr=0.006645, l1=27, layers=2, activation=Sigmoid +lr=0.001329, l1=87, layers=0, activation=Tanh +lr=0.004093, l1=50, layers=2, activation=Tanh +lr=0.001930, l1=91, layers=2, activation=Tanh +lr=0.000223, l1=94, layers=2, activation=ReLU +lr=0.001175, l1=61, layers=0, activation=LeakyReLU +lr=0.001837, l1=92, layers=2, activation=LeakyReLU +lr=0.002186, l1=63, layers=3, activation=Sigmoid +lr=0.000575, l1=106, layers=1, activation=Sigmoid +lr=0.004582, l1=126, layers=2, activation=LeakyReLU +lr=0.000811, l1=93, layers=2, activation=Sigmoid +lr=0.003384, l1=24, layers=1, activation=Tanh +lr=0.002273, l1=118, layers=4, activation=Tanh +lr=0.006885, l1=76, layers=1, activation=ReLU +lr=0.008604, l1=59, layers=1, activation=Sigmoid +lr=0.000315, l1=61, layers=0, activation=Sigmoid +lr=0.001200, l1=92, layers=4, activation=Tanh +lr=0.001664, l1=62, layers=4, activation=LeakyReLU +lr=0.004207, l1=39, layers=2, activation=Tanh +lr=0.000263, l1=108, layers=1, activation=Tanh +lr=0.004217, l1=39, layers=2, activation=Tanh +lr=0.000492, l1=126, layers=2, activation=Sigmoid +lr=0.000953, l1=62, layers=1, activation=Sigmoid +lr=0.001287, l1=104, layers=2, activation=LeakyReLU +lr=0.000185, l1=40, layers=0, activation=Sigmoid Optimization Results: -Best learning rate: 0.001478 -Best layer size: 69 -Best num layers: 1 +Best learning rate: 0.008880 +Best layer size: 93 +Best num layers: 3 Best activation: Sigmoid -Best test MSE: 26469.1875 +Best test MSE: 26343.3835 \end{verbatim} \section{Multiple Factor Variables}\label{multiple-factor-variables} @@ -5570,9 +5570,9 @@ \subsection{Optimizing Both Activation and 
\end{Shaded} \begin{verbatim} -Best activation: ReLU +Best activation: Tanh Best optimizer: SGD -Best learning rate: 0.004407 +Best learning rate: 0.009850 \end{verbatim} \section{Advanced Usage}\label{advanced-usage} @@ -7020,10 +7020,10 @@ \subsection{Results Table (After \begin{verbatim} | name | type | default | lower | upper | tuned | transform | |---------|--------|-----------|---------|---------|---------|-------------| -| lr | float | 0.5005 | 0.001 | 1 | 0.004 | log10 | -| alpha | float | 5.005 | 0.01 | 10 | 0.037 | log | -| neurons | float | 505 | 10 | 1000 | 21.8479 | sqrt | -| bias | float | 0 | -5 | 5 | 0.542 | - | +| lr | float | 0.5005 | 0.001 | 1 | 0.3481 | log10 | +| alpha | float | 5.005 | 0.01 | 10 | 1.2563 | log | +| neurons | float | 505 | 10 | 1000 | 17.4921 | sqrt | +| bias | float | 0 | -5 | 5 | 0.934 | - | \end{verbatim} Output shows the ``trans'' column with transformation types, helping you @@ -7851,9 +7851,9 @@ \subsection{Enable TensorBoard \end{Shaded} \begin{verbatim} -Removed old TensorBoard logs: runs/spotoptim_20260404_094059 +Removed old TensorBoard logs: runs/spotoptim_20260411_213816 Cleaned 1 old TensorBoard log directory -TensorBoard logging enabled: runs/spotoptim_20260404_094112 +TensorBoard logging enabled: runs/spotoptim_20260411_213829 Initial best: f(x) = 3.925721 Iter 1 | Best: 0.020506 | Rate: 1.00 | Evals: 55.0% Iter 2 | Best: 0.000751 | Rate: 1.00 | Evals: 60.0% @@ -7865,9 +7865,9 @@ \subsection{Enable TensorBoard Iter 8 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.75 | Evals: 90.0% Iter 9 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.67 | Evals: 95.0% Iter 10 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.60 | Evals: 100.0% -TensorBoard writer closed. View logs with: tensorboard --logdir=runs/spotoptim_20260404_094112 +TensorBoard writer closed. 
View logs with: tensorboard --logdir=runs/spotoptim_20260411_213829 Best value: 0.000000 -Logs saved to: runs/runs/spotoptim_20260404_094112 +Logs saved to: runs/runs/spotoptim_20260411_213829 \end{verbatim} \subsection{View Logs in TensorBoard}\label{view-logs-in-tensorboard} @@ -8029,9 +8029,9 @@ \subsection{Examples}\label{examples-2} \end{Shaded} \begin{verbatim} -Removed old TensorBoard logs: runs/spotoptim_20260404_094112 +Removed old TensorBoard logs: runs/spotoptim_20260411_213829 Cleaned 1 old TensorBoard log directory -TensorBoard logging enabled: runs/spotoptim_20260404_094120 +TensorBoard logging enabled: runs/spotoptim_20260411_213837 Initial best: f(x) = 3.925721 Iter 1 | Best: 0.020506 | Rate: 1.00 | Evals: 55.0% Iter 2 | Best: 0.000751 | Rate: 1.00 | Evals: 60.0% @@ -8043,7 +8043,7 @@ \subsection{Examples}\label{examples-2} Iter 8 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.75 | Evals: 90.0% Iter 9 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.67 | Evals: 95.0% Iter 10 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.60 | Evals: 100.0% -TensorBoard writer closed. View logs with: tensorboard --logdir=runs/spotoptim_20260404_094120 +TensorBoard writer closed. 
View logs with: tensorboard --logdir=runs/spotoptim_20260411_213837 Best value: 0.000000 \end{verbatim} @@ -8276,9 +8276,9 @@ \subsection{Basic Usage}\label{basic-usage-1} \end{Shaded} \begin{verbatim} -Removed old TensorBoard logs: runs/spotoptim_20260404_094129 +Removed old TensorBoard logs: runs/spotoptim_20260411_213846 Cleaned 1 old TensorBoard log directory -TensorBoard logging enabled: runs/spotoptim_20260404_094129 +TensorBoard logging enabled: runs/spotoptim_20260411_213846 Initial best: f(x) = 3.925721 Iter 1 | Best: 0.020506 | Rate: 1.00 | Evals: 55.0% Iter 2 | Best: 0.000751 | Rate: 1.00 | Evals: 60.0% @@ -8290,9 +8290,9 @@ \subsection{Basic Usage}\label{basic-usage-1} Iter 8 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.75 | Evals: 90.0% Iter 9 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.67 | Evals: 95.0% Iter 10 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.60 | Evals: 100.0% -TensorBoard writer closed. View logs with: tensorboard --logdir=runs/spotoptim_20260404_094129 +TensorBoard writer closed. View logs with: tensorboard --logdir=runs/spotoptim_20260411_213846 Best value: 0.000000 -Logs saved to: runs/runs/spotoptim_20260404_094129 +Logs saved to: runs/runs/spotoptim_20260411_213846 \end{verbatim} \subsection{Use Cases}\label{use-cases-1} @@ -10397,7 +10397,7 @@ \section{1. Default Surrogate: Gaussian Process with Matern Results: Best weight: 119.8664 lb Function evaluations: 20 - Time: 1.66s + Time: 1.72s Success: True \end{verbatim} @@ -10510,7 +10510,7 @@ \section{2. Gaussian Process with RBF (Radial Basis Function) Results: Best weight: 120.1093 lb Function evaluations: 20 - Time: 0.47s + Time: 0.48s Success: True \end{verbatim} @@ -10621,7 +10621,7 @@ \section{3. Gaussian Process with Matern nu=1.5 Results: Best weight: 119.5465 lb Function evaluations: 20 - Time: 0.45s + Time: 0.46s Success: True \end{verbatim} @@ -10842,7 +10842,7 @@ \section{5. 
SpotOptim Kriging Model}\label{spotoptim-kriging-model} Results: Best weight: 121.2932 lb Function evaluations: 20 - Time: 8.13s + Time: 8.30s Success: True \end{verbatim} @@ -10950,7 +10950,7 @@ \section{6. Random Forest Regressor}\label{random-forest-regressor} Results: Best weight: 145.9792 lb Function evaluations: 20 - Time: 0.88s + Time: 0.86s Success: True Note: Using acquisition='y' (greedy) since RF doesn't provide uncertainty \end{verbatim} @@ -11360,15 +11360,15 @@ \section{Comprehensive Comparison}\label{comprehensive-comparison} SURROGATE MODEL COMPARISON ==================================================================================================== Surrogate Best Weight Evaluations Time (s) Success Gap to Best (%) - GP Matern nu=1.5 119.546454 20 0.453608 True 0.000000 -GP Matern nu=2.5 (Default) 119.866409 20 1.658157 True 0.267641 - GP Rational Quadratic 120.107282 20 0.615704 True 0.469130 - GP RBF 120.109251 20 0.470190 True 0.470777 - SpotOptim Kriging 121.293180 20 8.125980 True 1.461127 - Gradient Boosting 134.515037 20 0.182062 True 12.521144 - Random Forest 145.979231 20 0.875429 True 22.110883 - SVR (RBF) 156.707097 20 0.071593 True 31.084689 - XGBoost 165.306404 20 0.089988 True 38.277965 + GP Matern nu=1.5 119.546454 20 0.459755 True 0.000000 +GP Matern nu=2.5 (Default) 119.866409 20 1.719273 True 0.267641 + GP Rational Quadratic 120.107282 20 0.622137 True 0.469130 + GP RBF 120.109251 20 0.476415 True 0.470777 + SpotOptim Kriging 121.293180 20 8.299220 True 1.461127 + Gradient Boosting 134.515037 20 0.184378 True 12.521144 + Random Forest 145.979231 20 0.863158 True 22.110883 + SVR (RBF) 156.707097 20 0.071520 True 31.084689 + XGBoost 165.306404 20 0.091116 True 38.277965 ==================================================================================================== \end{verbatim} @@ -11528,7 +11528,7 @@ \section{Key Insights and 1. 
BEST OVERALL PERFORMANCE: Surrogate: GP Matern nu=1.5 Best Weight: 119.5465 lb - Computation Time: 0.45s + Computation Time: 0.46s 2. FASTEST OPTIMIZATION: Surrogate: SVR (RBF) @@ -11537,7 +11537,7 @@ \section{Key Insights and 3. MOST EFFICIENT (weight reduction per second): Surrogate: SVR (RBF) - Efficiency: 1078.3317 lb/s + Efficiency: 1079.4425 lb/s 4. RECOMMENDATIONS BY PROBLEM TYPE: - Smooth, continuous functions: Gaussian Process with RBF or Matern nu=2.5 @@ -11550,10 +11550,10 @@ \section{Key Insights and 5. KERNEL COMPARISON (Gaussian Process): Surrogate Best Weight Time (s) - GP Matern nu=1.5 119.546454 0.453608 -GP Matern nu=2.5 (Default) 119.866409 1.658157 - GP Rational Quadratic 120.107282 0.615704 - GP RBF 120.109251 0.470190 + GP Matern nu=1.5 119.546454 0.459755 +GP Matern nu=2.5 (Default) 119.866409 1.719273 + GP Rational Quadratic 120.107282 0.622137 + GP RBF 120.109251 0.476415 ==================================================================================================== \end{verbatim} @@ -11605,8 +11605,8 @@ \section{Summary Statistics}\label{summary-statistics} Average Weight 133.7145 lb Std Dev Weight 18.0098 lb Fastest Time 0.07 s - Slowest Time 8.13 s - Average Time 1.39 s + Slowest Time 8.30 s + Average Time 1.42 s ==================================================================================================== \end{verbatim} @@ -11738,47 +11738,47 @@ \subsection{Random Space-Filling Design \begin{verbatim} TensorBoard logging disabled -Initial best: f(x) = 2.669264 -Iter 1 | Best: 2.669264 | Curr: 2.840167 | Rate: 0.00 | Evals: 22.0% -Iter 2 | Best: 0.017728 | Rate: 0.50 | Evals: 24.0% -Iter 3 | Best: 0.000008 | Rate: 0.67 | Evals: 26.0% -Iter 4 | Best: 0.000003 | Rate: 0.75 | Evals: 28.0% -Iter 5 | Best: 0.000003 | Rate: 0.80 | Evals: 30.0% -Iter 6 | Best: 0.000002 | Rate: 0.83 | Evals: 32.0% -Iter 7 | Best: 0.000002 | Curr: 0.000002 | Rate: 0.71 | Evals: 34.0% -Iter 8 | Best: 0.000002 | Rate: 0.75 | Evals: 36.0% -Iter 9 | Best: 
0.000002 | Curr: 0.000003 | Rate: 0.67 | Evals: 38.0% -Iter 10 | Best: 0.000002 | Curr: 0.000003 | Rate: 0.60 | Evals: 40.0% -Iter 11 | Best: 0.000002 | Curr: 0.000003 | Rate: 0.55 | Evals: 42.0% -Iter 12 | Best: 0.000002 | Rate: 0.58 | Evals: 44.0% -Iter 13 | Best: 0.000002 | Curr: 0.000002 | Rate: 0.54 | Evals: 46.0% -Iter 14 | Best: 0.000002 | Curr: 0.000002 | Rate: 0.50 | Evals: 48.0% -Iter 15 | Best: 0.000002 | Curr: 0.000002 | Rate: 0.47 | Evals: 50.0% -Iter 16 | Best: 0.000002 | Curr: 0.000002 | Rate: 0.44 | Evals: 52.0% -Iter 17 | Best: 0.000001 | Rate: 0.47 | Evals: 54.0% -Iter 18 | Best: 0.000001 | Curr: 0.000001 | Rate: 0.44 | Evals: 56.0% -Iter 19 | Best: 0.000001 | Curr: 0.000001 | Rate: 0.42 | Evals: 58.0% -Iter 20 | Best: 0.000001 | Curr: 0.000001 | Rate: 0.40 | Evals: 60.0% -Iter 21 | Best: 0.000001 | Curr: 0.000001 | Rate: 0.38 | Evals: 62.0% -Iter 22 | Best: 0.000001 | Curr: 0.000006 | Rate: 0.36 | Evals: 64.0% -Iter 23 | Best: 0.000001 | Curr: 0.000002 | Rate: 0.35 | Evals: 66.0% -Iter 24 | Best: 0.000001 | Rate: 0.38 | Evals: 68.0% -Iter 25 | Best: 0.000001 | Curr: 0.000001 | Rate: 0.36 | Evals: 70.0% -Iter 26 | Best: 0.000001 | Curr: 0.000001 | Rate: 0.35 | Evals: 72.0% -Iter 27 | Best: 0.000001 | Curr: 0.000053 | Rate: 0.33 | Evals: 74.0% -Iter 28 | Best: 0.000001 | Curr: 0.000002 | Rate: 0.32 | Evals: 76.0% -Iter 29 | Best: 0.000001 | Rate: 0.34 | Evals: 78.0% -Iter 30 | Best: 0.000001 | Curr: 0.000001 | Rate: 0.33 | Evals: 80.0% -Iter 31 | Best: 0.000001 | Curr: 0.000002 | Rate: 0.32 | Evals: 82.0% -Iter 32 | Best: 0.000001 | Curr: 0.000002 | Rate: 0.31 | Evals: 84.0% -Iter 33 | Best: 0.000001 | Curr: 0.000992 | Rate: 0.30 | Evals: 86.0% -Iter 34 | Best: 0.000000 | Rate: 0.32 | Evals: 88.0% -Iter 35 | Best: 0.000000 | Rate: 0.34 | Evals: 90.0% -Iter 36 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.33 | Evals: 92.0% -Iter 37 | Best: 0.000000 | Curr: 0.000001 | Rate: 0.32 | Evals: 94.0% -Iter 38 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.32 | 
Evals: 96.0% -Iter 39 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.31 | Evals: 98.0% -Iter 40 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.30 | Evals: 100.0% +Initial best: f(x) = 0.410244 +Iter 1 | Best: 0.102407 | Rate: 1.00 | Evals: 22.0% +Iter 2 | Best: 0.005497 | Rate: 1.00 | Evals: 24.0% +Iter 3 | Best: 0.000135 | Rate: 1.00 | Evals: 26.0% +Iter 4 | Best: 0.000024 | Rate: 1.00 | Evals: 28.0% +Iter 5 | Best: 0.000003 | Rate: 1.00 | Evals: 30.0% +Iter 6 | Best: 0.000001 | Rate: 1.00 | Evals: 32.0% +Iter 7 | Best: 0.000001 | Rate: 1.00 | Evals: 34.0% +Iter 8 | Best: 0.000000 | Rate: 1.00 | Evals: 36.0% +Iter 9 | Best: 0.000000 | Rate: 1.00 | Evals: 38.0% +Iter 10 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.90 | Evals: 40.0% +Iter 11 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.82 | Evals: 42.0% +Iter 12 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.75 | Evals: 44.0% +Iter 13 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.69 | Evals: 46.0% +Iter 14 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.64 | Evals: 48.0% +Iter 15 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.60 | Evals: 50.0% +Iter 16 | Best: 0.000000 | Rate: 0.62 | Evals: 52.0% +Iter 17 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.59 | Evals: 54.0% +Iter 18 | Best: 0.000000 | Rate: 0.61 | Evals: 56.0% +Iter 19 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.58 | Evals: 58.0% +Iter 20 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.55 | Evals: 60.0% +Iter 21 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.52 | Evals: 62.0% +Iter 22 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.50 | Evals: 64.0% +Iter 23 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.48 | Evals: 66.0% +Iter 24 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.46 | Evals: 68.0% +Iter 25 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.44 | Evals: 70.0% +Iter 26 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.42 | Evals: 72.0% +Iter 27 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.41 | Evals: 74.0% +Iter 28 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.39 | Evals: 76.0% +Iter 29 | Best: 
0.000000 | Curr: 0.000000 | Rate: 0.38 | Evals: 78.0% +Iter 30 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.37 | Evals: 80.0% +Iter 31 | Best: 0.000000 | Curr: 0.000001 | Rate: 0.35 | Evals: 82.0% +Iter 32 | Best: 0.000000 | Rate: 0.38 | Evals: 84.0% +Iter 33 | Best: 0.000000 | Rate: 0.39 | Evals: 86.0% +Iter 34 | Best: 0.000000 | Curr: 0.000002 | Rate: 0.38 | Evals: 88.0% +Iter 35 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.37 | Evals: 90.0% +Iter 36 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.36 | Evals: 92.0% +Iter 37 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.35 | Evals: 94.0% +Iter 38 | Best: 0.000000 | Curr: 0.000001 | Rate: 0.34 | Evals: 96.0% +Iter 39 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.33 | Evals: 98.0% +Iter 40 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.33 | Evals: 100.0% \end{verbatim} \section{How It Works}\label{how-it-works-2} @@ -12282,7 +12282,7 @@ \subsection{1. Default Configuration (Differential \end{Shaded} \begin{verbatim} -Best y: 0.021827024089791806 +Best y: 0.025998550917603842 \end{verbatim} \subsection{2. Customizing Differential @@ -12322,7 +12322,7 @@ \subsection{2. Customizing Differential \end{Shaded} \begin{verbatim} -Best y with Custom DE: 0.05934755416951033 +Best y with Custom DE: 0.013052397638474466 \end{verbatim} \subsection{3. Using Gradient-Based Optimization @@ -12366,7 +12366,7 @@ \subsection{3. Using Gradient-Based Optimization \end{Shaded} \begin{verbatim} -Best y with L-BFGS-B: 0.2080879920357322 +Best y with L-BFGS-B: 0.39904568864966816 \end{verbatim} \subsection{4. Using Gradient-Free Optimization @@ -12409,7 +12409,7 @@ \subsection{4. Using Gradient-Free Optimization \end{Shaded} \begin{verbatim} -Best y with Nelder-Mead: 0.13896260953804296 +Best y with Nelder-Mead: 0.19619489412886978 \end{verbatim} \subsection{5. Returning Multiple @@ -12442,20 +12442,20 @@ \subsection{5. 
Returning Multiple \begin{verbatim} message: Optimization terminated: maximum evaluations (5) reached - Current function value: 0.133533 + Current function value: 15.988585 Iterations: 3 Function evaluations: 5 success: True - fun: 0.1335332452820042 - x: [-3.284e-01 1.603e-01] - X: [[ 7.256e-01 5.958e-01] - [-3.174e+00 -3.234e+00] - [ 8.490e-01 8.915e-03] - [ 3.515e+00 3.442e-01] - [-3.284e-01 1.603e-01]] + fun: 15.988584777706675 + x: [ 2.298e+00 -3.272e+00] + X: [[-4.888e+00 2.805e+00] + [ 2.763e+00 -4.092e+00] + [ 2.712e+00 -4.135e+00] + [ 5.000e+00 -2.528e+00] + [ 2.298e+00 -3.272e+00]] nit: 3 nfev: 5 - y: [ 8.814e-01 2.054e+01 7.208e-01 1.248e+01 1.335e-01] + y: [ 3.176e+01 2.438e+01 2.445e+01 3.139e+01 1.599e+01] \end{verbatim} \part{Kriging} @@ -17317,7 +17317,7 @@ \subsection{Example: Tuning with \begin{verbatim} Best Parameters: -{'l1': 64, 'num_hidden_layers': 1, 'activation': 'Sigmoid', 'lr': 69.0246143221913, 'optimizer': 'SGD', 'epochs': 10} +{'l1': 128, 'num_hidden_layers': 2, 'activation': 'ELU', 'lr': 1.0330297381458524, 'optimizer': 'Adam', 'epochs': 13} \end{verbatim} This setup automatically tunes the architecture (\texttt{l1}, @@ -17373,7 +17373,7 @@ \chapter{Setup}\label{setup-1} \end{Shaded} \begin{verbatim} - + \end{verbatim} \chapter{The Neural Network}\label{the-neural-network} @@ -18574,9 +18574,9 @@ \section{Run the Optimization}\label{run-the-optimization} Factor variable at dimension 3: Levels: ['Adam', 'SGD', 'RMSprop', 'AdamW'] Mapped to integers: 0 to 3 -Removed old TensorBoard logs: runs/spotoptim_20260404_101907 +Removed old TensorBoard logs: runs/spotoptim_20260411_221608 Cleaned 1 old TensorBoard log directory -TensorBoard logging enabled: runs/spotoptim_20260404_102031 +TensorBoard logging enabled: runs/spotoptim_20260411_221729 \end{verbatim} Display search space configuration. 
The \texttt{trans}column shows @@ -18703,7 +18703,7 @@ \section{Run the Optimization}\label{run-the-optimization} optimizer=Adam, lr_unified=0.2781, alpha=0.0168 Validation MSE: 0.202393 Iter 6 | Best: 0.169332 | Curr: 0.202393 | Rate: 0.17 | Evals: 100.0% -TensorBoard writer closed. View logs with: tensorboard --logdir=runs/spotoptim_20260404_102031 +TensorBoard writer closed. View logs with: tensorboard --logdir=runs/spotoptim_20260411_221729 \end{verbatim} \chapter{Results Analysis}\label{results-analysis} @@ -20894,12 +20894,12 @@ \subsection{Training a Model}\label{training-a-model} \end{Shaded} \begin{verbatim} -Epoch 20/100: Loss = 32678.4499 -Epoch 40/100: Loss = 28932.9481 -Epoch 60/100: Loss = 31657.7358 -Epoch 80/100: Loss = 34718.9435 -Epoch 100/100: Loss = 32451.5555 -Test MSE: 26449.6042 +Epoch 20/100: Loss = 29950.8608 +Epoch 40/100: Loss = 32215.8185 +Epoch 60/100: Loss = 33187.7674 +Epoch 80/100: Loss = 33194.3327 +Epoch 100/100: Loss = 28769.2459 +Test MSE: 26482.7585 \end{verbatim} \section{Function Reference}\label{function-reference-1} @@ -20927,8 +20927,8 @@ \subsection{get\_diabetes\_dataloaders()}\label{get_diabetes_dataloaders} \end{Shaded} \begin{verbatim} -(, - , +(, + , StandardScaler()) \end{verbatim} @@ -21023,7 +21023,7 @@ \section{DiabetesDataset Class}\label{diabetesdataset-class} \end{Shaded} \begin{verbatim} - + \end{verbatim} \textbf{Parameters:} @@ -21210,7 +21210,7 @@ \subsection{Without Feature Scaling}\label{without-feature-scaling} \begin{verbatim} Scaler: None -Mean: tensor([-0.0040, 0.0060, -0.0057]) +Mean: tensor([ 0.0077, -0.0089, 0.0099]) \end{verbatim} \subsection{Larger Batch Sizes}\label{larger-batch-sizes} @@ -21377,19 +21377,19 @@ \section{Complete Training Example}\label{complete-training-example} Test samples: 89 Batches per epoch: 12 ------------------------------------------------------------ -Epoch 20/200: Train Loss = 28960.1559, Test Loss = 26626.3255 -Epoch 40/200: Train Loss = 30007.8149, Test Loss = 
26621.7988 -Epoch 60/200: Train Loss = 29115.1273, Test Loss = 26617.2148 -Epoch 80/200: Train Loss = 27702.4583, Test Loss = 26612.6478 -Epoch 100/200: Train Loss = 33083.9486, Test Loss = 26608.0026 -Epoch 120/200: Train Loss = 28845.3983, Test Loss = 26603.2083 -Epoch 140/200: Train Loss = 31009.7324, Test Loss = 26598.1393 -Epoch 160/200: Train Loss = 27617.0401, Test Loss = 26593.0189 -Epoch 180/200: Train Loss = 28030.4511, Test Loss = 26587.7044 -Epoch 200/200: Train Loss = 30788.4162, Test Loss = 26582.0996 +Epoch 20/200: Train Loss = 31806.1104, Test Loss = 26625.3529 +Epoch 40/200: Train Loss = 27860.2657, Test Loss = 26620.5586 +Epoch 60/200: Train Loss = 28580.3246, Test Loss = 26615.6191 +Epoch 80/200: Train Loss = 28165.9618, Test Loss = 26610.5488 +Epoch 100/200: Train Loss = 28048.5522, Test Loss = 26605.3333 +Epoch 120/200: Train Loss = 27761.9672, Test Loss = 26600.0527 +Epoch 140/200: Train Loss = 28083.7457, Test Loss = 26594.6120 +Epoch 160/200: Train Loss = 27702.5830, Test Loss = 26589.1087 +Epoch 180/200: Train Loss = 28449.0728, Test Loss = 26583.5807 +Epoch 200/200: Train Loss = 30423.7790, Test Loss = 26577.8385 ------------------------------------------------------------ Training complete! -Best test loss: 26582.0996 +Best test loss: 26577.8385 \end{verbatim} \section{Integration with SpotOptim}\label{integration-with-spotoptim-1} @@ -21554,7 +21554,7 @@ \subsection{2. Set Random Seeds for \end{Shaded} \begin{verbatim} - + \end{verbatim} \subsection{3. Don't Shuffle Test Data}\label{dont-shuffle-test-data} @@ -28497,7 +28497,7 @@ \subsubsection{Estimating the Distribution of Elementary \begin{verbatim} Random orientation of the sampling matrix: [[0. 0.5 0.5] - [0. 0. 0.5] + [0.5 0.5 0.5] [0.5 0. 0.5] [0.5 0. 0. 
]] \end{verbatim} @@ -29133,11 +29133,11 @@ \subsection{Latin Squares and Random Latin \end{Shaded} \begin{verbatim} -[[0.9 0.1] - [0.5 0.3] - [0.1 0.9] - [0.3 0.5] - [0.7 0.7]] +[[0.1 0.7] + [0.7 0.1] + [0.9 0.5] + [0.3 0.3] + [0.5 0.9]] \end{verbatim} Figure~\ref{fig-rlh-edges0} shows the points in the unit hypercube for @@ -29172,11 +29172,11 @@ \subsection{Latin Squares and Random Latin \end{Shaded} \begin{verbatim} -[[0.25 0.5 ] - [0. 0.25] - [0.5 0.75] - [1. 0. ] - [0.75 1. ]] +[[0.5 1. ] + [0. 0.5 ] + [0.75 0. ] + [0.25 0.25] + [1. 0.75]] \end{verbatim} Figure~\ref{fig-rlh-edges1} shows the points in the unit hypercube for @@ -29922,9 +29922,9 @@ \subsubsection{\texorpdfstring{The Function [3 1] [4 2]] Perturbed Sampling Plan: -[[3 3] +[[1 1] [2 4] - [1 1] + [3 3] [4 2]] \end{verbatim} @@ -37955,17 +37955,17 @@ \subsection{Histograms}\label{histograms} \end{Shaded} \begin{verbatim} -Bin 1 Probability: 0.0050 -Bin 2 Probability: 0.0130 -Bin 3 Probability: 0.0490 -Bin 4 Probability: 0.1070 -Bin 5 Probability: 0.1770 -Bin 6 Probability: 0.2470 -Bin 7 Probability: 0.2170 -Bin 8 Probability: 0.1140 -Bin 9 Probability: 0.0540 -Bin 10 Probability: 0.0140 -Bin 11 Probability: 0.0030 +Bin 1 Probability: 0.0030 +Bin 2 Probability: 0.0100 +Bin 3 Probability: 0.0340 +Bin 4 Probability: 0.0960 +Bin 5 Probability: 0.1660 +Bin 6 Probability: 0.2280 +Bin 7 Probability: 0.2360 +Bin 8 Probability: 0.1390 +Bin 9 Probability: 0.0630 +Bin 10 Probability: 0.0180 +Bin 11 Probability: 0.0070 Sum of probabilities: 1.0 \end{verbatim} @@ -38670,8 +38670,8 @@ \subsection{The Normal Distribution}\label{the-normal-distribution} \protect\phantomsection\label{gen-normal-10} \begin{verbatim} -array([1.96983288, 1.75377096, 1.98704414, 1.92713823, 1.91571811, - 2.02321265, 2.01838247, 1.89942614, 1.96403588, 1.99793713]) +array([2.00056658, 1.87424157, 2.06973261, 1.89451209, 1.97990826, + 2.08745157, 2.16559174, 1.80798445, 1.97297247, 2.03540185]) \end{verbatim} Verify the mean: @@ 
-38683,7 +38683,7 @@ \subsection{The Normal Distribution}\label{the-normal-distribution} \end{Shaded} \begin{verbatim} -np.float64(0.05435014117675396) +np.float64(0.01116368071888596) \end{verbatim} Note: To verify the standard deviation, we use \texttt{ddof\ =\ 1} @@ -38696,7 +38696,7 @@ \subsection{The Normal Distribution}\label{the-normal-distribution} \end{Shaded} \begin{verbatim} -np.float64(0.020509217569515847) +np.float64(0.007953380313152209) \end{verbatim} \begin{Shaded} @@ -41477,8 +41477,8 @@ \subsubsection{Model Summary (ANOVA Dep. Variable: ln_sales R-squared: 0.485 Model: OLS Adj. R-squared: 0.449 Method: Least Squares F-statistic: 13.73 -Date: Sat, 04 Apr 2026 Prob (F-statistic): 7.69e-17 -Time: 10:42:27 Log-Likelihood: -213.62 +Date: Sat, 11 Apr 2026 Prob (F-statistic): 7.69e-17 +Time: 22:39:06 Log-Likelihood: -213.62 No. Observations: 157 AIC: 449.2 Df Residuals: 146 BIC: 482.9 Df Model: 10 @@ -41991,8 +41991,8 @@ \subsection{Creating the Regression Model with Principal Dep. Variable: ln_sales R-squared: 0.485 Model: OLS Adj. R-squared: 0.449 Method: Least Squares F-statistic: 13.73 -Date: Sat, 04 Apr 2026 Prob (F-statistic): 7.69e-17 -Time: 10:42:27 Log-Likelihood: -213.62 +Date: Sat, 11 Apr 2026 Prob (F-statistic): 7.69e-17 +Time: 22:39:07 Log-Likelihood: -213.62 No. Observations: 157 AIC: 449.2 Df Residuals: 146 BIC: 482.9 Df Model: 10 @@ -42126,8 +42126,8 @@ \subsection{PCA: Creating the Regression Model with three Principle Dep. Variable: ln_sales R-squared: 0.389 Model: OLS Adj. R-squared: 0.377 Method: Least Squares F-statistic: 32.48 -Date: Sat, 04 Apr 2026 Prob (F-statistic): 2.66e-16 -Time: 10:42:27 Log-Likelihood: -226.97 +Date: Sat, 11 Apr 2026 Prob (F-statistic): 2.66e-16 +Time: 22:39:07 Log-Likelihood: -226.97 No. Observations: 157 AIC: 461.9 Df Residuals: 153 BIC: 474.2 Df Model: 3 @@ -42421,8 +42421,8 @@ \subsection{Creating the Regression Model with Extracted Factors (from Dep. Variable: ln_sales R-squared: 0.485 Model: OLS Adj. 
R-squared: 0.449 Method: Least Squares F-statistic: 13.73 -Date: Sat, 04 Apr 2026 Prob (F-statistic): 7.69e-17 -Time: 10:42:27 Log-Likelihood: -213.62 +Date: Sat, 11 Apr 2026 Prob (F-statistic): 7.69e-17 +Time: 22:39:07 Log-Likelihood: -213.62 No. Observations: 157 AIC: 449.2 Df Residuals: 146 BIC: 482.9 Df Model: 10 @@ -42536,8 +42536,8 @@ \subsubsection{Setting Up the Regression Model with Reduced Dep. Variable: ln_sales R-squared: 0.350 Model: OLS Adj. R-squared: 0.337 Method: Least Squares F-statistic: 27.43 -Date: Sat, 04 Apr 2026 Prob (F-statistic): 2.99e-14 -Time: 10:42:27 Log-Likelihood: -231.87 +Date: Sat, 11 Apr 2026 Prob (F-statistic): 2.99e-14 +Time: 22:39:07 Log-Likelihood: -231.87 No. Observations: 157 AIC: 471.7 Df Residuals: 153 BIC: 484.0 Df Model: 3 @@ -43598,7 +43598,7 @@ \section{Assessing Confounding Effects in Multiple \begin{verbatim} The basic model is: y ~ x1 -The following features will be used for fitting the basic model: Index(['y', 'x3', 'x2', 'x1'], dtype='str') +The following features will be used for fitting the basic model: Index(['y', 'x2', 'x3', 'x1'], dtype='str') p-values: 0.34343741859526244 estimate: 1.025306391110114 conf_int: 0 -1.111963 diff --git a/a_04_gp_background_files/figure-pdf/cell-7-output-1.pdf b/a_04_gp_background_files/figure-pdf/cell-7-output-1.pdf index ba9ef84f..1aacd7a5 100644 Binary files a/a_04_gp_background_files/figure-pdf/cell-7-output-1.pdf and b/a_04_gp_background_files/figure-pdf/cell-7-output-1.pdf differ diff --git a/a_04_gp_background_files/figure-pdf/fig-exp2euclid-output-1.pdf b/a_04_gp_background_files/figure-pdf/fig-exp2euclid-output-1.pdf index 3537514c..b76a5044 100644 Binary files a/a_04_gp_background_files/figure-pdf/fig-exp2euclid-output-1.pdf and b/a_04_gp_background_files/figure-pdf/fig-exp2euclid-output-1.pdf differ diff --git a/a_04_gp_background_files/figure-pdf/fig-mvn1-1-output-1.pdf b/a_04_gp_background_files/figure-pdf/fig-mvn1-1-output-1.pdf index c6721a65..2890e523 100644 
Binary files a/a_04_gp_background_files/figure-pdf/fig-mvn1-1-output-1.pdf and b/a_04_gp_background_files/figure-pdf/fig-mvn1-1-output-1.pdf differ diff --git a/a_04_gp_background_files/figure-pdf/fig-mvn1-3-output-1.pdf b/a_04_gp_background_files/figure-pdf/fig-mvn1-3-output-1.pdf index a755a782..8c1e550f 100644 Binary files a/a_04_gp_background_files/figure-pdf/fig-mvn1-3-output-1.pdf and b/a_04_gp_background_files/figure-pdf/fig-mvn1-3-output-1.pdf differ diff --git a/a_04_gp_background_files/figure-pdf/fig-mvn2-output-1.pdf b/a_04_gp_background_files/figure-pdf/fig-mvn2-output-1.pdf index ed84a532..74730cd4 100644 Binary files a/a_04_gp_background_files/figure-pdf/fig-mvn2-output-1.pdf and b/a_04_gp_background_files/figure-pdf/fig-mvn2-output-1.pdf differ diff --git a/a_04_gp_background_files/figure-pdf/fig-mvn5-output-1.pdf b/a_04_gp_background_files/figure-pdf/fig-mvn5-output-1.pdf index 44e92a0b..60674977 100644 Binary files a/a_04_gp_background_files/figure-pdf/fig-mvn5-output-1.pdf and b/a_04_gp_background_files/figure-pdf/fig-mvn5-output-1.pdf differ diff --git a/awwe_optimization_files/figure-pdf/cell-12-output-1.pdf b/awwe_optimization_files/figure-pdf/cell-12-output-1.pdf index e1323d81..d9b4cdc7 100644 Binary files a/awwe_optimization_files/figure-pdf/cell-12-output-1.pdf and b/awwe_optimization_files/figure-pdf/cell-12-output-1.pdf differ diff --git a/awwe_optimization_files/figure-pdf/cell-13-output-2.pdf b/awwe_optimization_files/figure-pdf/cell-13-output-2.pdf index 489d808c..afd91f3e 100644 Binary files a/awwe_optimization_files/figure-pdf/cell-13-output-2.pdf and b/awwe_optimization_files/figure-pdf/cell-13-output-2.pdf differ diff --git a/awwe_optimization_files/figure-pdf/cell-13-output-4.pdf b/awwe_optimization_files/figure-pdf/cell-13-output-4.pdf index f8460c7d..5586db55 100644 Binary files a/awwe_optimization_files/figure-pdf/cell-13-output-4.pdf and b/awwe_optimization_files/figure-pdf/cell-13-output-4.pdf differ diff --git 
a/awwe_optimization_files/figure-pdf/cell-13-output-6.pdf b/awwe_optimization_files/figure-pdf/cell-13-output-6.pdf index 96179191..2abbf64a 100644 Binary files a/awwe_optimization_files/figure-pdf/cell-13-output-6.pdf and b/awwe_optimization_files/figure-pdf/cell-13-output-6.pdf differ diff --git a/awwe_optimization_files/figure-pdf/cell-17-output-1.pdf b/awwe_optimization_files/figure-pdf/cell-17-output-1.pdf index b6c34097..1715a551 100644 Binary files a/awwe_optimization_files/figure-pdf/cell-17-output-1.pdf and b/awwe_optimization_files/figure-pdf/cell-17-output-1.pdf differ diff --git a/awwe_optimization_files/figure-pdf/cell-19-output-1.pdf b/awwe_optimization_files/figure-pdf/cell-19-output-1.pdf index 99fa3088..5d76b0ae 100644 Binary files a/awwe_optimization_files/figure-pdf/cell-19-output-1.pdf and b/awwe_optimization_files/figure-pdf/cell-19-output-1.pdf differ diff --git a/awwe_optimization_files/figure-pdf/cell-22-output-1.pdf b/awwe_optimization_files/figure-pdf/cell-22-output-1.pdf index 5a75adb9..88df5a21 100644 Binary files a/awwe_optimization_files/figure-pdf/cell-22-output-1.pdf and b/awwe_optimization_files/figure-pdf/cell-22-output-1.pdf differ diff --git a/checkpoint_res.pkl b/checkpoint_res.pkl index e55df0dc..29db0b01 100644 Binary files a/checkpoint_res.pkl and b/checkpoint_res.pkl differ diff --git a/custom_name.pkl b/custom_name.pkl index d87858c0..fa433544 100644 Binary files a/custom_name.pkl and b/custom_name.pkl differ diff --git a/docs/001_sampling.html b/docs/001_sampling.html index e5606381..c2288f5c 100644 --- a/docs/001_sampling.html +++ b/docs/001_sampling.html @@ -780,7 +780,7 @@

36  Forrester et al. (2008).
  • The following Python packages are imported:
  • -
    +
    import pandas as pd
     import numpy as np
     from typing import Tuple, Optional
    @@ -902,7 +902,7 @@ 

    \(B^*\), see https://github.com/sequential-parameter-optimization/spotPython/blob/main/src/spotpython/utils/effects.py.

    Here is the corresponding code:

    -
    +
    def randorient(k, p, xi, seed=None):
         # Initialize random number generator with the provided seed
         if seed is not None:
    @@ -971,7 +971,7 @@ 

    Example 36.2 (Random Orientation of the Sampling Matrix in 2-D)  

    -
    +
    k = 2
     p = 3
     xi = 1
    @@ -1011,7 +1011,7 @@ 

    Example 36.3 (Random Orientation of the Sampling Matrix)  

    -
    +
    k = 3
     p = 3
     xi = 1
    @@ -1019,10 +1019,10 @@ 

    print(f"Random orientation of the sampling matrix:\n{Bstar}")

    Random orientation of the sampling matrix:
    -[[0.  0.  0.5]
    - [0.  0.5 0.5]
    +[[0.5 0.  0.5]
      [0.5 0.5 0.5]
    - [0.5 0.5 0. ]]
    + [0. 0.5 0.5] + [0. 0.5 0. ]]

    @@ -1037,7 +1037,7 @@

    https://github.com/sequential-parameter-optimization/spotPython/blob/main/src/spotpython/utils/effects.py):

    -
    +
    def screeningplan(k, p, xi, r):
         # Empty list to accumulate screening plan rows
         X = []
    @@ -1066,7 +1066,7 @@ 

    +
    def _screening(X, fun, xi, p, labels, bounds=None) -> tuple:
         """Helper function to calculate elementary effects for a screening design.
     
    @@ -1195,7 +1195,7 @@ 

    - +
    Figure 36.2: Estimated means and standard deviations of the elementary effects for the 10 design variables of the wing weight function. Example based on Forrester et al. (2008). @@ -1252,7 +1252,7 @@

    The most straightforward way of sampling a design space in a uniform fashion is by means of a rectangular grid of points. This is the full factorial sampling technique.

    Here is the simplified version of a Python function that will sample the unit hypercube at all levels in all dimensions, with the \(k\)-vector \(q\) containing the number of points required along each dimension, see https://github.com/sequential-parameter-optimization/spotPython/blob/main/src/spotpython/utils/sampling.py.

    The variable Edges specifies whether we want the points to be equally spaced from edge to edge (Edges=1) or we want them to be in the centres of \(n = q_1 \times q_2 \times \ldots \times q_k\) bins filling the unit hypercube (for any other value of Edges).

    -
    +
    def fullfactorial(q_param, Edges=1) -> np.ndarray:
         """Generates a full factorial sampling plan in the unit cube.
     
    @@ -1311,7 +1311,7 @@ 

    X[:, j] = column return X

    -
    +
    q = [3, 2]
     X = fullfactorial(q, Edges=0)
     print(X)
    @@ -1339,7 +1339,7 @@

    -
    +
    X = fullfactorial(q, Edges=1)
     print(X)
    @@ -1393,7 +1393,7 @@

    \([0, 1]^k\).

    This approach ensures multidimensional stratification and uniformity in projections. Here is the code:

    -
    +
    def rlh(n: int, k: int, edges: int = 0) -> np.ndarray:
         # Initialize array
         X = np.zeros((n, k), dtype=float)
    @@ -1415,15 +1415,15 @@ 

    Example 36.5 (Random Latin Hypercube) The following code can be used to generate a 2D Latin hypercube with 5 points and edges=0:

    -
    +
    X = rlh(n=5, k=2, edges=0)
     print(X)
    -
    [[0.3 0.3]
    - [0.1 0.5]
    - [0.5 0.1]
    - [0.9 0.9]
    - [0.7 0.7]]
    +
    [[0.3 0.5]
    + [0.5 0.9]
    + [0.7 0.3]
    + [0.9 0.7]
    + [0.1 0.1]]

    Figure 36.5 shows the points in the unit hypercube for the case of 5 points with edges=0.

    @@ -1444,15 +1444,15 @@

    Example 36.6 (Random Latin Hypercube with Edges) The following code can be used to generate a 2D Latin hypercube with 5 points and edges=1:

    -
    +
    X = rlh(n=5, k=2, edges=1)
     print(X)
    -
    [[0.75 0.5 ]
    - [0.5  0.25]
    - [0.   0.  ]
    +
    [[0.25 0.  ]
    + [0.   0.75]
      [1.   1.  ]
    - [0.25 0.75]]
    + [0.5 0.5 ] + [0.75 0.25]]

    Figure 36.6 shows the points in the unit hypercube for the case of 5 points with edges=1.

    @@ -1496,7 +1496,7 @@

    36.3.3.1 The Function jd

    The function jd computes the distinct p-norm distances between all pairs of points in a given set and counts their occurrences. It returns two arrays: one for the distinct distances and another for their multiplicities.

    -
    +
    def jd(X: np.ndarray, p: float = 1.0) -> Tuple[np.ndarray, np.ndarray]:
         """
         Args:
    @@ -1555,7 +1555,7 @@ 

    -
    +
    J, distinct_d = jd(X, p=2.0)
     print("Distinct distances (d_i):", distinct_d)
     print("Occurrences (J_i):", J)
    @@ -1578,7 +1578,7 @@

    36.3.4.1 The Function mm

    The function mm compares two sampling plans based on the Morris-Mitchell criterion. It uses the jd function to compute the distances and multiplicities, constructs vectors for comparison, and determines which plan is more space-filling.

    -
    +
    def mm(X1: np.ndarray, X2: np.ndarray, p: Optional[float] = 1.0) -> int:
         """
         Args:
    @@ -1631,7 +1631,7 @@ 

    Example 36.8 (The Function mm) We can use the mm function to compare two sampling plans. The following code creates two 3-point sampling plans in 2D (shown in Figure 36.8) and compares them using the Morris-Mitchell criterion:

    -
    +
    X1 = np.array([[0.0, 0.0],[0.5, 0.5],[0.0, 1.0], [1.0, 1.0]])
     X2 = np.array([[0.1, 0.1],[0.4, 0.6],[0.1, 0.9], [0.9, 0.9]])
    @@ -1650,7 +1650,7 @@

    We can compare which plan has better space-filling (Morris-Mitchell). The output is either 0, 1, or 2 depending on which plan is more space-filling.

    -
    +
    better = mm(X1, X2, p=2.0)
     print(f"Plan {better} is more space-filling.")
    @@ -1672,7 +1672,7 @@

    The smaller the value of \(\Phi_q\), the better the space-filling properties of \(X\) will be.

    The function mmphi computes the Morris-Mitchell sampling plan quality criterion for a given sampling plan. It takes a 2D array of points and calculates the space-fillingness metric based on the distances between points. This can be implemented in Python as follows:

    -
    +
    def mmphi(X: np.ndarray,
               q: Optional[float] = 2.0,
               p: Optional[float] = 1.0) -> float:
    @@ -1704,7 +1704,7 @@ 

    Example 36.9 (The Function mmphi) We can use the mmphi function to evaluate the space-filling quality of the two sampling plans from Example 36.8. The following code uses these two 3-point sampling plans in 2D and computes their quality using the Morris-Mitchell criterion:

    -
    +
    # Two simple sampling plans from above
     quality1 = mmphi(X1, q=2, p=2)
     quality2 = mmphi(X2, q=2, p=2)
    @@ -1723,7 +1723,7 @@ 

    36.3.4.3 The Function mmsort

    The function mmsort is designed to rank multiple sampling plans based on their space-filling properties using the Morris-Mitchell criterion. It takes a 3D array of sampling plans and returns the indices of the plans sorted in ascending order of their space-filling quality.

    -
    +
    def mmsort(X3D: np.ndarray, p: Optional[float] = 1.0) -> np.ndarray:
         """
         Args:
    @@ -1764,7 +1764,7 @@ 

    Example 36.10 (The Function mmsort) The mmsort function can be used to rank multiple sampling plans based on their space-filling properties. The following code demonstrates how to use mmsort to compare two 3-point sampling plans in 3D space:

    Suppose we have two 3-point sampling plans X1 and X2 from above. They are sorted using the Morris-Mitchell criterion with \(p=2.0\). For example, the output [1, 2] indicates that X1 is more space-filling than X2:

    -
    +
    X3D = np.stack([X1, X2], axis=2)
     ranking = mmsort(X3D, p=2.0)
     print(ranking)
    @@ -1780,7 +1780,7 @@

    \(q\) as an additional argument, as well as the comparison line being:

    if mmphi(X3D[:, :, Index[i] - 1], q=q, p=p) >
         mmphi(X3D[:, :, Index[i + 1] - 1], q=q, p=p):
    -
    +
    def phisort(X3D: np.ndarray,
                 q: Optional[float] = 2.0,
                 p: Optional[float] = 1.0) -> np.ndarray:
    @@ -1823,7 +1823,7 @@ 

    Example 36.11 (The Function phisort) The phisort function can be used to rank multiple sampling plans based on the Morris-Mitchell criterion. The following code demonstrates how to use phisort to compare two 3-point sampling plans in 3D space:

    -
    +
    X1 = bestlh(n=5, k=2, population=5, iterations=10)
     X2 = bestlh(n=5, k=2, population=15, iterations=20)
     X3 = bestlh(n=5, k=2, population=25, iterations=30)
    @@ -1850,7 +1850,7 @@ 

    36.3.5.1 The Function perturb()

    The function perturb randomly swaps elements in a Latin hypercube sampling plan. It takes a 2D array representing the sampling plan and performs a specified number of random element swaps, ensuring that the result remains a valid Latin hypercube.

    -
    +
    def perturb(X: np.ndarray,
                 PertNum: Optional[int] = 1) -> np.ndarray:
         """
    @@ -1887,7 +1887,7 @@ 

    Example 36.12 (The Function perturb()) The perturb function can be used to randomly swap elements in a Latin hypercube sampling plan. The following code demonstrates how to use perturb to create a perturbed version of a 4x2 sampling plan:

    -
    +
    X_original = np.array([[1, 3],[2, 4],[3, 1],[4, 2]])
     print("Original Sampling Plan:")
     print(X_original)
    @@ -1901,10 +1901,10 @@ 

  • Evaluates the space-fillingness of each offspring via the Morris-Mitchell metric (using mmphi).
  • Updates the best plan whenever a better offspring is found.
  • -
    +
    def mmlhs(X_start: np.ndarray,
               population: int,
               iterations: int,
    @@ -2001,7 +2001,7 @@ 

    Example 36.13 (The Function mmlhs) The mmlhs function can be used to optimize a Latin hypercube sampling plan. The following code demonstrates how to use mmlhs to optimize a 4x2 Latin hypercube starting from an initial plan:

    -
    +
    # Suppose we have an initial 4x2 plan
     X_start = np.array([[0.1, 0.3],[.1, .4],[.2, .9],[.9, .2]])
     print("Initial plan:")
    @@ -2017,8 +2017,8 @@ 

    [0.2 0.9] [0.9 0.2]] Optimized plan: -[[0.2 0.4] - [0.9 0.3] +[[0.9 0.3] + [0.2 0.4] [0.1 0.2] [0.1 0.9]]

    @@ -2043,7 +2043,7 @@

    36.3.7.2 The Function bestlh

    Generates an optimized Latin hypercube by evolving the Morris-Mitchell criterion across multiple exponents (q values) and selecting the best plan.

    -
    +
    def bestlh(n: int,
                k: int,
                population: int,
    @@ -2128,7 +2128,7 @@ 

    Example 36.14 (The Function bestlh) The bestlh function can be used to generate an optimized Latin hypercube sampling plan. The following code demonstrates how to use bestlh to create a 5x2 Latin hypercube with a population of 5 and 10 iterations:

    -
    +
    Xbestlh= bestlh(n=5, k=2, population=5, iterations=10)

    Figure 36.10 shows the best Latin hypercube sampling in 2D. The red points represent the optimized plan.

    @@ -2158,7 +2158,7 @@

    +
    N_POINTS = 16
     N_DIM = 2
     RANDOM_SEED = 42
    @@ -2168,7 +2168,7 @@ 

    36.4.1 Evaluation of Sampling Designs

    We generate various sampling designs and evaluate their space-filling properties using the Morris-Mitchell criterion.

    -
    +
    designs = {}
     bounds = [(0, 1)] * N_DIM
     
    @@ -2204,7 +2204,7 @@ 

    +
    if N_DIM == 2:
         num_designs = len(designs)
         cols = 2
    @@ -2242,7 +2242,7 @@ 

    36.4.2 Demonstrate the Impact of mmphi Parameters

    Demonstrating Impact of mmphi Parameters on ‘LHS’ Design

    -
    +
    X_lhs = designs["LHS"]
     
     # 1. Default parameters (already calculated)
    @@ -2267,7 +2267,7 @@ 

    36.4.3 Morris-Mitchell Criterion: Impact of Adding Points

    Impact of adding a point to a 2x2 grid design

    -
    +
    # Initial 2x2 Grid Design
     X_initial = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
     mmphi_initial = mmphi(X_initial, q=q, p=p)
    @@ -2290,7 +2290,7 @@ 

    +
    scenarios = {
         "Scenario 1: Add to Center": {
             "new_point": np.array([[0.5, 0.5]]),
    @@ -2351,7 +2351,7 @@ 

    +
    num_designs = len(augmented_designs_for_plotting)
     cols = 2
     rows = int(np.ceil(num_designs / cols))
    diff --git a/docs/001_sampling_files/figure-html/fig-forre08a-1-2-output-1.png b/docs/001_sampling_files/figure-html/fig-forre08a-1-2-output-1.png
    index ca5f3ccb..bcbecf4a 100644
    Binary files a/docs/001_sampling_files/figure-html/fig-forre08a-1-2-output-1.png and b/docs/001_sampling_files/figure-html/fig-forre08a-1-2-output-1.png differ
    diff --git a/docs/001_sampling_files/figure-html/fig-forre08a-4-output-1.png b/docs/001_sampling_files/figure-html/fig-forre08a-4-output-1.png
    index c25ce8ec..fee8af3a 100644
    Binary files a/docs/001_sampling_files/figure-html/fig-forre08a-4-output-1.png and b/docs/001_sampling_files/figure-html/fig-forre08a-4-output-1.png differ
    diff --git a/docs/001_sampling_files/figure-html/fig-rlh-edges0-output-1.png b/docs/001_sampling_files/figure-html/fig-rlh-edges0-output-1.png
    index fd1a5ed8..2fe87a6a 100644
    Binary files a/docs/001_sampling_files/figure-html/fig-rlh-edges0-output-1.png and b/docs/001_sampling_files/figure-html/fig-rlh-edges0-output-1.png differ
    diff --git a/docs/001_sampling_files/figure-html/fig-rlh-edges1-output-1.png b/docs/001_sampling_files/figure-html/fig-rlh-edges1-output-1.png
    index 39e6c0eb..7ca57039 100644
    Binary files a/docs/001_sampling_files/figure-html/fig-rlh-edges1-output-1.png and b/docs/001_sampling_files/figure-html/fig-rlh-edges1-output-1.png differ
    diff --git a/docs/001_sampling_files/figure-pdf/cell-43-output-1.pdf b/docs/001_sampling_files/figure-pdf/cell-43-output-1.pdf
    index a5775f5c..d2e9fd46 100644
    Binary files a/docs/001_sampling_files/figure-pdf/cell-43-output-1.pdf and b/docs/001_sampling_files/figure-pdf/cell-43-output-1.pdf differ
    diff --git a/docs/001_sampling_files/figure-pdf/cell-47-output-1.pdf b/docs/001_sampling_files/figure-pdf/cell-47-output-1.pdf
    index 354f0ee1..47bfc76b 100644
    Binary files a/docs/001_sampling_files/figure-pdf/cell-47-output-1.pdf and b/docs/001_sampling_files/figure-pdf/cell-47-output-1.pdf differ
    diff --git a/docs/001_sampling_files/figure-pdf/fig-forre08a-1-2-output-1.pdf b/docs/001_sampling_files/figure-pdf/fig-forre08a-1-2-output-1.pdf
    index 1783c85f..6f4cdffb 100644
    Binary files a/docs/001_sampling_files/figure-pdf/fig-forre08a-1-2-output-1.pdf and b/docs/001_sampling_files/figure-pdf/fig-forre08a-1-2-output-1.pdf differ
    diff --git a/docs/001_sampling_files/figure-pdf/fig-forre08a-3-output-1.pdf b/docs/001_sampling_files/figure-pdf/fig-forre08a-3-output-1.pdf
    index 55d95e61..765eccc6 100644
    Binary files a/docs/001_sampling_files/figure-pdf/fig-forre08a-3-output-1.pdf and b/docs/001_sampling_files/figure-pdf/fig-forre08a-3-output-1.pdf differ
    diff --git a/docs/001_sampling_files/figure-pdf/fig-forre08a-4-output-1.pdf b/docs/001_sampling_files/figure-pdf/fig-forre08a-4-output-1.pdf
    index 67716cac..7e940118 100644
    Binary files a/docs/001_sampling_files/figure-pdf/fig-forre08a-4-output-1.pdf and b/docs/001_sampling_files/figure-pdf/fig-forre08a-4-output-1.pdf differ
    diff --git a/docs/001_sampling_files/figure-pdf/fig-forre08a-6-output-2.pdf b/docs/001_sampling_files/figure-pdf/fig-forre08a-6-output-2.pdf
    index 65443a10..17c49228 100644
    Binary files a/docs/001_sampling_files/figure-pdf/fig-forre08a-6-output-2.pdf and b/docs/001_sampling_files/figure-pdf/fig-forre08a-6-output-2.pdf differ
    diff --git a/docs/001_sampling_files/figure-pdf/fig-fullfactorial-2d-edges0-output-1.pdf b/docs/001_sampling_files/figure-pdf/fig-fullfactorial-2d-edges0-output-1.pdf
    index 0aaa20ee..f5c3333c 100644
    Binary files a/docs/001_sampling_files/figure-pdf/fig-fullfactorial-2d-edges0-output-1.pdf and b/docs/001_sampling_files/figure-pdf/fig-fullfactorial-2d-edges0-output-1.pdf differ
    diff --git a/docs/001_sampling_files/figure-pdf/fig-fullfactorial-2d-edges1-output-1.pdf b/docs/001_sampling_files/figure-pdf/fig-fullfactorial-2d-edges1-output-1.pdf
    index bb90eeb8..f61f7a51 100644
    Binary files a/docs/001_sampling_files/figure-pdf/fig-fullfactorial-2d-edges1-output-1.pdf and b/docs/001_sampling_files/figure-pdf/fig-fullfactorial-2d-edges1-output-1.pdf differ
    diff --git a/docs/001_sampling_files/figure-pdf/fig-jd-3points-output-1.pdf b/docs/001_sampling_files/figure-pdf/fig-jd-3points-output-1.pdf
    index 5926879a..227c1b2d 100644
    Binary files a/docs/001_sampling_files/figure-pdf/fig-jd-3points-output-1.pdf and b/docs/001_sampling_files/figure-pdf/fig-jd-3points-output-1.pdf differ
    diff --git a/docs/001_sampling_files/figure-pdf/fig-mm-3points-output-1.pdf b/docs/001_sampling_files/figure-pdf/fig-mm-3points-output-1.pdf
    index b731a837..0152ee70 100644
    Binary files a/docs/001_sampling_files/figure-pdf/fig-mm-3points-output-1.pdf and b/docs/001_sampling_files/figure-pdf/fig-mm-3points-output-1.pdf differ
    diff --git a/docs/001_sampling_files/figure-pdf/fig-randorient-2d-output-1.pdf b/docs/001_sampling_files/figure-pdf/fig-randorient-2d-output-1.pdf
    index 5b607a53..53d7cf5c 100644
    Binary files a/docs/001_sampling_files/figure-pdf/fig-randorient-2d-output-1.pdf and b/docs/001_sampling_files/figure-pdf/fig-randorient-2d-output-1.pdf differ
    diff --git a/docs/001_sampling_files/figure-pdf/fig-rlh-edges0-output-1.pdf b/docs/001_sampling_files/figure-pdf/fig-rlh-edges0-output-1.pdf
    index e8db66f0..05fb9783 100644
    Binary files a/docs/001_sampling_files/figure-pdf/fig-rlh-edges0-output-1.pdf and b/docs/001_sampling_files/figure-pdf/fig-rlh-edges0-output-1.pdf differ
    diff --git a/docs/001_sampling_files/figure-pdf/fig-rlh-edges1-output-1.pdf b/docs/001_sampling_files/figure-pdf/fig-rlh-edges1-output-1.pdf
    index b34847cc..518bc957 100644
    Binary files a/docs/001_sampling_files/figure-pdf/fig-rlh-edges1-output-1.pdf and b/docs/001_sampling_files/figure-pdf/fig-rlh-edges1-output-1.pdf differ
    diff --git a/docs/002_awwe.html b/docs/002_awwe.html
    index 12fc7086..0603f3be 100644
    --- a/docs/002_awwe.html
    +++ b/docs/002_awwe.html
    @@ -765,7 +765,7 @@ 

    This section is based on chapter 1.3 “A ten-variable weight function” in Forrester et al. (2008).
  • The following Python packages are imported:
  • -
    +
    import math
     import matplotlib.pyplot as plt
     import numpy as np
    @@ -899,7 +899,7 @@ 

    \[ g(y) = a + (b - a) y \tag{34.2}\] can be used. The function wingwt() expects inputs from the unit cube, which are then transformed back to their original scales using Equation 34.2. The function is defined as follows:

    -
    +
    def wingwt(Sw=0.48, Wfw=0.4, A=0.38, L=0.5, q=0.62, l=0.344,  Rtc=0.4, Nz=0.37, Wdg=0.38):
         # put coded inputs back on natural scale
         Sw = Sw * (200 - 150) + 150 
    @@ -922,7 +922,7 @@ 

    +
    x = np.linspace(0, 1, 3)
     y = np.linspace(0, 1, 3)
     X, Y = np.meshgrid(x, y)
    @@ -941,7 +941,7 @@ 

    +
    %matplotlib inline
     # plt.style.use('seaborn-white')
     x = np.linspace(0, 1, 100)
    @@ -952,7 +952,7 @@ 

    34.5 Plot 1: Load Factor (\(N_z\)) and Aspect Ratio (\(A\))

    We will vary \(N_z\) and \(A\), with other inputs fixed at their baseline values.

    -
    +
    z = wingwt(A = X, Nz = Y)
     fig = plt.figure(figsize=(7., 5.))
     plt.contourf(X, Y, z, 20, cmap='jet')
    @@ -970,7 +970,7 @@ 

    +
    contours = plt.contour(X, Y, z, 4, colors='black')
     plt.clabel(contours, inline=True, fontsize=8)
     plt.xlabel("A")
    @@ -1002,7 +1002,7 @@ 

    \(\lambda\) and fuel weight \(W_{fw}\) -
    +
    z = wingwt(Wfw = X,  Nz = Y)
     contours = plt.contour(X, Y, z, 4, colors='black')
     plt.clabel(contours, inline=True, fontsize=8)
    @@ -1033,10 +1033,10 @@ 

    34.7 The Big Picture: Combining all Variables

    -
    +
    pl = ["Sw", "Wfw", "A", "L", "q", "l",  "Rtc", "Nz", "Wdg"]
    -
    +
    Z = []
     Zlab = []
     l = len(pl)
    @@ -1050,7 +1050,7 @@ 

    Zlab.append([pl[i],pl[j]])

    Now we can generate all 36 combinations, e.g., our first example is combination p = 19.

    -
    +
    p = 19
     Zlab[p]
    @@ -1058,7 +1058,7 @@

    +
    plt.contourf(X, Y, Z[p], 20, cmap='jet', vmin=180, vmax=360)
     plt.xlabel(Zlab[p][0])
     plt.ylabel(Zlab[p][1])
    @@ -1075,14 +1075,14 @@ 

    \(\lambda\) and fuel weight \(W_{fw}\)
  • This is combination 11:
  • -
    +
    p = 11
     Zlab[p]
    ['Wfw', 'l']
    -
    +
    plt.contourf(X, Y, Z[p], 20, cmap='jet', vmin=180, vmax=360)
     plt.xlabel(Zlab[p][0])
     plt.ylabel(Zlab[p][1])
    diff --git a/docs/002_awwe_files/figure-html/cell-15-output-1.png b/docs/002_awwe_files/figure-html/cell-15-output-1.png
    deleted file mode 100644
    index da1a3304..00000000
    Binary files a/docs/002_awwe_files/figure-html/cell-15-output-1.png and /dev/null differ
    diff --git a/docs/002_awwe_files/figure-pdf/cell-12-output-1.pdf b/docs/002_awwe_files/figure-pdf/cell-12-output-1.pdf
    index 2c8cf879..251a7c53 100644
    Binary files a/docs/002_awwe_files/figure-pdf/cell-12-output-1.pdf and b/docs/002_awwe_files/figure-pdf/cell-12-output-1.pdf differ
    diff --git a/docs/002_awwe_files/figure-pdf/cell-14-output-1.pdf b/docs/002_awwe_files/figure-pdf/cell-14-output-1.pdf
    index ddf7c640..dbf0e20f 100644
    Binary files a/docs/002_awwe_files/figure-pdf/cell-14-output-1.pdf and b/docs/002_awwe_files/figure-pdf/cell-14-output-1.pdf differ
    diff --git a/docs/002_awwe_files/figure-pdf/cell-6-output-1.pdf b/docs/002_awwe_files/figure-pdf/cell-6-output-1.pdf
    index 67939be4..6dc7e6eb 100644
    Binary files a/docs/002_awwe_files/figure-pdf/cell-6-output-1.pdf and b/docs/002_awwe_files/figure-pdf/cell-6-output-1.pdf differ
    diff --git a/docs/002_awwe_files/figure-pdf/cell-7-output-1.pdf b/docs/002_awwe_files/figure-pdf/cell-7-output-1.pdf
    index ea51caf5..7b743c89 100644
    Binary files a/docs/002_awwe_files/figure-pdf/cell-7-output-1.pdf and b/docs/002_awwe_files/figure-pdf/cell-7-output-1.pdf differ
    diff --git a/docs/002_awwe_files/figure-pdf/cell-8-output-1.pdf b/docs/002_awwe_files/figure-pdf/cell-8-output-1.pdf
    index fc73dd91..00453955 100644
    Binary files a/docs/002_awwe_files/figure-pdf/cell-8-output-1.pdf and b/docs/002_awwe_files/figure-pdf/cell-8-output-1.pdf differ
    diff --git a/docs/002_awwe_files/figure-pdf/fig-awwe-example-output-1.pdf b/docs/002_awwe_files/figure-pdf/fig-awwe-example-output-1.pdf
    index 3e5b8d26..14f79e1b 100644
    Binary files a/docs/002_awwe_files/figure-pdf/fig-awwe-example-output-1.pdf and b/docs/002_awwe_files/figure-pdf/fig-awwe-example-output-1.pdf differ
    diff --git a/docs/005_num_rsm.html b/docs/005_num_rsm.html
    index cf05e972..48add9be 100644
    --- a/docs/005_num_rsm.html
    +++ b/docs/005_num_rsm.html
    @@ -793,7 +793,7 @@ 

    RSM is related to various fields, including Design of Experiments (DoE), quality management, reliability, and productivity. Its applications are widespread in industry and manufacturing, focusing on designing, developing, and formulating new products and improving existing ones, as well as from laboratory research. RSM is commonly applied in domains such as materials science, manufacturing, applied chemistry, climate science, and many others.

    An example of RSM involves studying the relationship between a response variable, such as yield (\(y\)) in a chemical process, and two process variables: reaction time (\(\xi_1\)) and reaction temperature (\(\xi_2\)). The provided code illustrates this scenario, following a variation of the so-called “banana function.”

    In the context of visualization, RSM offers the choice between 3D plots and contour plots. In a 3D plot, the independent variables \(\xi_1\) and \(\xi_2\) are represented, with \(y\) as the dependent variable.

    -
    +
    import numpy as np
     import matplotlib.pyplot as plt
     
    @@ -831,7 +831,7 @@ 

    \(y\) is the dependent variable -
    +
    import numpy as np
     import matplotlib.cm as cm
     import matplotlib.pyplot as plt
    @@ -946,11 +946,11 @@ 

    \(x^{(0)} = (0,0)\) -
    +
    def fun_1(x1,x2):
         return 50 + 8*x1 + 3*x2
    -
    +
    import numpy as np
     import matplotlib.cm as cm
     import matplotlib.pyplot as plt
    @@ -993,11 +993,11 @@ 

    \((x_1, x_2)\)
  • Responses may be observed over a mesh in the same double-unit square
  • -
    +
    def fun_11(x1,x2):
         return 50 + 8 * x1 + 3 * x2 - 4 * x1 * x2
    -
    +
    import numpy as np
     import matplotlib.cm as cm
     import matplotlib.pyplot as plt
    @@ -1040,11 +1040,11 @@ 

    For example \[\eta = 50 + 8 x_1 + 3x_2 - 7x_1^2 - 3 x_2^2 - 4x_1x_2\]

  • Implementation of the Second-Order Model as fun_2().

  • -
    +
    def fun_2(x1,x2):
         return 50 + 8 * x1 + 3 * x2 - 7 * x1**2 - 3*x2**2 - 4 * x1 * x2
    -
    +
    import numpy as np
     import matplotlib.cm as cm
     import matplotlib.pyplot as plt
    @@ -1086,11 +1086,11 @@ 

    +
    def fun_ridge(x1, x2):
         return 80 + 4*x1 + 8*x2 - 3*x1**2 - 12*x2**2 - 12*x1*x2
    -
    +
    import numpy as np
     import matplotlib.cm as cm
     import matplotlib.pyplot as plt
    @@ -1131,11 +1131,11 @@ 

    • An example of a rising ridge is implemented by the code below.
    -
    +
    def fun_ridge_rise(x1, x2):
          return 80 - 4*x1 + 12*x2 - 3*x1**2 - 12*x2**2 - 12*x1*x2
    -
    +
    import numpy as np
     import matplotlib.cm as cm
     import matplotlib.pyplot as plt
    @@ -1186,11 +1186,11 @@ 

  • Finally, we can get what’s called a saddle or minimax system.
  • -
    +
    def fun_saddle(x1, x2):
         return 80 + 4*x1 + 8*x2 - 2*x2**2 - 12*x1*x2 
    -
    +
    import numpy as np
     import matplotlib.cm as cm
     import matplotlib.pyplot as plt
    diff --git a/docs/005_num_rsm_files/figure-pdf/cell-11-output-2.pdf b/docs/005_num_rsm_files/figure-pdf/cell-11-output-2.pdf
    index f6788a85..5bfc2549 100644
    Binary files a/docs/005_num_rsm_files/figure-pdf/cell-11-output-2.pdf and b/docs/005_num_rsm_files/figure-pdf/cell-11-output-2.pdf differ
    diff --git a/docs/005_num_rsm_files/figure-pdf/cell-13-output-2.pdf b/docs/005_num_rsm_files/figure-pdf/cell-13-output-2.pdf
    index 58a152ed..7b72d8d2 100644
    Binary files a/docs/005_num_rsm_files/figure-pdf/cell-13-output-2.pdf and b/docs/005_num_rsm_files/figure-pdf/cell-13-output-2.pdf differ
    diff --git a/docs/005_num_rsm_files/figure-pdf/cell-15-output-2.pdf b/docs/005_num_rsm_files/figure-pdf/cell-15-output-2.pdf
    index 1e10f1d0..f47e61d2 100644
    Binary files a/docs/005_num_rsm_files/figure-pdf/cell-15-output-2.pdf and b/docs/005_num_rsm_files/figure-pdf/cell-15-output-2.pdf differ
    diff --git a/docs/005_num_rsm_files/figure-pdf/cell-2-output-1.pdf b/docs/005_num_rsm_files/figure-pdf/cell-2-output-1.pdf
    index e4f16d7a..0c7a2407 100644
    Binary files a/docs/005_num_rsm_files/figure-pdf/cell-2-output-1.pdf and b/docs/005_num_rsm_files/figure-pdf/cell-2-output-1.pdf differ
    diff --git a/docs/005_num_rsm_files/figure-pdf/cell-3-output-2.pdf b/docs/005_num_rsm_files/figure-pdf/cell-3-output-2.pdf
    index d6540268..0d2c8256 100644
    Binary files a/docs/005_num_rsm_files/figure-pdf/cell-3-output-2.pdf and b/docs/005_num_rsm_files/figure-pdf/cell-3-output-2.pdf differ
    diff --git a/docs/005_num_rsm_files/figure-pdf/cell-5-output-2.pdf b/docs/005_num_rsm_files/figure-pdf/cell-5-output-2.pdf
    index bbd03cbb..7658cdb9 100644
    Binary files a/docs/005_num_rsm_files/figure-pdf/cell-5-output-2.pdf and b/docs/005_num_rsm_files/figure-pdf/cell-5-output-2.pdf differ
    diff --git a/docs/005_num_rsm_files/figure-pdf/cell-7-output-2.pdf b/docs/005_num_rsm_files/figure-pdf/cell-7-output-2.pdf
    index 1584ec03..987f7504 100644
    Binary files a/docs/005_num_rsm_files/figure-pdf/cell-7-output-2.pdf and b/docs/005_num_rsm_files/figure-pdf/cell-7-output-2.pdf differ
    diff --git a/docs/005_num_rsm_files/figure-pdf/cell-9-output-2.pdf b/docs/005_num_rsm_files/figure-pdf/cell-9-output-2.pdf
    index 88ba88dd..c679b9a5 100644
    Binary files a/docs/005_num_rsm_files/figure-pdf/cell-9-output-2.pdf and b/docs/005_num_rsm_files/figure-pdf/cell-9-output-2.pdf differ
    diff --git a/docs/005_num_rsm_files/figure-pdf/linreg-example-output-2.pdf b/docs/005_num_rsm_files/figure-pdf/linreg-example-output-2.pdf
    index 550726b9..965ad1eb 100644
    Binary files a/docs/005_num_rsm_files/figure-pdf/linreg-example-output-2.pdf and b/docs/005_num_rsm_files/figure-pdf/linreg-example-output-2.pdf differ
    diff --git a/docs/006_matrices.html b/docs/006_matrices.html
    index 23c95137..714da974 100644
    --- a/docs/006_matrices.html
    +++ b/docs/006_matrices.html
    @@ -805,7 +805,7 @@ 

    A small value, eps, can be passed to the function build_Psi to improve the condition number. For example, eps=sqrt(spacing(1)) can be used. The numpy function spacing() returns the distance between a number and its nearest adjacent number.

    The condition number of a matrix is a measure of its sensitivity to small changes in its elements. It is used to estimate how much the output of a function will change if the input is slightly altered.

    A matrix with a low condition number is well-conditioned, which means its behavior is relatively stable, while a matrix with a high condition number is ill-conditioned, meaning its behavior is unstable with respect to numerical precision.

    -
    +
    import numpy as np
     
     # Define a well-conditioned matrix (low condition number)
    @@ -842,7 +842,7 @@ 

    42.3.2 Implementation in Python

    -
    +
    import numpy as np
     from numpy.linalg import pinv
     A = np.array([[1, 2], [3, 4], [5, 6]])
    @@ -910,7 +910,7 @@ 

    +
    import numpy as np
     
     def is_positive_definite(matrix):
    @@ -932,7 +932,7 @@ 

    Example 42.2 (Cholesky decomposition using numpy) linalg.cholesky computes the Cholesky decomposition of a matrix, i.e., it computes a lower triangular matrix \(L\) such that \(LL^T = A\). If the matrix is not positive definite, an error (LinAlgError) is raised.

    -
    +
    import numpy as np
     
     # Define a Hermitian, positive-definite matrix
    @@ -961,7 +961,7 @@ 

    +
    import numpy as np
     
     def is_pd(K):
    @@ -1083,7 +1083,7 @@ 

    +
    import numpy as np
     from scipy.linalg import cholesky, inv
     E = np.exp(1)
    diff --git a/docs/006_num_gp.html b/docs/006_num_gp.html
    index 75957c96..f73fc7b5 100644
    --- a/docs/006_num_gp.html
    +++ b/docs/006_num_gp.html
    @@ -775,7 +775,7 @@ 

    41  Forrester et al. (2008).
  • The following Python packages are imported:
  • -
    +
    import matplotlib.pyplot as plt
     import numpy as np
     from numpy import (array, zeros, power, ones, exp, multiply,
    @@ -958,7 +958,7 @@ 

    \[ \Psi = \exp(-D)\] is computed.

    Next, we will demonstrate how this computation can be implemented in Python. We will consider four points in three dimensions and compute the correlation matrix \(\Psi\) using the basis function from Equation 41.1. These points are placed at the origin, at the unit vectors, and at the points \((100, 100, 100)\) and \((101, 100, 100)\). So, they form two clusters: one at the origin and one at \((100, 100, 100)\).

    -
    +
    theta = np.array([1,2,3])
     X = np.array([ [1,0,0], [0,1,0], [100, 100, 100], [101, 100, 100]])
     X
    @@ -969,7 +969,7 @@

    +
    def build_Psi(X, theta):
         n = X.shape[0]
         k = X.shape[1]
    @@ -982,7 +982,7 @@ 

    D = D + D.T return exp(-D)

    -
    +
    Psi = build_Psi(X, theta)
     Psi
    @@ -1010,7 +1010,7 @@

    Example 41.4 (Example: The Correlation Matrix (Using Existing Functions)) The same result as computed in Example 41.3 can be obtained with existing python functions, e.g., from the package scipy.

    -
    +
    def build_Psi(X, theta, eps=sqrt(spacing(1))):
         return exp(- squareform(pdist(X,
                                 metric='sqeuclidean',
    @@ -1028,7 +1028,7 @@ 

    \(\Psi\) is a measure of how well the matrix can be inverted. A high condition number indicates that the matrix is close to singular, which can lead to numerical instability in computations involving the inverse of the matrix, see Section 42.2.

    -
    +
    np.linalg.cond(Psi)
    np.float64(2.163953413738652)
    @@ -1247,7 +1247,7 @@

    41.6.1 Calculating the Correlation Matrix \(\Psi\)

    The correlation matrix \(\Psi\) is based on the pairwise squared distances between the input locations. Here we will use \(n=8\) sample locations and \(\theta\) is set to 1.0.

    -
    +
    n = 8
     X = np.linspace(0, 2*np.pi, n, endpoint=False).reshape(-1,1)
     print(np.round(X, 2))
    @@ -1263,7 +1263,7 @@

    +
    y = np.sin(X)
     print(np.round(y, 2))
    @@ -1352,7 +1352,7 @@

    41.6.2 Computing the \(\Psi\) Matrix

    We will use the build_Psi function from Example 41.4 to compute the correlation matrix \(\Psi\). \(\theta\) should be an array of one value, because we are only working in one dimension (\(k=1\)).

    -
    +
    theta = np.array([1.0])
     Psi = build_Psi(X, theta)
     print(np.round(Psi, 2))
    @@ -1386,7 +1386,7 @@

    41.6.3 Selecting the New Locations

    We would like to predict at \(m = 100\) new locations (or testing locations) in the interval \([0, 2\pi]\). The new locations are stored in the variable x.

    -
    +
    m = 100
     x = np.linspace(0, 2*np.pi, m, endpoint=False).reshape(-1,1)
    @@ -1394,7 +1394,7 @@

    41.6.4 Computing the \(\psi\) Vector

    Distances between testing locations \(x\) and training data locations \(X\).

    -
    +
    def build_psi(X, x, theta, eps=sqrt(spacing(1))):
         n = X.shape[0]
         k = X.shape[1]
    @@ -1436,7 +1436,7 @@ 

    41.6.5 Predicting at New Locations

    Computation of the predictive equations.

    -
    +
    U = cholesky(Psi).T
     one = np.ones(n).reshape(-1,1)
     mu = (one.T.dot(solve(U, solve(U.T, y)))) / one.T.dot(solve(U, solve(U.T, one)))
    @@ -1450,7 +1450,7 @@ 

    41.6.6 Visualization

    -
    +
    plt.plot(x, f, color = "orange", label="Fitted")
     plt.plot(x, np.sin(x), color = "grey", label="Original")
     plt.plot(X, y, "bo", label="Measurements")
    @@ -1469,7 +1469,7 @@ 

    41.6.7 The Complete Python Code for the Example

    Here is the self-contained Python code for direct use in a notebook:

    -
    +
    import numpy as np
     import matplotlib.pyplot as plt
     from numpy import (array, zeros, power, ones, exp, multiply, eye, linspace, spacing, sqrt, arange, append, ravel)
    diff --git a/docs/006_num_gp_files/figure-pdf/cell-21-output-1.pdf b/docs/006_num_gp_files/figure-pdf/cell-21-output-1.pdf
    index ea1eb1da..06c17f19 100644
    Binary files a/docs/006_num_gp_files/figure-pdf/cell-21-output-1.pdf and b/docs/006_num_gp_files/figure-pdf/cell-21-output-1.pdf differ
    diff --git a/docs/006_num_gp_files/figure-pdf/cell-22-output-2.pdf b/docs/006_num_gp_files/figure-pdf/cell-22-output-2.pdf
    index 0b0923e8..70982730 100644
    Binary files a/docs/006_num_gp_files/figure-pdf/cell-22-output-2.pdf and b/docs/006_num_gp_files/figure-pdf/cell-22-output-2.pdf differ
    diff --git a/docs/006_num_gp_files/figure-pdf/cell-22-output-4.pdf b/docs/006_num_gp_files/figure-pdf/cell-22-output-4.pdf
    index b6e58516..e4bad20d 100644
    Binary files a/docs/006_num_gp_files/figure-pdf/cell-22-output-4.pdf and b/docs/006_num_gp_files/figure-pdf/cell-22-output-4.pdf differ
    diff --git a/docs/006_num_gp_files/figure-pdf/fig-corr-matrix-build_psi-output-1.pdf b/docs/006_num_gp_files/figure-pdf/fig-corr-matrix-build_psi-output-1.pdf
    index bb92761d..9af0842b 100644
    Binary files a/docs/006_num_gp_files/figure-pdf/fig-corr-matrix-build_psi-output-1.pdf and b/docs/006_num_gp_files/figure-pdf/fig-corr-matrix-build_psi-output-1.pdf differ
    diff --git a/docs/006_num_gp_files/figure-pdf/fig-pval12-output-1.pdf b/docs/006_num_gp_files/figure-pdf/fig-pval12-output-1.pdf
    index 4c5151bb..2d9853e6 100644
    Binary files a/docs/006_num_gp_files/figure-pdf/fig-pval12-output-1.pdf and b/docs/006_num_gp_files/figure-pdf/fig-pval12-output-1.pdf differ
    diff --git a/docs/006_num_gp_files/figure-pdf/fig-sin-corr-output-1.pdf b/docs/006_num_gp_files/figure-pdf/fig-sin-corr-output-1.pdf
    index 3319ec14..82ab1f23 100644
    Binary files a/docs/006_num_gp_files/figure-pdf/fig-sin-corr-output-1.pdf and b/docs/006_num_gp_files/figure-pdf/fig-sin-corr-output-1.pdf differ
    diff --git a/docs/006_num_gp_files/figure-pdf/fig-sin-corr-pred-output-1.pdf b/docs/006_num_gp_files/figure-pdf/fig-sin-corr-pred-output-1.pdf
    index ff357f1b..23a2cd4a 100644
    Binary files a/docs/006_num_gp_files/figure-pdf/fig-sin-corr-pred-output-1.pdf and b/docs/006_num_gp_files/figure-pdf/fig-sin-corr-pred-output-1.pdf differ
    diff --git a/docs/006_num_gp_files/figure-pdf/fig-sin-data-output-1.pdf b/docs/006_num_gp_files/figure-pdf/fig-sin-data-output-1.pdf
    index e612fcbe..baa466d3 100644
    Binary files a/docs/006_num_gp_files/figure-pdf/fig-sin-data-output-1.pdf and b/docs/006_num_gp_files/figure-pdf/fig-sin-data-output-1.pdf differ
    diff --git a/docs/006_num_gp_files/figure-pdf/fig-theta12-output-1.pdf b/docs/006_num_gp_files/figure-pdf/fig-theta12-output-1.pdf
    index 56bcf7b5..5ba30b24 100644
    Binary files a/docs/006_num_gp_files/figure-pdf/fig-theta12-output-1.pdf and b/docs/006_num_gp_files/figure-pdf/fig-theta12-output-1.pdf differ
    diff --git a/docs/006_num_poly.html b/docs/006_num_poly.html
    index ddecfe93..886c5663 100644
    --- a/docs/006_num_poly.html
    +++ b/docs/006_num_poly.html
    @@ -762,7 +762,7 @@ 

    39  Forrester et al. (2008).
  • The following Python packages are imported:
  • -
    +
    import numpy as np
     import matplotlib.pyplot as plt
    @@ -856,7 +856,7 @@

    +
    # Sample data (e.g., X_norm)
     X_norm = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
     print(f"Original data indices: {np.arange(len(X_norm))}\n")
    @@ -908,7 +908,7 @@ 

    \[ \bar{x} = \frac{x - \mu(X)}{\sigma(X)} \]

    -
    +
    def predict_polynomial_fit(X, coeff, mnstd):
         """
         Generates predictions for the polynomial fit.
    @@ -936,7 +936,7 @@ 

    39.2.4 Plotting the Results

    -
    +
    def plot_polynomial_fit(X, Y, X_pred_original, Y_pred, best_order, y_true=None):
         """
         Visualizes the polynomial fit.
    @@ -973,7 +973,7 @@ 

    \(-0.3\) unit to \(0.1\) unit. The data is normalized to the range of \(0\) to \(1\) for the computation with the aerofoilcd function. The data is then fitted with a polynomial of order \(m\). To obtain the best polynomial through this data, the following Python code can be used:

    -
    +
    from spotoptim.function import aerofoilcd
     import numpy as np
     import matplotlib.pyplot as plt
    @@ -1025,7 +1025,7 @@ 

    \[ f(x) = (6x - 2)^2 \sin(12x - 4). \]

    -
    +
    import numpy as np
     from spotoptim.function import onevar
     X = np.linspace(0, 1, 51)
    diff --git a/docs/006_num_poly_files/figure-pdf/fig-aerofoil-drag-output-1.pdf b/docs/006_num_poly_files/figure-pdf/fig-aerofoil-drag-output-1.pdf
    index 2a0c5763..d0887bfa 100644
    Binary files a/docs/006_num_poly_files/figure-pdf/fig-aerofoil-drag-output-1.pdf and b/docs/006_num_poly_files/figure-pdf/fig-aerofoil-drag-output-1.pdf differ
    diff --git a/docs/006_num_poly_files/figure-pdf/fig-onevar-output-1.pdf b/docs/006_num_poly_files/figure-pdf/fig-onevar-output-1.pdf
    index ba3f296b..e13409eb 100644
    Binary files a/docs/006_num_poly_files/figure-pdf/fig-onevar-output-1.pdf and b/docs/006_num_poly_files/figure-pdf/fig-onevar-output-1.pdf differ
    diff --git a/docs/006_num_rbf.html b/docs/006_num_rbf.html
    index da3def19..907166a6 100644
    --- a/docs/006_num_rbf.html
    +++ b/docs/006_num_rbf.html
    @@ -772,7 +772,7 @@ 

    This section is based on chapter 2.3 in Forrester et al. (2008).
  • The following Python packages are imported:
  • -
    +
    import numpy as np
     import matplotlib.pyplot as plt
    @@ -844,7 +844,7 @@

    \(\sigma\) is the width parameter that controls how quickly the function’s response diminishes with distance from the center.

    The Gaussian RBF produces a bell-shaped response that reaches its maximum value of 1 when \(\vec{x} = \vec{c}\) and asymptotically approaches zero as the distance increases. The parameter \(\sigma\) determines how “localized” the response is—smaller values create a narrower peak with faster decay, while larger values produce a broader, more gradual response across the input space. Figure 40.1 shows the Gaussian RBF for different values of \(\sigma\) in a one-dimensional space. The center of the RBF is set at 0, and the width parameter \(\sigma\) varies to illustrate how it affects the shape of the function.

    -
    +
    def gaussian_rbf(x, center, sigma):
         """
         Compute the Gaussian Radial Basis Function.
    @@ -1059,7 +1059,7 @@ 

    40.2.1 The Rbf Class

    The Rbf class implements the Radial Basis Function model. It encapsulates all the data and methods needed for fitting the model and making predictions.

    -
    +
    import numpy as np
     from scipy.linalg import cholesky, cho_solve
     import numpy.random as rnd
    @@ -1322,7 +1322,7 @@ 

    40.3 RBF Example: The One-Dimensional sin Function

    -
    +
    import numpy as np
     import matplotlib.pyplot as plt
     from scipy.linalg import cholesky, cho_solve
    @@ -1399,7 +1399,7 @@ 

    40.4 RBF Example: The Two-Dimensional dome Function

    The dome function is an example of a test function that can be used to evaluate the performance of the Radial Basis Function model. It is a simple mathematical function defined over a two-dimensional space.

    -
    +
    def dome(x) -> float:
       """
       Dome test function.
    @@ -1416,7 +1416,7 @@ 

    return np.sum(1 - (2*x - 1)**2) / len(x)

    The following code demonstrates how to use the Radial Basis Function model to approximate a function. It generates a Latin Hypercube sample, computes the objective function values, estimates the model parameters, and plots the results.

    -
    +
    def generate_rbf_data(n_samples=10, grid_points=41):
         """
         Generates data for RBF visualization.
    diff --git a/docs/006_num_rbf_files/figure-html/fig-rbf-approximation-output-1.png b/docs/006_num_rbf_files/figure-html/fig-rbf-approximation-output-1.png
    index d53e0dc0..8db7800b 100644
    Binary files a/docs/006_num_rbf_files/figure-html/fig-rbf-approximation-output-1.png and b/docs/006_num_rbf_files/figure-html/fig-rbf-approximation-output-1.png differ
    diff --git a/docs/006_num_rbf_files/figure-pdf/cell-7-output-2.pdf b/docs/006_num_rbf_files/figure-pdf/cell-7-output-2.pdf
    index 523be94f..5f1515f5 100644
    Binary files a/docs/006_num_rbf_files/figure-pdf/cell-7-output-2.pdf and b/docs/006_num_rbf_files/figure-pdf/cell-7-output-2.pdf differ
    diff --git a/docs/006_num_rbf_files/figure-pdf/fig-rbf-approximation-output-1.pdf b/docs/006_num_rbf_files/figure-pdf/fig-rbf-approximation-output-1.pdf
    index 4c6abff1..6a9e0b16 100644
    Binary files a/docs/006_num_rbf_files/figure-pdf/fig-rbf-approximation-output-1.pdf and b/docs/006_num_rbf_files/figure-pdf/fig-rbf-approximation-output-1.pdf differ
    diff --git a/docs/006_num_rbf_files/figure-pdf/fig-rbf-gaussian-k1-output-1.pdf b/docs/006_num_rbf_files/figure-pdf/fig-rbf-gaussian-k1-output-1.pdf
    index 5384978f..494e4b79 100644
    Binary files a/docs/006_num_rbf_files/figure-pdf/fig-rbf-gaussian-k1-output-1.pdf and b/docs/006_num_rbf_files/figure-pdf/fig-rbf-gaussian-k1-output-1.pdf differ
    diff --git a/docs/006_num_rbf_files/figure-pdf/fig-rbf-gaussian-k2-output-1.pdf b/docs/006_num_rbf_files/figure-pdf/fig-rbf-gaussian-k2-output-1.pdf
    index 83350310..8a85ea33 100644
    Binary files a/docs/006_num_rbf_files/figure-pdf/fig-rbf-gaussian-k2-output-1.pdf and b/docs/006_num_rbf_files/figure-pdf/fig-rbf-gaussian-k2-output-1.pdf differ
    diff --git a/docs/007_challenge.html b/docs/007_challenge.html
    index 194b4d5e..291a253c 100644
    --- a/docs/007_challenge.html
    +++ b/docs/007_challenge.html
    @@ -784,7 +784,7 @@ 

    +
    import matplotlib.pyplot as plt
     import time
     import numpy as np
    diff --git a/docs/007_challenge_files/figure-pdf/fig-comparison-output-1.pdf b/docs/007_challenge_files/figure-pdf/fig-comparison-output-1.pdf
    index a67b79f4..b07c5f69 100644
    Binary files a/docs/007_challenge_files/figure-pdf/fig-comparison-output-1.pdf and b/docs/007_challenge_files/figure-pdf/fig-comparison-output-1.pdf differ
    diff --git a/docs/007_challenge_files/figure-pdf/fig-convergence-output-1.pdf b/docs/007_challenge_files/figure-pdf/fig-convergence-output-1.pdf
    index 8fc5d988..a7b66b09 100644
    Binary files a/docs/007_challenge_files/figure-pdf/fig-convergence-output-1.pdf and b/docs/007_challenge_files/figure-pdf/fig-convergence-output-1.pdf differ
    diff --git a/docs/007_challenge_files/figure-pdf/fig-landscapes-output-1.pdf b/docs/007_challenge_files/figure-pdf/fig-landscapes-output-1.pdf
    index d5c422c9..350335c5 100644
    Binary files a/docs/007_challenge_files/figure-pdf/fig-landscapes-output-1.pdf and b/docs/007_challenge_files/figure-pdf/fig-landscapes-output-1.pdf differ
    diff --git a/docs/007_challenge_files/figure-pdf/fig-robot-arm-hard-output-1.pdf b/docs/007_challenge_files/figure-pdf/fig-robot-arm-hard-output-1.pdf
    index de17ab86..6494a463 100644
    Binary files a/docs/007_challenge_files/figure-pdf/fig-robot-arm-hard-output-1.pdf and b/docs/007_challenge_files/figure-pdf/fig-robot-arm-hard-output-1.pdf differ
    diff --git a/docs/019_spotoptim_sk_matern.html b/docs/019_spotoptim_sk_matern.html
    index 6653b453..e009b73f 100644
    --- a/docs/019_spotoptim_sk_matern.html
    +++ b/docs/019_spotoptim_sk_matern.html
    @@ -724,7 +724,7 @@ 

    5 

    5.1 SpotOptim with Sklearn Kriging in 6 Dimensions: Rosenbrock Function

    This section demonstrates how to use the SpotOptim class with sklearn’s Gaussian Process Regressor (using Matern kernel) as a surrogate on the 6-dimensional Rosenbrock function. We use a maximum of 100 function evaluations.

    -
    +
    import warnings
     warnings.filterwarnings("ignore")
     import json
    @@ -734,7 +734,7 @@ 

    5.1.1 Define the 6D Rosenbrock Function

    -
    +
    dim = 6
     lower = np.full(dim, -2.0)
     upper = np.full(dim, 2.0)
    @@ -745,7 +745,7 @@ 

    5.1.2 Set up SpotOptim Parameters

    -
    +
    n_initial = dim
     seed = 321
    @@ -878,7 +878,7 @@

    +
    print(f"[6D] Sklearn Kriging: min y = {result_rosen.fun:.4f} at x = {result_rosen.x}")
     print(f"Number of function evaluations: {result_rosen.nfev}")
     print(f"Number of iterations: {result_rosen.nit}")
    @@ -891,7 +891,7 @@

    5.1.4 Visualize Optimization Progress

    -
    +
    import matplotlib.pyplot as plt
     
     # Plot the optimization progress
    @@ -915,7 +915,7 @@ 

    5.1.5 Evaluation of Multiple Repeats

    To perform 30 repeats and collect statistics:

    -
    +
    # Perform 30 independent runs
     n_repeats = 30
     results = []
    @@ -964,7 +964,7 @@ 

    5.2.1 Define the 10D Michalewicz Function

    -
    +
    from spotoptim.function import michalewicz
     
     dim = 10
    @@ -977,7 +977,7 @@ 

    5.2.2 Set up SpotOptim Parameters

    -
    +
    n_initial = dim
     seed = 321
    @@ -1308,7 +1308,7 @@

    +
    print(f"[10D] Sklearn Kriging: min y = {result_micha.fun:.4f} at x = {result_micha.x}")
     print(f"Number of function evaluations: {result_micha.nfev}")
     print(f"Number of iterations: {result_micha.nit}")
    @@ -1322,7 +1322,7 @@

    5.2.4 Visualize Optimization Progress

    -
    +
    import matplotlib.pyplot as plt
     
     # Plot the optimization progress
    @@ -1346,7 +1346,7 @@ 

    5.2.5 Evaluation of Multiple Repeats

    To perform 30 repeats and collect statistics:

    -
    +
    # Perform 30 independent runs
     n_repeats = 30
     results = []
    diff --git a/docs/019_spotoptim_sk_matern_files/figure-pdf/cell-13-output-1.pdf b/docs/019_spotoptim_sk_matern_files/figure-pdf/cell-13-output-1.pdf
    index 35153529..82e70e14 100644
    Binary files a/docs/019_spotoptim_sk_matern_files/figure-pdf/cell-13-output-1.pdf and b/docs/019_spotoptim_sk_matern_files/figure-pdf/cell-13-output-1.pdf differ
    diff --git a/docs/019_spotoptim_sk_matern_files/figure-pdf/cell-7-output-1.pdf b/docs/019_spotoptim_sk_matern_files/figure-pdf/cell-7-output-1.pdf
    index 8fc32c21..a18825b6 100644
    Binary files a/docs/019_spotoptim_sk_matern_files/figure-pdf/cell-7-output-1.pdf and b/docs/019_spotoptim_sk_matern_files/figure-pdf/cell-7-output-1.pdf differ
    diff --git a/docs/020_scaling.html b/docs/020_scaling.html
    index 16eb0d86..aca3415e 100644
    --- a/docs/020_scaling.html
    +++ b/docs/020_scaling.html
    @@ -713,7 +713,7 @@ 

    31.2 Using TorchStandardScaler Directly

    You can use the scaler independently for any PyTorch data processing tasks.

    -
    +
    import torch
     from spotoptim.utils.scaler import TorchStandardScaler
     
    @@ -755,7 +755,7 @@ 

    +
    import torch
     import torch.nn as nn
     import numpy as np
    diff --git a/docs/100_ddmo_eda.html b/docs/100_ddmo_eda.html
    index f5fcf6ab..a0decbb6 100644
    --- a/docs/100_ddmo_eda.html
    +++ b/docs/100_ddmo_eda.html
    @@ -865,7 +865,7 @@ 

    -
    +
    @@ -883,24 +883,24 @@

    -
    +
    for i, prob in enumerate(probabilities):
         print(f"Bin {i+1} Probability: {prob:.4f}")
     
     # Ensure probabilities sum to 1 (or very close, due to floating-point arithmetic)
     print(f"Sum of probabilities: {np.sum(probabilities)}")
    -
    Bin 1 Probability: 0.0030
    -Bin 2 Probability: 0.0080
    -Bin 3 Probability: 0.0480
    -Bin 4 Probability: 0.0920
    -Bin 5 Probability: 0.1830
    -Bin 6 Probability: 0.2290
    -Bin 7 Probability: 0.2160
    -Bin 8 Probability: 0.1370
    -Bin 9 Probability: 0.0570
    -Bin 10 Probability: 0.0230
    -Bin 11 Probability: 0.0040
    +
    Bin 1 Probability: 0.0020
    +Bin 2 Probability: 0.0170
    +Bin 3 Probability: 0.0500
    +Bin 4 Probability: 0.1180
    +Bin 5 Probability: 0.2120
    +Bin 6 Probability: 0.2330
    +Bin 7 Probability: 0.1790
    +Bin 8 Probability: 0.1330
    +Bin 9 Probability: 0.0370
    +Bin 10 Probability: 0.0120
    +Bin 11 Probability: 0.0070
     Sum of probabilities: 1.0
    @@ -957,7 +957,7 @@

    Example 45.1 (Pie Preference) Consider a scenario from “StatLand” where 70% of people prefer pumpkin pie over blueberry pie. The question is: What is the probability that, out of three people asked, the first two prefer pumpkin pie and the third prefers blueberry pie?

    Using the concept of the Binomial Distribution, the probability of such an outcome can be calculated without the need to lay out every possible combination by hand. This process not only simplifies calculations but also provides a clear and precise method to determine probabilities in scenarios involving discrete choices. We will use Python to calculate the probability that exactly two out of three people prefer pumpkin pie, given the 70% preference rate:

    -
    +
    from scipy.stats import binom
     n = 3  # Number of trials (people asked)
     p = 0.7  # Probability of success (preferring pumpkin pie)
    @@ -1088,7 +1088,7 @@ 

    Example 45.3 (Standard Deviation with Python) Consider the array \([1,2,3]\): Since \(\bar{x} = 2\), the following value is computed: \[ \sqrt{1/3 \times \left( (1-2)^2 + (2-2)^2 + (3-2)^2 \right)} = \sqrt{2/3}.\]

    -
    +
    import numpy as np
     a = np.array([[1, 2, 3]])
     np.std(a)
    @@ -1098,7 +1098,7 @@

    \(N-1\)), \(\sqrt{1/2 \times \left( (1-2)^2 + (2-2)^2 + (3-2)^2 \right)} = \sqrt{2/2}\), can be calculated in Python as follows:

    -
    +
    np.std(a, ddof=1)
    np.float64(1.0)
    @@ -1110,7 +1110,7 @@

    When you compute np.std with axis=0, it calculates the standard deviation along the vertical axis, meaning it computes the standard deviation for each column of the array. On the other hand, when you compute np.std with axis=1, it calculates the standard deviation along the horizontal axis, meaning it computes the standard deviation for each row of the array. If the axis parameter is not specified, np.std computes the standard deviation of the flattened array, i.e., it calculates the standard deviation of all the elements in the array.

    Example 45.4 (Axes along which the standard deviation is computed)  

    -
    +
    A = np.array([[1, 2], [3, 4]])
     A
    @@ -1119,21 +1119,21 @@

    First, we calculate the standard deviation of all elements in the array:

    -
    +
    np.std(A)
    np.float64(1.118033988749895)

    Setting axis=0 calculates the standard deviation along the vertical axis (column-wise):

    -
    +
    np.std(A, axis=0)
    array([1., 1.])

    Finally, setting axis=1 calculates the standard deviation along the horizontal axis (row-wise):

    -
    +
    np.std(A, axis=1)
    array([0.5, 0.5])
    @@ -1176,7 +1176,7 @@

    \(a=0\) and \(b=1\):

    -
    +
    import numpy as np
     # Initialize the random number generator
     rng = np.random.default_rng(seed=123456789)
    @@ -1189,7 +1189,7 @@ 

    +
    import numpy as np
     import matplotlib.pyplot as plt
     
    @@ -1252,25 +1252,25 @@ 

    x = rng.normal(mu, sigma, n) x

    -
    array([1.96714185, 1.85465936, 1.9792697 , 1.94488593, 1.97432874,
    -       2.07284846, 2.00841563, 2.18059008, 2.08972059, 1.78420194])
    +
    array([1.83956491, 1.96316573, 1.91946089, 1.9424839 , 2.00858252,
    +       1.8554255 , 1.70403878, 2.12573916, 2.00071284, 2.08990745])

    Verify the mean:

    -
    +
    abs(mu - np.mean(x))
    -
    np.float64(0.014393771709440006)
    +
    np.float64(0.05509183154874364)

    Note: To verify the standard deviation, we use ddof = 1 (empirical standard deviation):

    -
    +
    abs(sigma - np.std(x, ddof=1))
    -
    np.float64(0.01395279622436188)
    +
    np.float64(0.024276758967269524)
    -
    +
    plot_normal_distribution(mu=0, sigma=1, num_samples=10000)
    @@ -1289,7 +1289,7 @@

    +
    @@ -1304,7 +1304,7 @@

    Example 45.5 (Realizations of a Normal Distribution) If you have a normal distribution with a mean of 0 and a standard deviation of 1, each number you draw from that distribution is a realization. Here is a Python example that generates 10 realizations of a normal distribution with a mean of 0 and a standard deviation of 1:

    -
    +
    import numpy as np
     mu = 0
     sigma = 1
    @@ -1516,7 +1516,7 @@ 

    - +
    Figure 45.7: Bivariate Normal. Mean zero and covariance \(\Sigma=\begin{pmatrix} 9 & 0 \\ 0 & 9\end{pmatrix}\) @@ -1703,7 +1703,7 @@

    Example 45.11 (Computing the Outer Product) We will consider two vectors, \(\mathbf{a}\) and \(\mathbf{b}\):

    -
    +
    import numpy as np
     
     a = np.array([1, 2, 3])
    @@ -1743,7 +1743,7 @@ 

    \(m \times n\), where \(m\) is the length of the first vector and \(n\) is the length of the second vector. The function is particularly useful in various mathematical and scientific computations where matrix representations of vector relationships are needed.

    Example 45.12 (Computing the Covariance and the Correlation Matrix) The following Python code computes the covariance and correlation matrices using the NumPy library.

    -
    +
    import numpy as np
     
     def calculate_cov_corr_matrices(data, rowvar=False)->(np.array, np.array):
    @@ -1777,7 +1777,7 @@ 

    corr_matrix = cov_matrix / np.outer(std_devs, std_devs) return cov_matrix, corr_matrix

    -
    +
    A = np.array([[0,1],
                      [1,0]])
     print(f"Input matrix:\n {A}")
    @@ -1806,7 +1806,7 @@ 

    Example 45.13 (Covariance of Independent Variables) Consider a covariance matrix where variables are independent:

    -
    +
    A = np.array([[1,-1],
     [2,0],
     [3,1],
    @@ -1835,7 +1835,7 @@ 

    Example 45.14 (Strong Correlation) For a covariance matrix with strong positive correlation:

    -
    +
    A = np.array([[10,-1],
     [20,0],
     [30,1],
    @@ -1864,7 +1864,7 @@ 

    Example 45.15 (Strong Negative Correlation)  

    -
    +
    A = np.array([[10,1],
     [20,0],
     [30,-1],
    @@ -1949,7 +1949,7 @@ 

    1 \end{pmatrix} \]

    -
    +
    from spotoptim.utils.stats import partial_correlation
     import numpy as np
     import pandas as pd
    diff --git a/docs/100_ddmo_eda_files/figure-html/cell-17-output-1.png b/docs/100_ddmo_eda_files/figure-html/cell-17-output-1.png
    index ccc9f56a..8159d055 100644
    Binary files a/docs/100_ddmo_eda_files/figure-html/cell-17-output-1.png and b/docs/100_ddmo_eda_files/figure-html/cell-17-output-1.png differ
    diff --git a/docs/100_ddmo_eda_files/figure-html/fig-bi9000-output-1.png b/docs/100_ddmo_eda_files/figure-html/fig-bi9000-output-1.png
    index 1a29228d..977f3120 100644
    Binary files a/docs/100_ddmo_eda_files/figure-html/fig-bi9000-output-1.png and b/docs/100_ddmo_eda_files/figure-html/fig-bi9000-output-1.png differ
    diff --git a/docs/100_ddmo_eda_files/figure-html/fig-bi9040-output-1.png b/docs/100_ddmo_eda_files/figure-html/fig-bi9040-output-1.png
    index 4d2abf9d..37f2bbd8 100644
    Binary files a/docs/100_ddmo_eda_files/figure-html/fig-bi9040-output-1.png and b/docs/100_ddmo_eda_files/figure-html/fig-bi9040-output-1.png differ
    diff --git a/docs/100_ddmo_eda_files/figure-html/fig-bi9449-output-1.png b/docs/100_ddmo_eda_files/figure-html/fig-bi9449-output-1.png
    index 7417b99d..39f8fdac 100644
    Binary files a/docs/100_ddmo_eda_files/figure-html/fig-bi9449-output-1.png and b/docs/100_ddmo_eda_files/figure-html/fig-bi9449-output-1.png differ
    diff --git a/docs/100_ddmo_eda_files/figure-html/fig-histogram-output-2.png b/docs/100_ddmo_eda_files/figure-html/fig-histogram-output-2.png
    index 62054f82..40ab61f6 100644
    Binary files a/docs/100_ddmo_eda_files/figure-html/fig-histogram-output-2.png and b/docs/100_ddmo_eda_files/figure-html/fig-histogram-output-2.png differ
    diff --git a/docs/100_ddmo_eda_files/figure-pdf/cell-12-output-1.pdf b/docs/100_ddmo_eda_files/figure-pdf/cell-12-output-1.pdf
    index d325dbce..3b577756 100644
    Binary files a/docs/100_ddmo_eda_files/figure-pdf/cell-12-output-1.pdf and b/docs/100_ddmo_eda_files/figure-pdf/cell-12-output-1.pdf differ
    diff --git a/docs/100_ddmo_eda_files/figure-pdf/cell-17-output-1.pdf b/docs/100_ddmo_eda_files/figure-pdf/cell-17-output-1.pdf
    index dbacc9ed..26b390d4 100644
    Binary files a/docs/100_ddmo_eda_files/figure-pdf/cell-17-output-1.pdf and b/docs/100_ddmo_eda_files/figure-pdf/cell-17-output-1.pdf differ
    diff --git a/docs/100_ddmo_eda_files/figure-pdf/cell-18-output-1.pdf b/docs/100_ddmo_eda_files/figure-pdf/cell-18-output-1.pdf
    index 48928d12..353f18e2 100644
    Binary files a/docs/100_ddmo_eda_files/figure-pdf/cell-18-output-1.pdf and b/docs/100_ddmo_eda_files/figure-pdf/cell-18-output-1.pdf differ
    diff --git a/docs/100_ddmo_eda_files/figure-pdf/fig-bi9000-output-1.pdf b/docs/100_ddmo_eda_files/figure-pdf/fig-bi9000-output-1.pdf
    index 09fe9e89..cee7b15e 100644
    Binary files a/docs/100_ddmo_eda_files/figure-pdf/fig-bi9000-output-1.pdf and b/docs/100_ddmo_eda_files/figure-pdf/fig-bi9000-output-1.pdf differ
    diff --git a/docs/100_ddmo_eda_files/figure-pdf/fig-bi9040-output-1.pdf b/docs/100_ddmo_eda_files/figure-pdf/fig-bi9040-output-1.pdf
    index 7eae28db..dda3bf2b 100644
    Binary files a/docs/100_ddmo_eda_files/figure-pdf/fig-bi9040-output-1.pdf and b/docs/100_ddmo_eda_files/figure-pdf/fig-bi9040-output-1.pdf differ
    diff --git a/docs/100_ddmo_eda_files/figure-pdf/fig-bi90403d-output-1.pdf b/docs/100_ddmo_eda_files/figure-pdf/fig-bi90403d-output-1.pdf
    index 37cf5905..5076aec5 100644
    Binary files a/docs/100_ddmo_eda_files/figure-pdf/fig-bi90403d-output-1.pdf and b/docs/100_ddmo_eda_files/figure-pdf/fig-bi90403d-output-1.pdf differ
    diff --git a/docs/100_ddmo_eda_files/figure-pdf/fig-bi9449-output-1.pdf b/docs/100_ddmo_eda_files/figure-pdf/fig-bi9449-output-1.pdf
    index 2b205c45..e4e030b3 100644
    Binary files a/docs/100_ddmo_eda_files/figure-pdf/fig-bi9449-output-1.pdf and b/docs/100_ddmo_eda_files/figure-pdf/fig-bi9449-output-1.pdf differ
    diff --git a/docs/100_ddmo_eda_files/figure-pdf/fig-histogram-output-2.pdf b/docs/100_ddmo_eda_files/figure-pdf/fig-histogram-output-2.pdf
    index 9fef2a1d..a69853dd 100644
    Binary files a/docs/100_ddmo_eda_files/figure-pdf/fig-histogram-output-2.pdf and b/docs/100_ddmo_eda_files/figure-pdf/fig-histogram-output-2.pdf differ
    diff --git a/docs/100_ddmo_eda_files/figure-pdf/fig-normal-cdf-output-1.pdf b/docs/100_ddmo_eda_files/figure-pdf/fig-normal-cdf-output-1.pdf
    index 20dd7dd4..77ecb15a 100644
    Binary files a/docs/100_ddmo_eda_files/figure-pdf/fig-normal-cdf-output-1.pdf and b/docs/100_ddmo_eda_files/figure-pdf/fig-normal-cdf-output-1.pdf differ
    diff --git a/docs/100_ddmo_eda_files/figure-pdf/fig-normal-density-output-1.pdf b/docs/100_ddmo_eda_files/figure-pdf/fig-normal-density-output-1.pdf
    index e81b1e5c..3417d452 100644
    Binary files a/docs/100_ddmo_eda_files/figure-pdf/fig-normal-density-output-1.pdf and b/docs/100_ddmo_eda_files/figure-pdf/fig-normal-density-output-1.pdf differ
    diff --git a/docs/100_ddmo_eda_files/figure-pdf/fig-normal-distribution-output-1.pdf b/docs/100_ddmo_eda_files/figure-pdf/fig-normal-distribution-output-1.pdf
    index 743fbfc4..22044eff 100644
    Binary files a/docs/100_ddmo_eda_files/figure-pdf/fig-normal-distribution-output-1.pdf and b/docs/100_ddmo_eda_files/figure-pdf/fig-normal-distribution-output-1.pdf differ
    diff --git a/docs/100_ddmo_pca.html b/docs/100_ddmo_pca.html
    index 7c7b8173..2b5bdece 100644
    --- a/docs/100_ddmo_pca.html
    +++ b/docs/100_ddmo_pca.html
    @@ -781,7 +781,7 @@ 

    The concepts of Principal Component Analysis (PCA) and Factor Analysis (FA) are both dimensionality reduction techniques. They operate on different assumptions and serve distinct purposes. PCA aims to transform correlated variables into a smaller set of uncorrelated principal components that capture maximum variance, whereas Factor Analysis seeks to explain the correlations between observed variables in terms of a smaller number of unobserved, underlying factors.

    After loading and preprocessing the data in Section 47.2, we will explore these methods to reduce dimensions and address multicollinearity. In Section 47.3 we will conduct linear regression on the extracted components or factors. Section 47.4 provides diagnostics for multicollinearity, including the coefficient table, eigenvalues, condition indices, and the KMO measure. Section 47.5 explains how PCA is applied to the data, while Section 47.6 discusses Factor Analysis. Both methods are used to mitigate multicollinearity issues in regression models. Section 47.8 shows how the reduced dimensions can be used in other machine learning models, such as Random Forests.

    The following packages are used in this chapter:

    -
    +
    import pandas as pd
     import numpy as np
     import statsmodels.api as sm
    @@ -1152,7 +1152,7 @@ 

  • Standardize the numerical predictors in X using StandardScaler.
  • Verify that X_scaled does not contain any NaN or infinite values.
  • -
    +
    # Use columns from 'price' to 'mpg' as predictors
     independent_var_columns = ['price', 'engine_s', 'horsepow', 'wheelbas',
                                'width', 'length', 'curb_wgt', 'fuel_cap', 'mpg']
    @@ -1248,7 +1248,7 @@ 

    -
    +
    if X.isnull().any().any():
         print("NaNs detected in X. Filling with column medians.")
         X = X.fillna(X.median())
    @@ -1351,7 +1351,7 @@ 

    47.2.2.2 Categorical Features

    Categorical features (like ‘type’) are one-hot encoded and then combined with the scaled numerical features.

    -
    +
    categorical_cols = ['type'] # Replace if more categorical variables exist
     encoder = OneHotEncoder(drop='first', sparse_output=False)
     X_categorical_encoded = encoder.fit_transform(df[categorical_cols])
    @@ -1414,7 +1414,7 @@ 

    47.2.3 Combining Non-categorical and Categorical (encoded) Data

    The final feature set X_encoded is created by concatenating the scaled numerical features and the one-hot encoded categorical features. This combined DataFrame will be used for regression analysis.

    -
    +
    X_encoded = pd.concat([X_scaled, X_categorical_encoded_df], axis=1)
     print(f"Dimension: {X_encoded.shape}")
     print(list(X_encoded.columns))
    @@ -1423,7 +1423,7 @@

    +
    X_encoded.describe(include='all')
    @@ -1561,7 +1561,7 @@

    47.3 Fit the Linear Regression Model

    An Ordinary Least Squares (OLS) regression model is fitted using the preprocessed and combined features (X_encoded).

    -
    +
    X_encoded_with_const = sm.add_constant(X_encoded) # Adds a constant term (intercept) to the model
     model = sm.OLS(df['ln_sales'], X_encoded_with_const).fit()
    @@ -1570,7 +1570,7 @@

    47.3.1.1 Model Summary (ANOVA Table)

    The ANOVA table shows a significant F-value (Prob (F-statistic) close to zero), indicating that the model is statistically significant and better than simply estimating the mean. The Adj. R-squared value, close to 0.40, suggests that nearly 40% of the variation in ln_sales is explained by the model.

    -
    +
    print(model.summary())
                                OLS Regression Results                            
    @@ -1578,8 +1578,8 @@ 

    47.4.1 The Coefficient Table

    The coefficient table provides further evidence of multicollinearity. The function compute_coefficients_table() from the spotoptim package is used here for comprehensive diagnostics.

    -
    +
    from spotoptim.utils.stats import compute_coefficients_table
     coeffs_table = compute_coefficients_table(
         model=model, X_encoded=X_encoded_with_const, y=y, vif_table=None
    @@ -1649,7 +1649,7 @@ 

    47.4.2.1 Eigenvalues

    We use the FactorAnalyzer function from the factor_analyzer package to compute eigenvalues.

    -
    +
    fa_temp = FactorAnalyzer(n_factors=X_encoded.shape[1], method="principal", rotation=None)
     try:
         fa_temp.fit(X_encoded)
    @@ -1677,7 +1677,7 @@ 

    < \] where \(\lambda_{\max}\) is the largest eigenvalue of the scaled predictor correlation matrix, and \(\lambda_i\) is the \(i\)-th eigenvalue of the same matrix.

    \(CI_i\)-values greater than 15 suggest a potential problem, and values over 30 indicate a severe problem.

    -
    +
    X_cond = copy.deepcopy(X_encoded)
     condition_index_df = condition_index(X_cond)
     print("\nCondition Index:")
    @@ -1704,7 +1704,7 @@ 

    <

    47.4.3 Kaiser-Meyer-Olkin (KMO) Measure

    The KMO (Kaiser-Meyer-Olkin) measure is a metric for assessing the suitability of data for Factor Analysis. A KMO value of 0.6 or higher is generally considered acceptable, while a value below 0.5 indicates that the data is not suitable for Factor Analysis.

    The KMO measure is based on the correlation and partial correlation between variables. It is calculated as the ratio of the squared sums of correlations to the squared sums of correlations plus the squared sums of partial correlations. KMO values range between 0 and 1, where values close to 1 suggest strong correlations and suitability for Factor Analysis, and values close to 0 indicate weak correlations and unsuitability.

    -
    +
    kmo_all, kmo_model = calculate_kmo(X_encoded)
     print(f"\nKMO measure: {kmo_model:.3f} (0.6+ is often considered acceptable)")
    @@ -1887,7 +1887,7 @@

    47.5.6 Creating the Regression Model with Principal Components

    Now, a linear regression model is fitted using the principal components derived from PCA. These components are uncorrelated, which should eliminate multicollinearity issues.

    -
    +
    X_pca_model_with_const = sm.add_constant(df_pca_components)
     model_pca = sm.OLS(y, X_pca_model_with_const).fit()
     print("\nRegression on PCA Components:")
    @@ -1900,8 +1900,8 @@ 

    47.5.7 Collinearity Diagnostics for PCA Regression Model

    Consider the eigenvalues of the PCA components to verify that they are uncorrelated. The eigenvalues should be close to 1, indicating that the components are orthogonal and do not exhibit multicollinearity.

    -
    +
    fa_temp = FactorAnalyzer(n_factors=df_pca_components.shape[1], method="principal", rotation=None)
     try:
         fa_temp.fit(df_pca_components)
    @@ -1952,7 +1952,7 @@ 

    +
    coeffs_table = compute_coefficients_table(
         model=model_pca, X_encoded=X_pca_model_with_const, y=y, vif_table=None
     )
    @@ -1978,7 +1978,7 @@ 

    47.5.8 PCA: Creating the Regression Model with three Principal Components only

    -
    +
    # Create a regression model using only the first three principal components
     df_pc_reduced = df_pca_components.iloc[:, :3] # select the first three factors
     X_model_pc_reduced = sm.add_constant(df_pc_reduced)
    @@ -2006,8 +2006,8 @@ 

    47.6.2 Determining the Number of Factors for Factor Analysis

    For Factor Analysis, the number of factors to extract is a crucial decision. A common approach, consistent with the KMO measure, is to consider factors with eigenvalues greater than 1 (Kaiser’s criterion). Factor analysis is then performed, often with a rotation method like Varimax to improve factor interpretability.

    -
    +
    anz_fak = 10 # Number of factors to extract, similar to the components in PCA
     n_factors = min(anz_fak, X_encoded.shape[1])
     fa = FactorAnalyzer(n_factors=n_factors, method="principal", rotation="varimax")
    @@ -2115,7 +2115,7 @@ 

    Section 47.7.3 explains the difference between loading scores in PCA and factor loadings in FA.

    -
    +
    # Print factor loadings with 2 decimals
     factor_loadings = fa.loadings_
     print("Factor Loadings (rounded to 2 decimals):\n", np.round(factor_loadings, 2))
    @@ -2180,7 +2180,7 @@ 

    \(p\) is the number of observed variables.

    -
    +
    # Factor scores for each row (shape: [n_samples, actual_factors])
     X_factor_scores = fa.transform(X_encoded)
     print(f"X_factor_scores shape: {X_factor_scores.shape}")
    @@ -2212,7 +2212,7 @@ 

    47.6.6 Creating the Regression Model with Extracted Factors (from FA)

    A linear regression model is built using all ten extracted factors from Factor Analysis. The expectation is that these factors are uncorrelated, addressing multicollinearity.

    -
    +
    X_model_fa = sm.add_constant(df_factors)
     model_factors = sm.OLS(y, X_model_fa).fit()
     print("\nRegression on Factor Scores (all 10 factors):")
    @@ -2238,8 +2238,8 @@ 

    47.6.7.1 Setting Up the Regression Model with Reduced Factors

    To demonstrate the effect of dimensionality reduction, a regression model is created using only the first three extracted factors from Factor Analysis.

    -
    +
    # Create a regression model using only the first three factors
     df_factors_reduced = df_factors.iloc[:, :3] # select the first three factors
     X_model_fa_reduced = sm.add_constant(df_factors_reduced)
    @@ -2331,8 +2331,8 @@ 

    47.8.1 Random Forest Regressor with the Full Dataset

    First, a Random Forest Regressor is trained using the original, full dataset (X_encoded).

    -
    +
    # 1. Prepare Data # 
     # Use the original input features (X_encoded) as predictors
     X_original = X_encoded
    @@ -2583,7 +2583,7 @@ 

    47.8.2 Random Forest Regressor with PCA Components

    Next, a Random Forest Regressor is trained using the principal components derived from PCA. This tests if the dimensionality reduction and multicollinearity resolution of PCA benefit non-linear models.

    -
    +
    # 1. Prepare Data 
     # Use the extracted PCA components as predictors (using the 10 components)
     X_pca_rf = df_pca_components
    @@ -2622,7 +2622,7 @@ 

    47.8.3 Random Forest Regressor with Extracted Factors (from FA)

    Finally, a Random Forest Regressor is trained using the extracted factors from Factor Analysis (using the 3 factors from the reduced model for this example to illustrate potential impact of reduction).

    -
    +
    # 1. Prepare Data 
     # Use the extracted factors as predictors (using the 3 factors from the reduced FA model)
     X_factors_rf = df_factors_reduced
    @@ -2660,7 +2660,7 @@ 

    47.8.4 Comparison of the Random Forest Models

    -
    +
    # Print comparison of Random Forest models
     print("\nComparison of Random Forest Models:")
     print("\nUsing Original Data:")
    diff --git a/docs/100_ddmo_pca_files/figure-pdf/cell-29-output-2.pdf b/docs/100_ddmo_pca_files/figure-pdf/cell-29-output-2.pdf
    index dec2c6df..5877b124 100644
    Binary files a/docs/100_ddmo_pca_files/figure-pdf/cell-29-output-2.pdf and b/docs/100_ddmo_pca_files/figure-pdf/cell-29-output-2.pdf differ
    diff --git a/docs/100_ddmo_pca_files/figure-pdf/fig-pca_loading_scores-10-output-1.pdf b/docs/100_ddmo_pca_files/figure-pdf/fig-pca_loading_scores-10-output-1.pdf
    index 7dd301b3..42ef44af 100644
    Binary files a/docs/100_ddmo_pca_files/figure-pdf/fig-pca_loading_scores-10-output-1.pdf and b/docs/100_ddmo_pca_files/figure-pdf/fig-pca_loading_scores-10-output-1.pdf differ
    diff --git a/docs/100_ddmo_pca_files/figure-pdf/fig-pcvals2-output-1.pdf b/docs/100_ddmo_pca_files/figure-pdf/fig-pcvals2-output-1.pdf
    index a8fb659a..4b429677 100644
    Binary files a/docs/100_ddmo_pca_files/figure-pdf/fig-pcvals2-output-1.pdf and b/docs/100_ddmo_pca_files/figure-pdf/fig-pcvals2-output-1.pdf differ
    diff --git a/docs/100_ddmo_pca_files/figure-pdf/fig-scree_plot_fa-output-1.pdf b/docs/100_ddmo_pca_files/figure-pdf/fig-scree_plot_fa-output-1.pdf
    index bc74b508..8c9083cb 100644
    Binary files a/docs/100_ddmo_pca_files/figure-pdf/fig-scree_plot_fa-output-1.pdf and b/docs/100_ddmo_pca_files/figure-pdf/fig-scree_plot_fa-output-1.pdf differ
    diff --git a/docs/100_ddmo_pca_files/figure-pdf/fig-scree_plot_pca-1-output-1.pdf b/docs/100_ddmo_pca_files/figure-pdf/fig-scree_plot_pca-1-output-1.pdf
    index 5d9510e6..4381eacd 100644
    Binary files a/docs/100_ddmo_pca_files/figure-pdf/fig-scree_plot_pca-1-output-1.pdf and b/docs/100_ddmo_pca_files/figure-pdf/fig-scree_plot_pca-1-output-1.pdf differ
    diff --git a/docs/100_ddmo_regression.html b/docs/100_ddmo_regression.html
    index b37e7e7a..2acfeaea 100644
    --- a/docs/100_ddmo_regression.html
    +++ b/docs/100_ddmo_regression.html
    @@ -964,7 +964,7 @@ 

    (Wang 2007). spotoptim provides tools for assessing confounding effects in multiple regression models.

    Example 48.1 (Assessing Confounding Effects in Multiple Regression with spotoptim) Consider the following data generation function generate_data and the fit_ols_model function to fit an ordinary least squares (OLS) regression model.

    -
    +
    import numpy as np
     import pandas as pd
     import statsmodels.formula.api as smf
    @@ -1064,7 +1064,7 @@ 

    print(res["estimate"])

    The basic model is: y ~ x1
    -The following features will be used for fitting the basic model: Index(['x3', 'x2', 'x1', 'y'], dtype='str')
    +The following features will be used for fitting the basic model: Index(['x2', 'x1', 'y', 'x3'], dtype='str')
     p-values: 0.34343741859526244
     estimate: 1.025306391110114
     conf_int: 0   -1.111963
    diff --git a/docs/100_ddmo_regression_files/figure-pdf/fig-plot-coeff-vs-pvals-output-1.pdf b/docs/100_ddmo_regression_files/figure-pdf/fig-plot-coeff-vs-pvals-output-1.pdf
    index e394e278..6c8286f5 100644
    Binary files a/docs/100_ddmo_regression_files/figure-pdf/fig-plot-coeff-vs-pvals-output-1.pdf and b/docs/100_ddmo_regression_files/figure-pdf/fig-plot-coeff-vs-pvals-output-1.pdf differ
    diff --git a/docs/Sequential-Parameter-Optimization-Cookbook.pdf b/docs/Sequential-Parameter-Optimization-Cookbook.pdf
    index 6d8768ed..e96c868a 100644
    Binary files a/docs/Sequential-Parameter-Optimization-Cookbook.pdf and b/docs/Sequential-Parameter-Optimization-Cookbook.pdf differ
    diff --git a/docs/a_04_gp_background.html b/docs/a_04_gp_background.html
    index 98b21a7f..81f03b91 100644
    --- a/docs/a_04_gp_background.html
    +++ b/docs/a_04_gp_background.html
    @@ -800,14 +800,14 @@ 

    51.3 Construction of the Covariance Matrix

    Here is a one-dimensional example. The process begins by creating an input grid using \(\vec{x}\)-values. This grid consists of 100 elements, providing the basis for further analysis and visualization.

    -
    +
    import numpy as np
     n = 100
     X = np.linspace(0, 10, n, endpoint=False).reshape(-1,1)

    In the context of this discussion, the construction of the covariance matrix, denoted as \(\Sigma_n\), relies on the concept of inverse exponentiated squared Euclidean distances. However, it’s important to note that a modification is introduced later in the process. Specifically, the diagonal of the covariance matrix is augmented with a small value, represented as “eps” or \(\epsilon\).

    The reason for this augmentation is that while inverse exponentiated distances theoretically ensure the covariance matrix’s positive definiteness, in practical applications, the matrix can sometimes become numerically ill-conditioned. By adding a small value to the diagonal, such as \(\epsilon\), this ill-conditioning issue is mitigated. In this context, \(\epsilon\) is often referred to as “jitter.”

    -
    +
    import numpy as np
     from numpy import array, zeros, power, ones, exp, multiply, eye, linspace, spacing, sqrt, arange, append, ravel
     from numpy.linalg import cholesky, solve
    @@ -824,7 +824,7 @@ 

    D = D + D.T return exp(-D)

    -
    +
    sigma2 = np.array([1.0])
     Sigma = build_Sigma(X, sigma2)
     np.round(Sigma[:3,:], 3)
    @@ -867,7 +867,7 @@

    +
    import matplotlib.pyplot as plt
     plt.imshow(Sigma, cmap='hot', interpolation='nearest')
     plt.colorbar()
    @@ -886,7 +886,7 @@ 

    <

    In the context of the multivariate normal distribution, the next step is to utilize the previously constructed covariance matrix denoted as Sigma. It is used as an essential component in generating random samples from the multivariate normal distribution.

    The function multivariate_normal is employed for this purpose. It serves as a random number generator specifically designed for the multivariate normal distribution. In this case, the mean of the distribution is set to a zero vector, and the covariance matrix is provided as Sigma. The argument size specifies the number of realizations, which, in this specific scenario, is set to one.

    By default, the mean vector is initialized to zero. To match the number of samples, which is equivalent to the number of rows in the X and Sigma matrices, the argument zeros(n) is used, where n represents the number of samples (here taken from the size of the matrix, e.g., Sigma.shape[0]).

    -
    +
    rng = np.random.default_rng(seed=12345)
     Y = rng.multivariate_normal(zeros(Sigma.shape[0]), Sigma, size = 1, check_valid="raise").reshape(-1,1)
     Y.shape
    @@ -951,7 +951,7 @@

    51.5.3 Scale of Two:

    Regarding the scale of the \(Y\) values, they have a range of approximately \([-2,2]\), with a 95% probability of falling within this range. In standard statistical terms, 95% of the data points typically fall within two standard deviations of the mean, which is a common measure of the spread or range of data.

    -
    +
    import numpy as np
     from numpy import array, zeros, power, ones, exp, multiply, eye, linspace, spacing, sqrt, arange, append, ravel
     from numpy.random import multivariate_normal
    diff --git a/docs/a_04_gp_background_files/figure-pdf/cell-7-output-1.pdf b/docs/a_04_gp_background_files/figure-pdf/cell-7-output-1.pdf
    index ba9ef84f..1aacd7a5 100644
    Binary files a/docs/a_04_gp_background_files/figure-pdf/cell-7-output-1.pdf and b/docs/a_04_gp_background_files/figure-pdf/cell-7-output-1.pdf differ
    diff --git a/docs/a_04_gp_background_files/figure-pdf/fig-exp2euclid-output-1.pdf b/docs/a_04_gp_background_files/figure-pdf/fig-exp2euclid-output-1.pdf
    index 3537514c..b76a5044 100644
    Binary files a/docs/a_04_gp_background_files/figure-pdf/fig-exp2euclid-output-1.pdf and b/docs/a_04_gp_background_files/figure-pdf/fig-exp2euclid-output-1.pdf differ
    diff --git a/docs/a_04_gp_background_files/figure-pdf/fig-mvn1-1-output-1.pdf b/docs/a_04_gp_background_files/figure-pdf/fig-mvn1-1-output-1.pdf
    index c6721a65..2890e523 100644
    Binary files a/docs/a_04_gp_background_files/figure-pdf/fig-mvn1-1-output-1.pdf and b/docs/a_04_gp_background_files/figure-pdf/fig-mvn1-1-output-1.pdf differ
    diff --git a/docs/a_04_gp_background_files/figure-pdf/fig-mvn1-3-output-1.pdf b/docs/a_04_gp_background_files/figure-pdf/fig-mvn1-3-output-1.pdf
    index a755a782..8c1e550f 100644
    Binary files a/docs/a_04_gp_background_files/figure-pdf/fig-mvn1-3-output-1.pdf and b/docs/a_04_gp_background_files/figure-pdf/fig-mvn1-3-output-1.pdf differ
    diff --git a/docs/a_04_gp_background_files/figure-pdf/fig-mvn2-output-1.pdf b/docs/a_04_gp_background_files/figure-pdf/fig-mvn2-output-1.pdf
    index ed84a532..74730cd4 100644
    Binary files a/docs/a_04_gp_background_files/figure-pdf/fig-mvn2-output-1.pdf and b/docs/a_04_gp_background_files/figure-pdf/fig-mvn2-output-1.pdf differ
    diff --git a/docs/a_04_gp_background_files/figure-pdf/fig-mvn5-output-1.pdf b/docs/a_04_gp_background_files/figure-pdf/fig-mvn5-output-1.pdf
    index 44e92a0b..60674977 100644
    Binary files a/docs/a_04_gp_background_files/figure-pdf/fig-mvn5-output-1.pdf and b/docs/a_04_gp_background_files/figure-pdf/fig-mvn5-output-1.pdf differ
    diff --git a/docs/acquisition_failure.html b/docs/acquisition_failure.html
    index 4cf230dd..e8573a81 100644
    --- a/docs/acquisition_failure.html
    +++ b/docs/acquisition_failure.html
    @@ -724,7 +724,7 @@ 

    +
    from spotoptim import SpotOptim
     import numpy as np
     
    @@ -743,47 +743,47 @@ 

    result = optimizer.optimize()

    TensorBoard logging disabled
    -Initial best: f(x) = 1.420785
    -Iter 1 | Best: 0.023695 | Rate: 1.00 | Evals: 22.0%
    -Iter 2 | Best: 0.000476 | Rate: 1.00 | Evals: 24.0%
    -Iter 3 | Best: 0.000076 | Rate: 1.00 | Evals: 26.0%
    +Initial best: f(x) = 1.440556
    +Iter 1 | Best: 0.019454 | Rate: 1.00 | Evals: 22.0%
    +Iter 2 | Best: 0.000172 | Rate: 1.00 | Evals: 24.0%
    +Iter 3 | Best: 0.000044 | Rate: 1.00 | Evals: 26.0%
     Iter 4 | Best: 0.000000 | Rate: 1.00 | Evals: 28.0%
    -Iter 5 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.80 | Evals: 30.0%
    -Iter 6 | Best: 0.000000 | Curr: 0.000001 | Rate: 0.67 | Evals: 32.0%
    -Iter 7 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.57 | Evals: 34.0%
    -Iter 8 | Best: 0.000000 | Rate: 0.62 | Evals: 36.0%
    -Iter 9 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.56 | Evals: 38.0%
    -Iter 10 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.50 | Evals: 40.0%
    -Iter 11 | Best: 0.000000 | Rate: 0.55 | Evals: 42.0%
    -Iter 12 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.50 | Evals: 44.0%
    -Iter 13 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.46 | Evals: 46.0%
    -Iter 14 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.43 | Evals: 48.0%
    -Iter 15 | Best: 0.000000 | Rate: 0.47 | Evals: 50.0%
    +Iter 5 | Best: 0.000000 | Rate: 1.00 | Evals: 30.0%
    +Iter 6 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.83 | Evals: 32.0%
    +Iter 7 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.71 | Evals: 34.0%
    +Iter 8 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.62 | Evals: 36.0%
    +Iter 9 | Best: 0.000000 | Rate: 0.67 | Evals: 38.0%
    +Iter 10 | Best: 0.000000 | Rate: 0.70 | Evals: 40.0%
    +Iter 11 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.64 | Evals: 42.0%
    +Iter 12 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.58 | Evals: 44.0%
    +Iter 13 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.54 | Evals: 46.0%
    +Iter 14 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.50 | Evals: 48.0%
    +Iter 15 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.47 | Evals: 50.0%
     Iter 16 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.44 | Evals: 52.0%
     Iter 17 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.41 | Evals: 54.0%
     Iter 18 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.39 | Evals: 56.0%
     Iter 19 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.37 | Evals: 58.0%
     Iter 20 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.35 | Evals: 60.0%
     Iter 21 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.33 | Evals: 62.0%
    -Iter 22 | Best: 0.000000 | Rate: 0.36 | Evals: 64.0%
    -Iter 23 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.35 | Evals: 66.0%
    -Iter 24 | Best: 0.000000 | Curr: 0.000001 | Rate: 0.33 | Evals: 68.0%
    -Iter 25 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.32 | Evals: 70.0%
    -Iter 26 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.31 | Evals: 72.0%
    -Iter 27 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.30 | Evals: 74.0%
    -Iter 28 | Best: 0.000000 | Curr: 0.000001 | Rate: 0.29 | Evals: 76.0%
    -Iter 29 | Best: 0.000000 | Curr: 0.000001 | Rate: 0.28 | Evals: 78.0%
    -Iter 30 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.27 | Evals: 80.0%
    -Iter 31 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.26 | Evals: 82.0%
    -Iter 32 | Best: 0.000000 | Curr: 0.000001 | Rate: 0.25 | Evals: 84.0%
    +Iter 22 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.32 | Evals: 64.0%
    +Iter 23 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.30 | Evals: 66.0%
    +Iter 24 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.29 | Evals: 68.0%
    +Iter 25 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.28 | Evals: 70.0%
    +Iter 26 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.27 | Evals: 72.0%
    +Iter 27 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.26 | Evals: 74.0%
    +Iter 28 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.25 | Evals: 76.0%
    +Iter 29 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.24 | Evals: 78.0%
    +Iter 30 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.23 | Evals: 80.0%
    +Iter 31 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.23 | Evals: 82.0%
    +Iter 32 | Best: 0.000000 | Rate: 0.25 | Evals: 84.0%
     Iter 33 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.24 | Evals: 86.0%
    -Iter 34 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.24 | Evals: 88.0%
    -Iter 35 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.23 | Evals: 90.0%
    -Iter 36 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.22 | Evals: 92.0%
    -Iter 37 | Best: 0.000000 | Rate: 0.24 | Evals: 94.0%
    +Iter 34 | Best: 0.000000 | Rate: 0.26 | Evals: 88.0%
    +Iter 35 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.26 | Evals: 90.0%
    +Iter 36 | Best: 0.000000 | Curr: 0.000002 | Rate: 0.25 | Evals: 92.0%
    +Iter 37 | Best: 0.000000 | Curr: 0.000001 | Rate: 0.24 | Evals: 94.0%
     Iter 38 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.24 | Evals: 96.0%
     Iter 39 | Best: 0.000000 | Curr: 0.000001 | Rate: 0.23 | Evals: 98.0%
    -Iter 40 | Best: 0.000000 | Curr: 0.000005 | Rate: 0.23 | Evals: 100.0%
    +Iter 40 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.23 | Evals: 100.0%

    @@ -802,7 +802,7 @@

    17.4 Advanced Usage: Setting Tolerance

    The tolerance_x parameter controls when the fallback strategy is triggered. A larger tolerance means points need to be farther apart, triggering the fallback more often:

    -
    +
    def simple_objective(X):
         """Simple quadratic function for demonstration"""
         return np.sum(X**2, axis=1)
    @@ -840,7 +840,7 @@ 

    17.5.1 1. Monitor Fallback Activations

    Enable verbose mode to see when fallbacks are triggered:

    -
    +
    def test_objective(X):
         return np.sum(X**2, axis=1)
     
    @@ -861,7 +861,7 @@ 

    17.5.2 2. Adjust Tolerance Based on Problem Scale

    For problems with small search spaces, use smaller tolerance:

    -
    +
    def scale_objective(X):
         return np.sum(X**2, axis=1)
     
    diff --git a/docs/acquisition_optimization.html b/docs/acquisition_optimization.html
    index 23aa730c..99f1df81 100644
    --- a/docs/acquisition_optimization.html
    +++ b/docs/acquisition_optimization.html
    @@ -718,7 +718,7 @@ 

    19.2.1 1. Default Configuration (Differential Evolution)

    By default, SpotOptim uses Differential Evolution (scipy.optimize.differential_evolution).

    -
    +
    import numpy as np
     from spotoptim import SpotOptim
     
    @@ -737,14 +737,14 @@ 

    spot.optimize() print("Best y:", spot.best_y_)

    -
    Best y: 0.011893909296361216
    +
    Best y: 0.020703279400656156

    19.2.2 2. Customizing Differential Evolution

    You can use acquisition_optimizer_kwargs to adjust Differential Evolution parameters, such as increasing maxiter or changing the popsize.

    -
    +
    import numpy as np
     from spotoptim import SpotOptim
     
    @@ -770,14 +770,14 @@ 

    spot.optimize() print("Best y with Custom DE:", spot.best_y_)

    -
    Best y with Custom DE: 0.05032941308851764
    +
    Best y with Custom DE: 0.0014445206408396619

    19.2.3 3. Using Gradient-Based Optimization (L-BFGS-B)

    You can switch to a gradient-based optimizer like L-BFGS-B by specifying it in acquisition_optimizer. Note that for minimize-based methods, parameters are usually passed via an options dictionary within acquisition_optimizer_kwargs.

    -
    +
    import numpy as np
     from spotoptim import SpotOptim
     
    @@ -805,14 +805,14 @@ 

    spot.optimize() print("Best y with L-BFGS-B:", spot.best_y_)

    -
    Best y with L-BFGS-B: 0.2641559097007588
    +
    Best y with L-BFGS-B: 0.013698181964238821

    19.2.4 4. Using Gradient-Free Optimization (Nelder-Mead)

    For non-smooth acquisition landscapes or when robustness is needed without gradients, Nelder-Mead is a good choice. SpotOptim automatically handles the interface to ensure compatibility.

    -
    +
    import numpy as np
     from spotoptim import SpotOptim
     
    @@ -840,14 +840,14 @@ 

    spot.optimize() print("Best y with Nelder-Mead:", spot.best_y_)

    -
    Best y with Nelder-Mead: 0.5883781258898368
    +
    Best y with Nelder-Mead: 0.03305715511489834

    19.2.5 5. Returning Multiple Candidates

    Setting acquisition_fun_return_size > 1 forces the optimizer to return multiple candidate points (e.g., the top N from the final population).

    -
    +
    import numpy as np
     from spotoptim import SpotOptim
     
    @@ -866,20 +866,20 @@ 

    spot.optimize()

     message: Optimization terminated: maximum evaluations (5) reached
    -                   Current function value: 7.568267
    +                   Current function value: 5.221754
                        Iterations: 3
                        Function evaluations: 5
      success: True
    -     fun: 7.568266507987052
    -       x: [ 1.437e+00  2.346e+00]
    -       X: [[ 1.437e+00  2.346e+00]
    -           [-1.993e+00 -2.151e+00]
    -           [ 1.387e+00  2.696e+00]
    -           [-2.391e+00 -1.403e+00]
    -           [-2.399e+00 -1.405e+00]]
    +     fun: 5.221754123644815
    +       x: [ 1.770e+00  1.446e+00]
    +       X: [[-4.921e+00 -4.794e+00]
    +           [ 2.306e+00  4.720e+00]
    +           [ 2.209e+00  4.434e+00]
    +           [ 1.893e+00  3.502e+00]
    +           [ 1.770e+00  1.446e+00]]
          nit: 3
         nfev: 5
    -       y: [ 7.568e+00  8.600e+00  9.193e+00  7.687e+00  7.728e+00]
    +       y: [ 4.720e+01  2.760e+01  2.454e+01  1.585e+01  5.222e+00]

    diff --git a/docs/awwe_optimization.html b/docs/awwe_optimization.html
    index 6df454cc..c4a1a49e 100644
    --- a/docs/awwe_optimization.html
    +++ b/docs/awwe_optimization.html
    @@ -760,7 +760,7 @@

  • The following Python packages are imported:
  • -
    +
    import numpy as np
     import matplotlib.pyplot as plt
     import pandas as pd
    @@ -774,7 +774,7 @@ 

    4.1 The AWWE Objective Function

    We use the same AWWE function from Chapter 34, which models the weight of an unpainted light aircraft wing. The function accepts inputs in the unit cube \([0,1]^9\) and returns the wing weight.

    -
    +
    def wingwt(x):
         """
         Aircraft Wing Weight function.
    @@ -815,7 +815,7 @@ 

    4.2 Baseline Configuration

    The baseline Cessna C172 Skyhawk configuration (coded in unit cube):

    -
    +
    baseline_coded = np.array([0.48, 0.4, 0.38, 0.5, 0.62, 0.344, 0.4, 0.37, 0.38])
     baseline_weight = wingwt(baseline_coded)[0]
     print(f"Baseline wing weight: {baseline_weight:.2f} lb")
    @@ -832,7 +832,7 @@

    Nelder-Mead: Derivative-free simplex method (robust but can be slow)
  • BFGS: Quasi-Newton method (fast but requires smooth functions)
  • -
    +
    # Starting point (baseline configuration)
     x0 = baseline_coded.copy()
     
    @@ -852,7 +852,7 @@ 

    4.4 Method 1: SpotOptim (Surrogate Model Based Optimization)

    -
    +
    # Start timing
     start_time = time.time()
     
    @@ -872,15 +872,15 @@ 

    tensorboard_clean=True )

    -
    Removed old TensorBoard logs: runs/spotoptim_20260404_084500
    +
    Removed old TensorBoard logs: runs/spotoptim_20260405_232436
     Cleaned 1 old TensorBoard log directory
    -TensorBoard logging enabled: runs/spotoptim_20260404_090741
    +TensorBoard logging enabled: runs/spotoptim_20260411_210457

    4.5 Design Table

    -
    +
    pprint.pprint(optimizer_spot.get_design_table())
    ('|   name |   type |   lower |   upper |   default |   transform |\n'
    @@ -899,7 +899,7 @@ 

    4.6 Run optimization

    -
    +
    result_spot = optimizer_spot.optimize()
    Initial best: f(x) = 205.911302
    @@ -924,10 +924,10 @@ 

    +TensorBoard writer closed. View logs with: tensorboard --logdir=runs/spotoptim_20260411_210457

    -
    +
    # End timing
     spot_time = time.time() - start_time
     
    @@ -941,11 +941,11 @@ 

    -
    +
    optimizer_spot.print_best()
    
    @@ -967,7 +967,7 @@ 

    4.7 Result Table

    -
    +
    pprint.pprint(optimizer_spot.get_results_table(show_importance=True))
    ('|   name |   type |   default |   lower |   upper |   tuned |   transform '
    @@ -998,7 +998,7 @@ 

    4.8 Progress of the Optimization

    -
    +
    optimizer_spot.plot_progress(log_y=False)
    @@ -1011,7 +1011,7 @@

    4.9 Contour Plots of Most Important Hyperparameters

    -
    +
    optimizer_spot.plot_important_hyperparameter_contour(max_imp=3)
    Plotting surrogate contours for top 3 most important parameters:
    @@ -1053,7 +1053,7 @@ 

    4.10 Method 2: Nelder-Mead Simplex

    -
    +
    print("\n" + "=" * 60)
     print("Running Nelder-Mead Simplex...")
     print("=" * 60)
    @@ -1094,7 +1094,7 @@ 

    4.11 Method 3: BFGS (Quasi-Newton)

    -
    +
    print("\n" + "=" * 60)
     print("Running BFGS (Quasi-Newton)...")
     print("=" * 60)
    @@ -1136,7 +1136,7 @@ 

    4.12 Comparison of Results

    -
    +
    # Create comparison DataFrame
     comparison = pd.DataFrame({
         'Method': ['Baseline', 'SpotOptim', 'Nelder-Mead', 'BFGS'],
    @@ -1184,9 +1184,9 @@ 

    < ================================================================================ Method Best Weight (lb) Improvement (%) Function Evals Time (s) Success Baseline 233.908405 0.000000 1 0.000000 True - SpotOptim 119.503672 48.910057 30 10.980436 True -Nelder-Mead 220.544928 5.713124 30 0.000857 False - BFGS 119.503672 48.910057 60 0.001501 False + SpotOptim 119.503672 48.910057 30 10.674473 True +Nelder-Mead 220.544928 5.713124 30 0.000850 False + BFGS 119.503672 48.910057 60 0.001405 False ================================================================================

    @@ -1195,7 +1195,7 @@

    <

    4.13 Visualization: Convergence Plots

    4.13.1 SpotOptim Convergence

    -
    +
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
     
     # Plot 1: Best value over iterations
    @@ -1235,7 +1235,7 @@ 

    4.14 Optimal Parameter Values

    Let’s examine the optimal parameter values found by each method:

    -
    +
    # Parameter names
     param_names = ['Sw', 'Wfw', 'A', 'L', 'q', 'l', 'Rtc', 'Nz', 'Wdg']
     
    @@ -1297,7 +1297,7 @@ 

    4.15 Analysis of Optimal Solutions

    -
    +
    # Calculate percentage changes from baseline
     changes_spot = [(spot_decoded[i] - baseline_decoded[i]) / baseline_decoded[i] * 100 
                     for i in range(len(param_names))]
    @@ -1332,7 +1332,7 @@ 

    4.16 Key Insights from Optimal Solutions

    -
    +
    # Find parameters with largest changes for each method
     def analyze_changes(decoded, baseline_decoded, method_name):
         changes = {param_names[i]: decoded[i] - baseline_decoded[i] 
    @@ -1379,7 +1379,7 @@ 

    4.17 Method Efficiency Comparison

    -
    +
    # Calculate efficiency metrics
     efficiency = pd.DataFrame({
         'Method': ['SpotOptim', 'Nelder-Mead', 'BFGS'],
    @@ -1411,9 +1411,9 @@ 

    4.18 Visualization: 2D Slices of Optimal Solutions

    Let’s visualize how the optimal solutions compare in the most important 2D subspaces:

    -
    +
    # Create 2D slices showing optimal points
     fig, axes = plt.subplots(2, 2, figsize=(14, 12))
     
    diff --git a/docs/awwe_optimization_files/figure-pdf/cell-12-output-1.pdf b/docs/awwe_optimization_files/figure-pdf/cell-12-output-1.pdf
    index e1323d81..d9b4cdc7 100644
    Binary files a/docs/awwe_optimization_files/figure-pdf/cell-12-output-1.pdf and b/docs/awwe_optimization_files/figure-pdf/cell-12-output-1.pdf differ
    diff --git a/docs/awwe_optimization_files/figure-pdf/cell-13-output-2.pdf b/docs/awwe_optimization_files/figure-pdf/cell-13-output-2.pdf
    index 489d808c..afd91f3e 100644
    Binary files a/docs/awwe_optimization_files/figure-pdf/cell-13-output-2.pdf and b/docs/awwe_optimization_files/figure-pdf/cell-13-output-2.pdf differ
    diff --git a/docs/awwe_optimization_files/figure-pdf/cell-13-output-4.pdf b/docs/awwe_optimization_files/figure-pdf/cell-13-output-4.pdf
    index f8460c7d..5586db55 100644
    Binary files a/docs/awwe_optimization_files/figure-pdf/cell-13-output-4.pdf and b/docs/awwe_optimization_files/figure-pdf/cell-13-output-4.pdf differ
    diff --git a/docs/awwe_optimization_files/figure-pdf/cell-13-output-6.pdf b/docs/awwe_optimization_files/figure-pdf/cell-13-output-6.pdf
    index 96179191..2abbf64a 100644
    Binary files a/docs/awwe_optimization_files/figure-pdf/cell-13-output-6.pdf and b/docs/awwe_optimization_files/figure-pdf/cell-13-output-6.pdf differ
    diff --git a/docs/awwe_optimization_files/figure-pdf/cell-17-output-1.pdf b/docs/awwe_optimization_files/figure-pdf/cell-17-output-1.pdf
    index b6c34097..1715a551 100644
    Binary files a/docs/awwe_optimization_files/figure-pdf/cell-17-output-1.pdf and b/docs/awwe_optimization_files/figure-pdf/cell-17-output-1.pdf differ
    diff --git a/docs/awwe_optimization_files/figure-pdf/cell-19-output-1.pdf b/docs/awwe_optimization_files/figure-pdf/cell-19-output-1.pdf
    index 99fa3088..5d76b0ae 100644
    Binary files a/docs/awwe_optimization_files/figure-pdf/cell-19-output-1.pdf and b/docs/awwe_optimization_files/figure-pdf/cell-19-output-1.pdf differ
    diff --git a/docs/awwe_optimization_files/figure-pdf/cell-22-output-1.pdf b/docs/awwe_optimization_files/figure-pdf/cell-22-output-1.pdf
    index 5a75adb9..88df5a21 100644
    Binary files a/docs/awwe_optimization_files/figure-pdf/cell-22-output-1.pdf and b/docs/awwe_optimization_files/figure-pdf/cell-22-output-1.pdf differ
    diff --git a/docs/diabetes_dataset.html b/docs/diabetes_dataset.html
    index 8baa88ce..f84cb5b1 100644
    --- a/docs/diabetes_dataset.html
    +++ b/docs/diabetes_dataset.html
    @@ -743,7 +743,7 @@ 

    31.2 Quick Start

    31.2.1 Basic Usage

    -
    +
    from spotoptim.data import get_diabetes_dataloaders
     from sklearn.datasets import load_diabetes
     from spotoptim.data.diabetes import DiabetesDataset
    @@ -772,7 +772,7 @@ 

    31.2.2 Training a Model

    -
    +
    import torch
     import torch.nn as nn
     from spotoptim.data import get_diabetes_dataloaders
    @@ -835,12 +835,12 @@ 

    avg_test_loss = test_loss / len(test_loader) print(f"Test MSE: {avg_test_loss:.4f}")

    -
    Epoch 20/100: Loss = 28259.4506
    -Epoch 40/100: Loss = 34347.1385
    -Epoch 60/100: Loss = 32919.4453
    -Epoch 80/100: Loss = 29039.2554
    -Epoch 100/100: Loss = 30956.3480
    -Test MSE: 26513.4225
    +
    Epoch 20/100: Loss = 30604.4867
    +Epoch 40/100: Loss = 27473.2434
    +Epoch 60/100: Loss = 27584.4352
    +Epoch 80/100: Loss = 30723.6564
    +Epoch 100/100: Loss = 27506.1375
    +Test MSE: 26480.3236
    @@ -851,7 +851,7 @@

    31.3.1 get_diabetes_dataloaders()

    Loads the sklearn diabetes dataset and returns configured PyTorch DataLoaders.

    Signature:

    -
    +
    get_diabetes_dataloaders(
         test_size=0.2,
         batch_size=32,
    @@ -863,8 +863,8 @@ 

    pin_memory=False )

    -
    (<torch.utils.data.dataloader.DataLoader at 0x124e86d70>,
    - <torch.utils.data.dataloader.DataLoader at 0x124ec7e30>,
    +
    (<torch.utils.data.dataloader.DataLoader at 0x12434fce0>,
    + <torch.utils.data.dataloader.DataLoader at 0x1243d3ad0>,
      StandardScaler())
    @@ -942,7 +942,7 @@

    +
    from spotoptim.data import get_diabetes_dataloaders
     
     # Custom configuration
    @@ -969,10 +969,10 @@ 

    31.4 DiabetesDataset Class

    PyTorch Dataset implementation for the diabetes dataset.

    Signature:

    -
    +
    DiabetesDataset(X, y, transform=None, target_transform=None)
    -
    <spotoptim.data.diabetes.DiabetesDataset at 0x125028350>
    +
    <spotoptim.data.diabetes.DiabetesDataset at 0x124508950>

    Parameters:

    @@ -996,7 +996,7 @@

    <

    31.4.1 Manual Dataset Creation

    -
    +
    from spotoptim.data import DiabetesDataset
     from sklearn.datasets import load_diabetes
     from sklearn.model_selection import train_test_split
    @@ -1048,7 +1048,7 @@ 

    31.5 Advanced Usage

    31.5.1 Custom Transforms

    -
    +
    from spotoptim.data import DiabetesDataset
     from sklearn.datasets import load_diabetes
     import torch
    @@ -1079,7 +1079,7 @@ 

    31.5.2 Different Train/Test Splits

    -
    +
    from spotoptim.data import get_diabetes_dataloaders
     
     # 70/30 split
    @@ -1107,7 +1107,7 @@ 

    31.5.3 Without Feature Scaling

    -
    +
    from spotoptim.data import get_diabetes_dataloaders
     
     # Load without scaling (useful for tree-based models)
    @@ -1123,13 +1123,13 @@ 

    break

    Scaler: None
    -Mean: tensor([ 8.3345e-03,  4.0437e-05, -9.1794e-04])
    +Mean: tensor([ 0.0032, 0.0060, -0.0015])

    31.5.4 Larger Batch Sizes

    -
    +
    from spotoptim.data import get_diabetes_dataloaders
     
     # Larger batches for faster training (if memory allows)
    @@ -1151,7 +1151,7 @@ 

    31.5.5 GPU Training with Pin Memory

    -
    +
    import torch
     from spotoptim.data import get_diabetes_dataloaders
     
    @@ -1182,7 +1182,7 @@ 

    31.6 Complete Training Example

    Here’s a complete example showing data loading, model training, and evaluation:

    -
    +
    import torch
     import torch.nn as nn
     from spotoptim.data import get_diabetes_dataloaders
    @@ -1279,26 +1279,26 @@ 

    31.7 Integration with SpotOptim

    Use the diabetes dataset for hyperparameter optimization with SpotOptim:

    -
    +
    import numpy as np
     import torch
     import torch.nn as nn
    @@ -1424,7 +1424,7 @@ 

    31.8 Best Practices

    31.8.1 1. Always Use Feature Scaling

    -
    +
    # Good: Features are standardized
     train_loader, test_loader, scaler = get_diabetes_dataloaders(
         scale_features=True
    @@ -1434,7 +1434,7 @@ 

    31.8.2 2. Set Random Seeds for Reproducibility

    -
    +
    # Reproducible train/test splits
     train_loader, test_loader, scaler = get_diabetes_dataloaders(
         random_state=42
    @@ -1444,13 +1444,13 @@ 

    import torch torch.manual_seed(42)

    -
    <torch._C.Generator at 0x115400830>
    +
    <torch._C.Generator at 0x114eb3970>

    31.8.3 3. Don’t Shuffle Test Data

    -
    +
    # Good: Test data in consistent order
     train_loader, test_loader, scaler = get_diabetes_dataloaders(
         shuffle_train=True,   # Shuffle training data
    @@ -1461,7 +1461,7 @@ 

    31.8.4 4. Choose Appropriate Batch Size

    -
    +
    # Small dataset (442 samples) - moderate batch size works well
     train_loader, test_loader, scaler = get_diabetes_dataloaders(
         batch_size=32  # Good balance for this dataset
    @@ -1472,7 +1472,7 @@ 

    31.8.5 5. Save the Scaler for Production

    -
    +
    import pickle
     import numpy as np
     from spotoptim.data import get_diabetes_dataloaders
    @@ -1510,7 +1510,7 @@ 

    31.9.1 Issue: Out of Memory

    Solution: Reduce batch size or disable pin_memory

    -
    +
    train_loader, test_loader, scaler = get_diabetes_dataloaders(
         batch_size=16,      # Smaller batches
         pin_memory=False    # Disable if not using GPU
    @@ -1521,7 +1521,7 @@ 

    <

    31.9.2 Issue: Different Data Ranges

    Symptom: Model not converging, loss is NaN

    Solution: Ensure feature scaling is enabled

    -
    +
    train_loader, test_loader, scaler = get_diabetes_dataloaders(
         scale_features=True  # Must be True for neural networks
     )
    @@ -1530,7 +1530,7 @@

    31.9.3 Issue: Non-Reproducible Results

    Solution: Set all random seeds

    -
    +
    import torch
     import numpy as np
     
    @@ -1547,7 +1547,7 @@ 

    31.9.4 Issue: Slow Data Loading

    Solution: Use multiple workers (if not on Windows)

    -
    +
    train_loader, test_loader, scaler = get_diabetes_dataloaders(
         num_workers=4,      # Use 4 subprocesses
         pin_memory=True     # Enable for GPU
    diff --git a/docs/factor_variables.html b/docs/factor_variables.html
    index 93b94f35..96c750e2 100644
    --- a/docs/factor_variables.html
    +++ b/docs/factor_variables.html
    @@ -747,7 +747,7 @@ 

    10.2 Quick Start

    10.2.1 Basic Factor Variable Usage

    -
    +
    from spotoptim import SpotOptim
     import numpy as np
     
    @@ -811,7 +811,7 @@ 

    10.2.2 Neural Network Activation Function Optimization

    -
    +
    import torch
     import torch.nn as nn
     from spotoptim import SpotOptim
    @@ -877,7 +877,7 @@ 

    print(f"Best test MSE: {result.fun:.4f}")

    Best activation function: Sigmoid
    -Best test MSE: 26493.4544
    +Best test MSE: 26315.9648
    @@ -886,7 +886,7 @@

    10.3 Mixed Variable Types

    10.3.1 Combining Factor, Integer, and Continuous Variables

    -
    +
    import numpy as np
     import torch
     import torch.nn as nn
    @@ -971,63 +971,63 @@ 

    print(f"Best activation: {result.x[3]}") # String value! print(f"Best test MSE: {result.fun:.4f}")

    -
    lr=0.000112, l1=24, layers=2, activation=LeakyReLU
    -lr=0.000274, l1=60, layers=4, activation=Sigmoid
    -lr=0.003498, l1=84, layers=2, activation=Tanh
    -lr=0.001025, l1=126, layers=3, activation=Tanh
    -lr=0.004205, l1=40, layers=4, activation=Sigmoid
    -lr=0.000226, l1=75, layers=0, activation=Tanh
    -lr=0.000737, l1=104, layers=1, activation=Tanh
    -lr=0.009249, l1=31, layers=1, activation=ReLU
    -lr=0.002215, l1=114, layers=2, activation=Sigmoid
    -lr=0.000611, l1=61, layers=1, activation=ReLU
    -lr=0.002219, l1=114, layers=2, activation=Sigmoid
    -lr=0.002576, l1=67, layers=1, activation=Sigmoid
    -lr=0.000629, l1=58, layers=3, activation=Tanh
    -lr=0.000411, l1=98, layers=4, activation=Sigmoid
    -lr=0.000583, l1=59, layers=0, activation=LeakyReLU
    -lr=0.000192, l1=67, layers=1, activation=ReLU
    -lr=0.000932, l1=92, layers=2, activation=LeakyReLU
    -lr=0.003979, l1=104, layers=1, activation=Sigmoid
    -lr=0.002088, l1=60, layers=2, activation=ReLU
    -lr=0.000104, l1=127, layers=3, activation=Tanh
    -lr=0.000137, l1=61, layers=4, activation=ReLU
    -lr=0.001209, l1=38, layers=4, activation=Sigmoid
    -lr=0.000309, l1=77, layers=3, activation=ReLU
    -lr=0.000203, l1=67, layers=3, activation=LeakyReLU
    -lr=0.002571, l1=37, layers=4, activation=LeakyReLU
    -lr=0.001055, l1=93, layers=1, activation=Tanh
    -lr=0.000122, l1=34, layers=4, activation=ReLU
    -lr=0.000116, l1=124, layers=3, activation=Sigmoid
    -lr=0.000184, l1=35, layers=0, activation=Sigmoid
    -lr=0.000101, l1=25, layers=2, activation=Tanh
    -lr=0.009519, l1=95, layers=3, activation=Sigmoid
    -lr=0.006033, l1=120, layers=3, activation=Tanh
    -lr=0.003073, l1=72, layers=3, activation=Tanh
    -lr=0.000986, l1=38, layers=1, activation=ReLU
    -lr=0.004182, l1=97, layers=1, activation=Sigmoid
    -lr=0.001686, l1=85, layers=4, activation=Sigmoid
    -lr=0.000631, l1=77, layers=2, activation=LeakyReLU
    -lr=0.003100, l1=71, layers=4, activation=ReLU
    -lr=0.000980, l1=33, layers=2, activation=ReLU
    -lr=0.000115, l1=76, layers=3, activation=ReLU
    -lr=0.001067, l1=126, layers=2, activation=Tanh
    -lr=0.001209, l1=70, layers=1, activation=ReLU
    -lr=0.000643, l1=59, layers=0, activation=Tanh
    -lr=0.000107, l1=82, layers=3, activation=Sigmoid
    -lr=0.006051, l1=73, layers=2, activation=Sigmoid
    -lr=0.004060, l1=123, layers=1, activation=Sigmoid
    -lr=0.005555, l1=76, layers=2, activation=Sigmoid
    -lr=0.002506, l1=74, layers=1, activation=ReLU
    -lr=0.000172, l1=128, layers=2, activation=Sigmoid
    -lr=0.001106, l1=72, layers=3, activation=Tanh
    +
    lr=0.000957, l1=114, layers=3, activation=Sigmoid
    +lr=0.003554, l1=43, layers=1, activation=ReLU
    +lr=0.000222, l1=78, layers=2, activation=LeakyReLU
    +lr=0.005857, l1=30, layers=3, activation=Tanh
    +lr=0.000476, l1=64, layers=0, activation=Tanh
    +lr=0.001121, l1=119, layers=1, activation=Sigmoid
    +lr=0.009714, l1=17, layers=4, activation=ReLU
    +lr=0.000317, l1=51, layers=2, activation=Sigmoid
    +lr=0.001837, l1=93, layers=2, activation=LeakyReLU
    +lr=0.000137, l1=97, layers=4, activation=Tanh
    +lr=0.000263, l1=52, layers=2, activation=Sigmoid
    +lr=0.000350, l1=50, layers=2, activation=Sigmoid
    +lr=0.000316, l1=51, layers=2, activation=Sigmoid
    +lr=0.004128, l1=120, layers=2, activation=Tanh
    +lr=0.007577, l1=46, layers=2, activation=LeakyReLU
    +lr=0.007774, l1=83, layers=3, activation=Tanh
    +lr=0.000527, l1=62, layers=3, activation=Sigmoid
    +lr=0.000321, l1=115, layers=1, activation=ReLU
    +lr=0.006431, l1=51, layers=3, activation=Tanh
    +lr=0.000318, l1=51, layers=2, activation=Sigmoid
    +lr=0.005516, l1=67, layers=0, activation=Sigmoid
    +lr=0.001699, l1=86, layers=2, activation=Sigmoid
    +lr=0.003052, l1=26, layers=2, activation=LeakyReLU
    +lr=0.000322, l1=84, layers=2, activation=Tanh
    +lr=0.000107, l1=56, layers=4, activation=Sigmoid
    +lr=0.000408, l1=26, layers=3, activation=Sigmoid
    +lr=0.000114, l1=116, layers=2, activation=Sigmoid
    +lr=0.004964, l1=55, layers=2, activation=LeakyReLU
    +lr=0.000358, l1=65, layers=1, activation=LeakyReLU
    +lr=0.000316, l1=51, layers=2, activation=Sigmoid
    +lr=0.003022, l1=62, layers=2, activation=Tanh
    +lr=0.000317, l1=51, layers=2, activation=Sigmoid
    +lr=0.000120, l1=104, layers=4, activation=LeakyReLU
    +lr=0.002331, l1=109, layers=3, activation=Tanh
    +lr=0.000106, l1=48, layers=2, activation=Tanh
    +lr=0.000198, l1=57, layers=2, activation=LeakyReLU
    +lr=0.000317, l1=51, layers=2, activation=Sigmoid
    +lr=0.000437, l1=38, layers=1, activation=Tanh
    +lr=0.001433, l1=86, layers=1, activation=LeakyReLU
    +lr=0.000211, l1=127, layers=2, activation=Sigmoid
    +lr=0.001690, l1=53, layers=2, activation=Tanh
    +lr=0.000586, l1=86, layers=4, activation=ReLU
    +lr=0.005586, l1=125, layers=3, activation=LeakyReLU
    +lr=0.000144, l1=106, layers=3, activation=ReLU
    +lr=0.006845, l1=117, layers=2, activation=LeakyReLU
    +lr=0.005839, l1=63, layers=1, activation=LeakyReLU
    +lr=0.003284, l1=124, layers=2, activation=ReLU
    +lr=0.000968, l1=41, layers=3, activation=LeakyReLU
    +lr=0.001697, l1=39, layers=2, activation=Tanh
    +lr=0.002121, l1=91, layers=3, activation=Sigmoid
     
     Optimization Results:
    -Best learning rate: 0.004060
    -Best layer size: 123
    -Best num layers: 1
    +Best learning rate: 0.000317
    +Best layer size: 51
    +Best num layers: 2
     Best activation: Sigmoid
    -Best test MSE: 26416.2917
    +Best test MSE: 26490.0690
    @@ -1036,7 +1036,7 @@

    10.4 Multiple Factor Variables

    10.4.1 Optimizing Both Activation and Optimizer

    -
    +
    from spotoptim import SpotOptim
     from spotoptim.data import get_diabetes_dataloaders
     from spotoptim.nn.linear_regressor import LinearRegressor
    @@ -1105,9 +1105,9 @@ 

    print(f"Best optimizer: {result.x[1]}") print(f"Best learning rate: {10**result.x[2]:.6f}")

    -
    Best activation: ReLU
    +
    Best activation: LeakyReLU
     Best optimizer: SGD
    -Best learning rate: 0.006053
    +Best learning rate: 0.007715
    @@ -1117,7 +1117,7 @@

    10.5.1 Custom Categorical Choices

    Factor variables work with any string values, not just activation functions:

    -
    +
    from spotoptim import SpotOptim
     import numpy as np
     
    @@ -1190,7 +1190,7 @@ 

    10.5.2 Viewing All Evaluated Configurations

    -
    +
    import torch
     import torch.nn as nn
     from spotoptim import SpotOptim
    @@ -1307,24 +1307,24 @@ 

    SpotOptim handles factor variables through automatic conversion:

    1. Initialization: String tuples in bounds are detected

      -
      +
      bounds = [("ReLU", "Sigmoid", "Tanh")]
       # Internally mapped to: {0: "ReLU", 1: "Sigmoid", 2: "Tanh"}
       # Bounds become: [(0, 2)]
    2. Sampling: Initial design samples from [0, n_levels-1] and rounds to integers

      -
      +
      # Samples might be: [0.3, 1.8, 2.1]
       # After rounding: [0, 2, 2]
    3. Evaluation: Before calling objective function, integers → strings

      -
      +
      # [0, 2, 2] → ["ReLU", "Tanh", "Tanh"]
       # Objective function receives strings
    4. Optimization: Surrogate model works with integers [0, n_levels-1]

    5. Results: Final results mapped back to strings

      -
      +
      result.x[0]  # Returns "ReLU", not 0
       result.X     # All rows contain strings for factor variables
      @@ -1365,7 +1365,7 @@

      10.6.2 Variable Type Auto-Detection

      If you don’t specify var_type, SpotOptim automatically detects factor variables:

      -
      +
      # Example 1: Explicit var_type (recommended)
       # This shows the syntax - replace my_function with your actual function
       
      @@ -1414,7 +1414,7 @@ 

      10.7 Complete Example: Full Workflow

      -
      +
      """
       Complete example: Neural network hyperparameter optimization with factor variables.
       """
      @@ -1750,15 +1750,15 @@ 

      10.8.1 Do’s

      Use descriptive string values

      -
      +
      bounds=[("xavier_uniform", "kaiming_normal", "orthogonal")]

      Explicitly specify var_type for clarity

      -
      +
      var_type=["float", "int", "factor"]

      Access results as strings

      -
      +
      # Example: Accessing factor variable results as strings
       # (This assumes you've run an optimization with activation as a factor variable)
       
      @@ -1799,7 +1799,7 @@ 

      +
      bounds=[(-4, -2), (16, 128), ("ReLU", "Tanh")]
       var_type=["float", "int", "factor"]
      @@ -1807,13 +1807,13 @@

      10.8.2 Don’ts

      Don’t use integers in factor bounds

      -
      +
      # Wrong: Use strings, not integers
       bounds=[(0, 1, 2)]  # Wrong!
       bounds=[("ReLU", "Sigmoid", "Tanh")]  # Correct!

      Don’t expect integers in objective function

      -
      +
      def objective(X):
           activation = X[0][2]
           # activation is a string, not an integer!
      @@ -1821,12 +1821,12 @@ 

      # Do: if activation == "ReLU": # Correct!

      Don’t manually convert factor variables

      -
      +
      # SpotOptim handles conversion automatically
       # Don't do manual mapping in your objective function

      Don’t use empty tuples

      -
      +
      # Wrong: Empty tuple
       bounds=[()]
       
      diff --git a/docs/kriging.html b/docs/kriging.html
      index 9c711ba8..96ab8537 100644
      --- a/docs/kriging.html
      +++ b/docs/kriging.html
      @@ -749,7 +749,7 @@ 

      20.4 Integration with SpotOptim

      No Changes Required to SpotOptim Core!

      The existing surrogate parameter already supports any scikit-learn compatible model:

      -
      +
      from spotoptim import SpotOptim, Kriging
       import numpy as np
       
      @@ -788,7 +788,7 @@ 

      20.6 Usage Examples

      20.6.1 Basic Usage

      -
      +
      from spotoptim import SpotOptim, Kriging
       import numpy as np
       
      @@ -815,7 +815,7 @@ 

      20.6.2 Custom Parameters

      -
      +
      import numpy as np
       
       def ackley(X):
      @@ -855,7 +855,7 @@ 

      20.6.3 Prediction with Uncertainty

      -
      +
      from spotoptim import Kriging
       import numpy as np
       
      diff --git a/docs/kriging_forrester.html b/docs/kriging_forrester.html
      index fde6e830..b8955b6e 100644
      --- a/docs/kriging_forrester.html
      +++ b/docs/kriging_forrester.html
      @@ -825,7 +825,7 @@ 

      26 3. ExamplesWe will verify these methods with a simple 1D example.

      26.1 Setup

      -
      +
      import numpy as np
       import matplotlib.pyplot as plt
       from spotoptim.surrogate import Kriging
      @@ -846,7 +846,7 @@ 

      26.2 Comparisons

      We will fit three models and compare them.

      -
      +
      # 1. Interpolation (Forces fit through noisy points)
       model_interp = Kriging(method="interpolation", seed=42)
       model_interp.fit(X, y)
      @@ -865,7 +865,7 @@ 

      26.3 Visualization

      -
      +
      fig, axes = plt.subplots(1, 3, figsize=(18, 5))
       
       def plot_model(ax, title, y_pred, std):
      @@ -907,7 +907,7 @@ 

      27 4. Infill Criteria

      The book also describes “Infill Criteria” (Section 3) for deciding where to sample next. This is implemented in SpotOptim itself (not the surrogate class). When you use SpotOptim(..., acquisition="ei"), you are using the Expected Improvement criterion described in Section 3.2.1 of the book.

      -
      +
      from spotoptim import SpotOptim
       
       # Use Kriging with EI
      diff --git a/docs/kriging_forrester_files/figure-pdf/cell-4-output-1.pdf b/docs/kriging_forrester_files/figure-pdf/cell-4-output-1.pdf
      index 2f8e8b3b..91a12e75 100644
      Binary files a/docs/kriging_forrester_files/figure-pdf/cell-4-output-1.pdf and b/docs/kriging_forrester_files/figure-pdf/cell-4-output-1.pdf differ
      diff --git a/docs/kriging_nystroem.html b/docs/kriging_nystroem.html
      index 5238f933..13373c9a 100644
      --- a/docs/kriging_nystroem.html
      +++ b/docs/kriging_nystroem.html
      @@ -770,7 +770,7 @@ 

      23.3.2 Example Usage

      Here is how to set up a GP model with Nystroem approximation for a high-dimensional problem.

      -
      +
      import numpy as np
       from spotoptim.surrogate.kriging import Kriging
       from spotoptim.surrogate.nystroem import Nystroem
      diff --git a/docs/kriging_surrogate.html b/docs/kriging_surrogate.html
      index aed2e047..6d13ac2c 100644
      --- a/docs/kriging_surrogate.html
      +++ b/docs/kriging_surrogate.html
      @@ -813,7 +813,7 @@ 

      21.2.1 Creating a Simple Kriging Model

      Let’s start with the most basic usage - creating a Kriging model with default settings:

      -
      +
      import numpy as np
       from spotoptim import SpotOptim
       from spotoptim.surrogate import Kriging
      @@ -862,7 +862,7 @@ 

      21.2.2 Default vs Custom Surrogate

      SpotOptim uses a Gaussian Process Regressor by default. Here’s how Kriging compares:

      -
      +
      import numpy as np
       from spotoptim import SpotOptim
       from spotoptim.surrogate import Kriging
      @@ -919,7 +919,7 @@ 

      +
      import numpy as np
       from spotoptim.surrogate import Kriging
       import matplotlib.pyplot as plt
      @@ -982,7 +982,7 @@ 

      21.3.2 Noise Parameter and Regularization

      The noise parameter adds a small nugget effect for numerical stability:

      -
      +
      import numpy as np
       from spotoptim import SpotOptim
       from spotoptim.surrogate import Kriging
      @@ -1051,7 +1051,7 @@ 

      +
      import numpy as np
       from spotoptim.surrogate import Kriging
       
      @@ -1087,7 +1087,7 @@ 

      +
      import numpy as np
       from spotoptim.surrogate import Kriging
       
      @@ -1140,7 +1140,7 @@ 

      21.4.1 Mixed Variable Types

      Kriging supports mixed variable types: continuous (float), integer (int), and categorical (factor):

      -
      +
      import numpy as np
       from spotoptim import SpotOptim
       from spotoptim.surrogate import Kriging
      @@ -1248,7 +1248,7 @@ 

      21.4.2 Customizing the Distance Metric for Factors

      For categorical variables, you can choose different distance metrics.

      -
      +
      import numpy as np
       from spotoptim.surrogate import Kriging
       
      @@ -1293,7 +1293,7 @@ 

      21.4.3 Handling High-Dimensional Problems

      For high-dimensional problems, Kriging can become computationally expensive. Here are strategies:

      -
      +
      import numpy as np
       from spotoptim import SpotOptim
       from spotoptim.surrogate import Kriging
      @@ -1354,7 +1354,7 @@ 

      21.4.4 Uncertainty Quantification

      Kriging provides uncertainty estimates, useful for exploration vs exploitation:

      -
      +
      import numpy as np
       from spotoptim.surrogate import Kriging
       import matplotlib.pyplot as plt
      @@ -1431,7 +1431,7 @@ 

      21.5.1 Example 1: Optimizing the Rastrigin Function

      The Rastrigin function is highly multimodal - a challenging test case:

      -
      +
      import numpy as np
       from spotoptim import SpotOptim
       from spotoptim.surrogate import Kriging
      @@ -1515,7 +1515,7 @@ 

      21.5.2 Example 2: Robust Optimization with Noise

      When optimizing noisy functions, Kriging’s regression mode helps:

      -
      +
      import numpy as np
       from spotoptim import SpotOptim
       from spotoptim.surrogate import Kriging
      @@ -1583,7 +1583,7 @@ 

      21.5.3 Example 3: Real-World Machine Learning Hyperparameter Tuning

      Optimize hyperparameters for a neural network:

      -
      +
      import numpy as np
       from spotoptim import SpotOptim
       from spotoptim.surrogate import Kriging
      @@ -1691,7 +1691,7 @@ 

      21.5.4 Example 4: Comparing Kriging Methods

      Let’s compare all three Kriging methods on the same problem:

      -
      +
      import numpy as np
       from spotoptim import SpotOptim
       from spotoptim.surrogate import Kriging
      @@ -1754,7 +1754,7 @@ 

      21.5.5 Example 5: Sensitivity to Theta Bounds

      Theta bounds control the range of smoothness. Let’s see their impact:

      -
      +
      import numpy as np
       from spotoptim import SpotOptim
       from spotoptim.surrogate import Kriging
      @@ -1810,7 +1810,7 @@ 

      21.6.1 Kriging vs Gaussian Process vs Random Forest

      Let’s compare different surrogate models:

      -
      +
      import numpy as np
       from spotoptim import SpotOptim
       from spotoptim.surrogate import Kriging, SimpleKriging
      @@ -1881,7 +1881,7 @@ 

      21.7 Best Practices

      21.7.1 1. Choosing the Right Method

      -
      +
      # For smooth, deterministic functions
       kriging = Kriging(method='interpolation', noise=1e-10, seed=42)
       
      @@ -1895,7 +1895,7 @@ 

      21.7.2 2. Setting Model Complexity

      -
      +
      # For low-dimensional problems (<5D)
       kriging = Kriging(
           method='regression',
      @@ -1915,7 +1915,7 @@ 

      21.7.3 3. Handling Different Variable Types

      -
      +
      # Mixed types example
       bounds = [
           (0.0, 10.0),    # continuous
      @@ -1944,7 +1944,7 @@ 

      21.7.4 4. Reproducibility

      Always set the seed for reproducible results:

      -
      +
      # Both Kriging and SpotOptim should have seeds
       kriging = Kriging(method='regression', seed=42)
       
      @@ -1959,7 +1959,7 @@ 

      21.7.5 5. Monitoring Surrogate Quality

      Check the negative log-likelihood after fitting:

      -
      +
      import numpy as np
       from spotoptim.surrogate import Kriging
       
      @@ -1995,7 +1995,7 @@ 

      21.8.1 Issue 1: Slow Fitting for Large Datasets

      Problem: Kriging becomes slow with many training points.

      Solution: Limit surrogate training set size:

      -
      +
      # Use max_surrogate_points in SpotOptim
       optimizer = SpotOptim(
           fun=objective,
      @@ -2011,7 +2011,7 @@ 

      21.8.2 Issue 2: Poor Predictions for Categorical Variables

      Problem: Kriging doesn’t handle factors well.

      Solution: Try different distance metrics:

      -
      +
      # Try different metrics
       for metric in ['canberra', 'hamming', 'jaccard']:
           kriging = Kriging(
      @@ -2027,7 +2027,7 @@ 

      21.8.3 Issue 3: Numerical Instability

      Problem: Correlation matrix is nearly singular.

      Solution: Increase regularization:

      -
      +
      # For interpolation method
       kriging = Kriging(
           method='interpolation',
      @@ -2047,7 +2047,7 @@ 

      21.8.4 Issue 4: Overfitting to Noisy Data

      Problem: Kriging fits noise instead of underlying function.

      Solution: Use regression method with reasonable Lambda bounds:

      -
      +
      kriging = Kriging(
           method='regression',
           min_Lambda=-5.0,  # Not too small
      @@ -2071,7 +2071,7 @@ 

      21.9.2 Quick Reference

      -
      +
      from spotoptim import SpotOptim
       from spotoptim.surrogate import Kriging
       
      diff --git a/docs/kriging_surrogate_files/figure-pdf/cell-11-output-1.pdf b/docs/kriging_surrogate_files/figure-pdf/cell-11-output-1.pdf
      index 4cc7033c..1978fe4f 100644
      Binary files a/docs/kriging_surrogate_files/figure-pdf/cell-11-output-1.pdf and b/docs/kriging_surrogate_files/figure-pdf/cell-11-output-1.pdf differ
      diff --git a/docs/kriging_surrogate_files/figure-pdf/cell-4-output-1.pdf b/docs/kriging_surrogate_files/figure-pdf/cell-4-output-1.pdf
      index e759fae3..a73c5c70 100644
      Binary files a/docs/kriging_surrogate_files/figure-pdf/cell-4-output-1.pdf and b/docs/kriging_surrogate_files/figure-pdf/cell-4-output-1.pdf differ
      diff --git a/docs/learning_rate_mapping.html b/docs/learning_rate_mapping.html
      index ad86deec..6ab931b6 100644
      --- a/docs/learning_rate_mapping.html
      +++ b/docs/learning_rate_mapping.html
      @@ -1495,7 +1495,7 @@ 

      29.9 Examples

      29.9.1 Complete Example: Optimizer Comparison Study

      -
      +
      """
       Complete example: Compare optimizers with unified learning rate interface.
       """
      diff --git a/docs/mlp.html b/docs/mlp.html
      index 73f3496c..4220a36f 100644
      --- a/docs/mlp.html
      +++ b/docs/mlp.html
      @@ -715,7 +715,7 @@ 

      26.2.1 Initialization

      You can initialize an MLP by describing its architecture explicitly.

      -
      +
      import torch
       from spotoptim.nn.mlp import MLP
       
      @@ -747,7 +747,7 @@ 

      26.2.2 Implicit Architecture (Width & Depth)

      For hyperparameter tuning, it is often easier to control the network’s size with just two numbers: width and depth.

      -
      +
      # Create a network with 3 hidden layers, each having 64 neurons
       model_compact = MLP(
           in_channels=10,
      @@ -796,7 +796,7 @@ 

      <
    6. dropout: Dropout probability applied after each layer (default: 0.0).
    7. bias: Whether to use bias in linear layers (default: True).
    8. -
      +
      model_custom = MLP(
           in_channels=10,
           hidden_channels=[32, 1],
      @@ -818,7 +818,7 @@ 

      <

      26.4 Optimizer Integration

      The MLP class simplifies optimizer creation, specifically handling the “unified learning rate” concept used in SpotOptim (where different optimizers have their default learning rates mapped to a common scale).

      -
      +
      # Create model with a unified learning rate of 1.0 (default)
       model = MLP(in_channels=10, hidden_channels=[32, 1], lr=1.0)
       
      @@ -835,7 +835,7 @@ 

      <

      You can also pass extra arguments to the optimizer:

      -
      +
      # SGD with momentum
       opt_sgd_mom = model.get_optimizer("SGD", momentum=0.9)
       print(opt_sgd_mom)
      @@ -858,7 +858,7 @@

      <

      26.5 Hyperparameter Tuning

      One of the key features of the MLP class is its ability to suggest a default ParameterSet for tuning. This provides a great starting point for finding the best architecture.

      -
      +
      from spotoptim.hyperparameters import ParameterSet
       
       # Get default search space
      @@ -871,7 +871,7 @@ 

      <

      26.5.1 Example: Tuning with SpotOptim

      Here is how you can use the MLP class in a full SpotOptim tuning loop using TorchObjective.

      -
      +
      from spotoptim import SpotOptim
       from spotoptim.core.experiment import ExperimentControl
       from spotoptim.core.data import SpotDataFromArray
      @@ -922,7 +922,7 @@ 

      print(objective._get_hyperparameters(res.x))

      Best Parameters:
      -{'l1': 64, 'num_hidden_layers': 2, 'activation': 'LeakyReLU', 'lr': 3.878308634750335, 'optimizer': 'Adam', 'epochs': 14}
      +{'l1': 64, 'num_hidden_layers': 2, 'activation': 'LeakyReLU', 'lr': 0.39507990921803654, 'optimizer': 'Adam', 'epochs': 14}

      This setup automatically tunes the architecture (l1, num_hidden_layers), usage of activation functions (activation), learning rate (lr), and optimization method (optimizer) if left in the parameter set.

      diff --git a/docs/multiobjective.html b/docs/multiobjective.html index d22930fe..068e9480 100644 --- a/docs/multiobjective.html +++ b/docs/multiobjective.html @@ -1007,7 +1007,7 @@

      24.2 Common Scalarization Strategies

      24.2.1 1. Weighted Sum

      -
      +
      def weighted_sum(y_mo, weights=[0.5, 0.5]):
           return sum(w * y_mo[:, i] for i, w in enumerate(weights))
      @@ -1015,7 +1015,7 @@

      24.2.2 2. Weighted Sum with Normalization

      -
      +
      def normalized_weighted_sum(y_mo, weights=[0.5, 0.5]):
           # Normalize each objective to [0, 1]
           y_norm = (y_mo - y_mo.min(axis=0)) / (y_mo.max(axis=0) - y_mo.min(axis=0) + 1e-10)
      @@ -1025,7 +1025,7 @@ 

      24.2.3 3. Min-Max (Chebyshev)

      -
      +
      def min_max(y_mo):
           return np.max(y_mo, axis=1)
      @@ -1033,7 +1033,7 @@

      24.2.4 4. Target Achievement

      -
      +
      def target_achievement(y_mo, targets=[0.0, 0.0]):
           # Minimize deviation from targets
           return np.sum((y_mo - targets)**2, axis=1)
      @@ -1042,7 +1042,7 @@

      24.2.5 5. Product

      -
      +
      def product(y_mo):
           return np.prod(y_mo + 1e-10, axis=1)  # Add small value to avoid zero
      @@ -1343,7 +1343,7 @@

      24.6.3 Example 7: ZDT2 - Non-Convex Pareto Front

      ZDT2 has a non-convex Pareto front, making it more challenging than ZDT1.

      -
      +
      from spotoptim.function.mo import zdt2
       
       # Generate Pareto front
      @@ -1380,7 +1380,7 @@ 

      +
      print("ZDT2 Characteristics:")
       print("- Non-convex Pareto front: f2 = 1 - f1²")
       print("- Tests algorithm's ability to handle non-convexity")
      @@ -1396,7 +1396,7 @@ 

      24.6.4 Example 8: ZDT3 - Disconnected Pareto Front

      ZDT3 has a discontinuous Pareto front consisting of 5 separate regions.

      -
      +
      from spotoptim.function.mo import zdt3
       
       # Generate Pareto front
      @@ -1433,7 +1433,7 @@ 

      +
      print("ZDT3 Characteristics:")
       print("- Disconnected Pareto front with 5 separate regions")
       print("- Tests diversity maintenance in algorithms")
      @@ -1449,7 +1449,7 @@ 

      24.6.5 Example 9: ZDT4 - Multimodal with Many Local Fronts

      ZDT4 has 21^9 local Pareto fronts, testing the algorithm’s ability to escape local optima.

      -
      +
      from spotoptim.function.mo import zdt4
       
       # Generate Pareto front (same as ZDT1 when optimal)
      @@ -1501,7 +1501,7 @@ 

      +
      print("ZDT4 Characteristics:")
       print("- 21^9 local Pareto fronts")
       print("- Tests global search capability")
      @@ -1517,7 +1517,7 @@ 

      24.6.6 Example 10: ZDT6 - Non-Uniform Density

      ZDT6 has a non-uniform search space with low density of solutions near the Pareto front.

      -
      +
      from spotoptim.function.mo import zdt6
       
       # Generate Pareto front
      @@ -1564,7 +1564,7 @@ 

      +
      print("ZDT6 Characteristics:")
       print("- Non-uniform density of Pareto optimal solutions")
       print("- Low density near the Pareto front")
      @@ -1580,7 +1580,7 @@ 

      24.6.7 Example 11: DTLZ2 - Scalable Spherical Pareto Front

      DTLZ2 is a scalable test problem with a concave spherical Pareto front.

      -
      +
      from spotoptim.function.mo import dtlz2
       
       # 3D visualization for 3 objectives
      @@ -1630,7 +1630,7 @@ 

      +
      print("DTLZ2 Characteristics:")
       print("- Scalable number of objectives")
       print("- Pareto front is unit sphere: Σf_i² = 1")
      @@ -1678,7 +1678,7 @@ 

      24.6.8 Example 12: Schaffer N1 - Simple Bi-Objective

      Schaffer N1 is one of the earliest and simplest multi-objective test functions.

      -
      +
      from spotoptim.function.mo import schaffer_n1
       
       # Generate data
      @@ -1727,7 +1727,7 @@ 

      +
      print("Schaffer N1 Characteristics:")
       print("- Simplest multi-objective function")
       print("- 1D decision variable")
      @@ -1750,7 +1750,7 @@ 

      \(x_i \in [-2, 2]\). The Pareto optimal solutions satisfy: \[ \sum_{i=1}^{n} x_i^2 = \frac{1}{n}. \]

      -
      +
      from spotoptim.function.mo import fonseca_fleming
       
       # Generate data for 2D
      @@ -1812,7 +1812,7 @@ 

      +
      print("Fonseca-Fleming Characteristics:")
       print("- Concave Pareto front (minimization)")
       print("- Symmetric problem")
      @@ -1968,7 +1968,7 @@ 

      model.fit(X_base, y[:, i]) models.append(model)

      -
      +
      # calculate base Morris-Mitchell stats
       phi_base, J_base, d_base = mmphi_intensive(X_base, q=2, p=2)
       print(f"phi_base: {phi_base}, J_base: {J_base}, d_base: {d_base}")
      @@ -2093,7 +2093,7 @@

      +
      # imp_min = phi_base * 0.01
       # imp_max = phi_base * 0.1
       
      @@ -2101,12 +2101,12 @@ 

      # d_mm.plot()

      Combine into overall desirability

      -
      +
      # D_overall = DOverall(*d_funcs, d_mm)
       D_overall = DOverall(*d_funcs)

      Now we can call the mo_mm_desirability_function with a test point:

      -
      +
      x_test = np.array([0.5, 0.5])  # Example test point
       neg_D, objectives = mo_mm_desirability_function(x_test, models, X_base, J_base, d_base, phi_base, D_overall, mm_objective=False)
       print(f"Negative Desirability: {neg_D}")
      diff --git a/docs/multiobjective_files/figure-html/cell-14-output-1.png b/docs/multiobjective_files/figure-html/cell-14-output-1.png
      deleted file mode 100644
      index 68631d6a..00000000
      Binary files a/docs/multiobjective_files/figure-html/cell-14-output-1.png and /dev/null differ
      diff --git a/docs/multiobjective_files/figure-html/cell-16-output-1.png b/docs/multiobjective_files/figure-html/cell-16-output-1.png
      deleted file mode 100644
      index fc90919f..00000000
      Binary files a/docs/multiobjective_files/figure-html/cell-16-output-1.png and /dev/null differ
      diff --git a/docs/multiobjective_files/figure-html/cell-17-output-1.png b/docs/multiobjective_files/figure-html/cell-17-output-1.png
      deleted file mode 100644
      index 5468dd31..00000000
      Binary files a/docs/multiobjective_files/figure-html/cell-17-output-1.png and /dev/null differ
      diff --git a/docs/multiobjective_files/figure-html/cell-18-output-1.png b/docs/multiobjective_files/figure-html/cell-18-output-1.png
      deleted file mode 100644
      index 777e57d9..00000000
      Binary files a/docs/multiobjective_files/figure-html/cell-18-output-1.png and /dev/null differ
      diff --git a/docs/multiobjective_files/figure-html/cell-19-output-1.png b/docs/multiobjective_files/figure-html/cell-19-output-1.png
      deleted file mode 100644
      index 8371d82a..00000000
      Binary files a/docs/multiobjective_files/figure-html/cell-19-output-1.png and /dev/null differ
      diff --git a/docs/multiobjective_files/figure-html/cell-20-output-1.png b/docs/multiobjective_files/figure-html/cell-20-output-1.png
      deleted file mode 100644
      index de3fa1ff..00000000
      Binary files a/docs/multiobjective_files/figure-html/cell-20-output-1.png and /dev/null differ
      diff --git a/docs/multiobjective_files/figure-html/cell-22-output-1.png b/docs/multiobjective_files/figure-html/cell-22-output-1.png
      deleted file mode 100644
      index b508ea5a..00000000
      Binary files a/docs/multiobjective_files/figure-html/cell-22-output-1.png and /dev/null differ
      diff --git a/docs/multiobjective_files/figure-html/cell-23-output-1.png b/docs/multiobjective_files/figure-html/cell-23-output-1.png
      deleted file mode 100644
      index 92c6dd00..00000000
      Binary files a/docs/multiobjective_files/figure-html/cell-23-output-1.png and /dev/null differ
      diff --git a/docs/multiobjective_files/figure-html/cell-25-output-1.png b/docs/multiobjective_files/figure-html/cell-25-output-1.png
      deleted file mode 100644
      index 6124a5e7..00000000
      Binary files a/docs/multiobjective_files/figure-html/cell-25-output-1.png and /dev/null differ
      diff --git a/docs/multiobjective_files/figure-html/mo-desirability-function-plot-output-1.png b/docs/multiobjective_files/figure-html/mo-desirability-function-plot-output-1.png
      deleted file mode 100644
      index 6b161c6e..00000000
      Binary files a/docs/multiobjective_files/figure-html/mo-desirability-function-plot-output-1.png and /dev/null differ
      diff --git a/docs/multiobjective_files/figure-html/mo-desirability-function-plot-output-2.png b/docs/multiobjective_files/figure-html/mo-desirability-function-plot-output-2.png
      deleted file mode 100644
      index c42b2746..00000000
      Binary files a/docs/multiobjective_files/figure-html/mo-desirability-function-plot-output-2.png and /dev/null differ
      diff --git a/docs/multiobjective_files/figure-pdf/fig-dtlz2-plot-output-1.pdf b/docs/multiobjective_files/figure-pdf/fig-dtlz2-plot-output-1.pdf
      index 68918a79..2e32c043 100644
      Binary files a/docs/multiobjective_files/figure-pdf/fig-dtlz2-plot-output-1.pdf and b/docs/multiobjective_files/figure-pdf/fig-dtlz2-plot-output-1.pdf differ
      diff --git a/docs/multiobjective_files/figure-pdf/fig-fonseca-fleming-plot-output-1.pdf b/docs/multiobjective_files/figure-pdf/fig-fonseca-fleming-plot-output-1.pdf
      index b4866f11..78fa63cc 100644
      Binary files a/docs/multiobjective_files/figure-pdf/fig-fonseca-fleming-plot-output-1.pdf and b/docs/multiobjective_files/figure-pdf/fig-fonseca-fleming-plot-output-1.pdf differ
      diff --git a/docs/multiobjective_files/figure-pdf/fig-mo-desirability-function-plot-output-1.pdf b/docs/multiobjective_files/figure-pdf/fig-mo-desirability-function-plot-output-1.pdf
      index 92464a0c..8b53f157 100644
      Binary files a/docs/multiobjective_files/figure-pdf/fig-mo-desirability-function-plot-output-1.pdf and b/docs/multiobjective_files/figure-pdf/fig-mo-desirability-function-plot-output-1.pdf differ
      diff --git a/docs/multiobjective_files/figure-pdf/fig-mo-desirability-function-plot-output-2.pdf b/docs/multiobjective_files/figure-pdf/fig-mo-desirability-function-plot-output-2.pdf
      index 87105f34..de8e43cd 100644
      Binary files a/docs/multiobjective_files/figure-pdf/fig-mo-desirability-function-plot-output-2.pdf and b/docs/multiobjective_files/figure-pdf/fig-mo-desirability-function-plot-output-2.pdf differ
      diff --git a/docs/multiobjective_files/figure-pdf/fig-mo-pareto-front-orig-plot-output-1.pdf b/docs/multiobjective_files/figure-pdf/fig-mo-pareto-front-orig-plot-output-1.pdf
      index af18444a..1fe338d6 100644
      Binary files a/docs/multiobjective_files/figure-pdf/fig-mo-pareto-front-orig-plot-output-1.pdf and b/docs/multiobjective_files/figure-pdf/fig-mo-pareto-front-orig-plot-output-1.pdf differ
      diff --git a/docs/multiobjective_files/figure-pdf/fig-mo-pareto-optx-plot-output-1.pdf b/docs/multiobjective_files/figure-pdf/fig-mo-pareto-optx-plot-output-1.pdf
      index 08319553..56d8af3a 100644
      Binary files a/docs/multiobjective_files/figure-pdf/fig-mo-pareto-optx-plot-output-1.pdf and b/docs/multiobjective_files/figure-pdf/fig-mo-pareto-optx-plot-output-1.pdf differ
      diff --git a/docs/multiobjective_files/figure-pdf/fig-mo-xy-contour-plot-output-1.pdf b/docs/multiobjective_files/figure-pdf/fig-mo-xy-contour-plot-output-1.pdf
      index 86c7bee1..46c91bda 100644
      Binary files a/docs/multiobjective_files/figure-pdf/fig-mo-xy-contour-plot-output-1.pdf and b/docs/multiobjective_files/figure-pdf/fig-mo-xy-contour-plot-output-1.pdf differ
      diff --git a/docs/multiobjective_files/figure-pdf/fig-mo-xy-surface-plot-output-1.pdf b/docs/multiobjective_files/figure-pdf/fig-mo-xy-surface-plot-output-1.pdf
      index 0e288b15..72a82e07 100644
      Binary files a/docs/multiobjective_files/figure-pdf/fig-mo-xy-surface-plot-output-1.pdf and b/docs/multiobjective_files/figure-pdf/fig-mo-xy-surface-plot-output-1.pdf differ
      diff --git a/docs/multiobjective_files/figure-pdf/fig-schaffer-n1-plot-output-1.pdf b/docs/multiobjective_files/figure-pdf/fig-schaffer-n1-plot-output-1.pdf
      index 5970d881..53e4fd22 100644
      Binary files a/docs/multiobjective_files/figure-pdf/fig-schaffer-n1-plot-output-1.pdf and b/docs/multiobjective_files/figure-pdf/fig-schaffer-n1-plot-output-1.pdf differ
      diff --git a/docs/multiobjective_files/figure-pdf/fig-zdt1-plot-output-1.pdf b/docs/multiobjective_files/figure-pdf/fig-zdt1-plot-output-1.pdf
      index 08ce9833..e3900e7a 100644
      Binary files a/docs/multiobjective_files/figure-pdf/fig-zdt1-plot-output-1.pdf and b/docs/multiobjective_files/figure-pdf/fig-zdt1-plot-output-1.pdf differ
      diff --git a/docs/multiobjective_files/figure-pdf/fig-zdt1-scalarization-diff-output-1.pdf b/docs/multiobjective_files/figure-pdf/fig-zdt1-scalarization-diff-output-1.pdf
      index cac475ee..3d00ad99 100644
      Binary files a/docs/multiobjective_files/figure-pdf/fig-zdt1-scalarization-diff-output-1.pdf and b/docs/multiobjective_files/figure-pdf/fig-zdt1-scalarization-diff-output-1.pdf differ
      diff --git a/docs/multiobjective_files/figure-pdf/fig-zdt2-plot-output-1.pdf b/docs/multiobjective_files/figure-pdf/fig-zdt2-plot-output-1.pdf
      index 6fc50af9..63945bb4 100644
      Binary files a/docs/multiobjective_files/figure-pdf/fig-zdt2-plot-output-1.pdf and b/docs/multiobjective_files/figure-pdf/fig-zdt2-plot-output-1.pdf differ
      diff --git a/docs/multiobjective_files/figure-pdf/fig-zdt3-plot-output-1.pdf b/docs/multiobjective_files/figure-pdf/fig-zdt3-plot-output-1.pdf
      index 42980f9f..1820c474 100644
      Binary files a/docs/multiobjective_files/figure-pdf/fig-zdt3-plot-output-1.pdf and b/docs/multiobjective_files/figure-pdf/fig-zdt3-plot-output-1.pdf differ
      diff --git a/docs/multiobjective_files/figure-pdf/fig-zdt4-plot-output-1.pdf b/docs/multiobjective_files/figure-pdf/fig-zdt4-plot-output-1.pdf
      index 70df6e6a..20729fcd 100644
      Binary files a/docs/multiobjective_files/figure-pdf/fig-zdt4-plot-output-1.pdf and b/docs/multiobjective_files/figure-pdf/fig-zdt4-plot-output-1.pdf differ
      diff --git a/docs/multiobjective_files/figure-pdf/fig-zdt6-plot-output-1.pdf b/docs/multiobjective_files/figure-pdf/fig-zdt6-plot-output-1.pdf
      index 58b11ea2..17b396d4 100644
      Binary files a/docs/multiobjective_files/figure-pdf/fig-zdt6-plot-output-1.pdf and b/docs/multiobjective_files/figure-pdf/fig-zdt6-plot-output-1.pdf differ
      diff --git a/docs/multiobjective_files/figure-pdf/mo-mm-desirability-function-plot-output-1.pdf b/docs/multiobjective_files/figure-pdf/mo-mm-desirability-function-plot-output-1.pdf
      index 65e1195a..0277e8d1 100644
      Binary files a/docs/multiobjective_files/figure-pdf/mo-mm-desirability-function-plot-output-1.pdf and b/docs/multiobjective_files/figure-pdf/mo-mm-desirability-function-plot-output-1.pdf differ
      diff --git a/docs/pinns_1.html b/docs/pinns_1.html
      index 4f65dd88..5eebfe75 100644
      --- a/docs/pinns_1.html
      +++ b/docs/pinns_1.html
      @@ -770,7 +770,7 @@ 

      29 Setup

      First, let’s import the necessary libraries:

      -
      +
      import matplotlib.pyplot as plt
       import numpy as np
       import torch
      @@ -781,7 +781,7 @@ 

      29 Setup

      # Set random seed for reproducibility torch.manual_seed(123)
      -
      <torch._C.Generator at 0x118e3b5d0>
      +
      <torch._C.Generator at 0x116f7f5f0>

      @@ -795,7 +795,7 @@

      30 The Neural Ne
    9. 3 hidden layers
    10. Tanh activation function
    11. -
      +
      model = LinearRegressor(
           input_dim=1,
           output_dim=1,
      @@ -830,7 +830,7 @@ 

      30 The Neural Ne

      31 Generate Training Data

      We’ll generate exact solution data using a numerical solver (RK2 method):

      -
      +
      def oscillator(
           n_steps: int = 3000,
           t_min: float = 0.0,
      @@ -878,7 +878,7 @@ 

      31 Generate Trai return t_tensor, y_tensor

      Generate the exact solution and sample training data points:

      -
      +
      # Generate exact solution (3000 points)
       x, y = oscillator()
       
      @@ -894,7 +894,7 @@ 

      31 Generate Trai

      Visualize the exact solution and training data:

      -
      +
      plt.figure(figsize=(10, 6))
       plt.plot(x.numpy(), y.numpy(), 'b-', linewidth=2, label="Exact solution y(t)")
       plt.scatter(x_data.numpy(), y_data.numpy(), color="tab:orange", 
      @@ -918,7 +918,7 @@ 

      31 Generate Trai

      32 Collocation Points

      Create collocation points where we’ll enforce the physics (ODE) constraints:

      -
      +
      # 50 evenly spaced points in [0, 30]
       x_physics = torch.linspace(0, 30, 50).view(-1, 1).requires_grad_(True)
       
      @@ -980,7 +980,7 @@ 

      33.2 Training Loop

      -
      +
      loss_history_pinn = []
       loss2_history_pinn = []
       plot_data_points_pinn = []
      @@ -1047,7 +1047,7 @@ 

      34 Results Visualization

      34.1 Training Progress

      -
      +
      fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
       
       # Plot total loss
      @@ -1080,7 +1080,7 @@ 

      34.2 Solution Evolution During Training

      Visualize how the neural network solution evolves during training:

      -
      +
      xp_plot = x_physics.detach()
       
       for plot_info in plot_data_points_pinn:
      @@ -1143,7 +1143,7 @@ 

      34.3 Final Solution Comparison

      -
      +
      # Final prediction
       model.eval()
       with torch.no_grad():
      @@ -1187,7 +1187,7 @@ 

      34.4 Error Analysis

      -
      +
      # Compute absolute error
       error = torch.abs(y_final - y)
       
      diff --git a/docs/pinns_1_files/figure-pdf/cell-10-output-1.pdf b/docs/pinns_1_files/figure-pdf/cell-10-output-1.pdf
      index 214eb0cd..a7ae6832 100644
      Binary files a/docs/pinns_1_files/figure-pdf/cell-10-output-1.pdf and b/docs/pinns_1_files/figure-pdf/cell-10-output-1.pdf differ
      diff --git a/docs/pinns_1_files/figure-pdf/cell-10-output-2.pdf b/docs/pinns_1_files/figure-pdf/cell-10-output-2.pdf
      index 1c6017d7..47a7528d 100644
      Binary files a/docs/pinns_1_files/figure-pdf/cell-10-output-2.pdf and b/docs/pinns_1_files/figure-pdf/cell-10-output-2.pdf differ
      diff --git a/docs/pinns_1_files/figure-pdf/cell-10-output-3.pdf b/docs/pinns_1_files/figure-pdf/cell-10-output-3.pdf
      index 28607d7b..2b643fa8 100644
      Binary files a/docs/pinns_1_files/figure-pdf/cell-10-output-3.pdf and b/docs/pinns_1_files/figure-pdf/cell-10-output-3.pdf differ
      diff --git a/docs/pinns_1_files/figure-pdf/cell-10-output-4.pdf b/docs/pinns_1_files/figure-pdf/cell-10-output-4.pdf
      index da2a113b..10383d12 100644
      Binary files a/docs/pinns_1_files/figure-pdf/cell-10-output-4.pdf and b/docs/pinns_1_files/figure-pdf/cell-10-output-4.pdf differ
      diff --git a/docs/pinns_1_files/figure-pdf/cell-11-output-1.pdf b/docs/pinns_1_files/figure-pdf/cell-11-output-1.pdf
      index ffbd83d0..b6d67f8b 100644
      Binary files a/docs/pinns_1_files/figure-pdf/cell-11-output-1.pdf and b/docs/pinns_1_files/figure-pdf/cell-11-output-1.pdf differ
      diff --git a/docs/pinns_1_files/figure-pdf/cell-12-output-1.pdf b/docs/pinns_1_files/figure-pdf/cell-12-output-1.pdf
      index 13707962..402fa16c 100644
      Binary files a/docs/pinns_1_files/figure-pdf/cell-12-output-1.pdf and b/docs/pinns_1_files/figure-pdf/cell-12-output-1.pdf differ
      diff --git a/docs/pinns_1_files/figure-pdf/cell-6-output-1.pdf b/docs/pinns_1_files/figure-pdf/cell-6-output-1.pdf
      index 8c6504bc..5e7b6bb0 100644
      Binary files a/docs/pinns_1_files/figure-pdf/cell-6-output-1.pdf and b/docs/pinns_1_files/figure-pdf/cell-6-output-1.pdf differ
      diff --git a/docs/pinns_1_files/figure-pdf/cell-9-output-1.pdf b/docs/pinns_1_files/figure-pdf/cell-9-output-1.pdf
      index 5df0a819..fa3b34aa 100644
      Binary files a/docs/pinns_1_files/figure-pdf/cell-9-output-1.pdf and b/docs/pinns_1_files/figure-pdf/cell-9-output-1.pdf differ
      diff --git a/docs/pinns_2_hyperparameter_tuning.html b/docs/pinns_2_hyperparameter_tuning.html
      index 87656fec..d8c582d1 100644
      --- a/docs/pinns_2_hyperparameter_tuning.html
      +++ b/docs/pinns_2_hyperparameter_tuning.html
      @@ -1237,9 +1237,9 @@ 

      +TensorBoard logging enabled: runs/spotoptim_20260411_221608

      Display search space configuration. The transcolumn shows applied transformations. lr_unified and alpha use log10 transformation internally. This enables efficient exploration of log-scale parameters. All values shown are in original scale (not transformed).

      @@ -1350,7 +1350,7 @@

      +TensorBoard writer closed. View logs with: tensorboard --logdir=runs/spotoptim_20260411_221608

      @@ -1377,7 +1377,7 @@

      Store values for later use in visualizations. Values are already in original scale thanks to var_trans. Factor variables are returned as strings.

      -
      +
      best_l1 = int(result.x[0])
       best_num_layers = int(result.x[1])
       best_activation = result.x[2]
      @@ -1817,7 +1817,7 @@ 

      40 Hyperparamete

      40.1 Sensitivity Analysis (Spearman Correlation)

      -
      +
      # Use the new sensitivity_spearman() method for tabular output
       optimizer.sensitivity_spearman()
      diff --git a/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-baseline-comparison-pinn2-output-2.pdf b/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-baseline-comparison-pinn2-output-2.pdf index 70929842..b3881c16 100644 Binary files a/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-baseline-comparison-pinn2-output-2.pdf and b/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-baseline-comparison-pinn2-output-2.pdf differ diff --git a/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-final-model-training-pinn2-output-2.pdf b/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-final-model-training-pinn2-output-2.pdf index 8d1827dd..e1843923 100644 Binary files a/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-final-model-training-pinn2-output-2.pdf and b/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-final-model-training-pinn2-output-2.pdf differ diff --git a/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-final-model-visualization-pinn2-output-1.pdf b/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-final-model-visualization-pinn2-output-1.pdf index f947544f..0bfd2539 100644 Binary files a/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-final-model-visualization-pinn2-output-1.pdf and b/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-final-model-visualization-pinn2-output-1.pdf differ diff --git a/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-optimization-history-pinn2-output-1.pdf b/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-optimization-history-pinn2-output-1.pdf index 877dfcd8..994d693f 100644 Binary files a/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-optimization-history-pinn2-output-1.pdf and b/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-optimization-history-pinn2-output-1.pdf differ diff --git a/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-parameter-scatter-pinn2-output-1.pdf 
b/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-parameter-scatter-pinn2-output-1.pdf index 7da472e7..125fa169 100644 Binary files a/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-parameter-scatter-pinn2-output-1.pdf and b/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-parameter-scatter-pinn2-output-1.pdf differ diff --git a/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-sensitivity-analysis-pinn2-output-1.pdf b/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-sensitivity-analysis-pinn2-output-1.pdf index 8e3cb98a..cf7eaf8c 100644 Binary files a/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-sensitivity-analysis-pinn2-output-1.pdf and b/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-sensitivity-analysis-pinn2-output-1.pdf differ diff --git a/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-2.pdf b/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-2.pdf index 3f095736..bc2404b8 100644 Binary files a/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-2.pdf and b/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-2.pdf differ diff --git a/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-4.pdf b/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-4.pdf index bee5fbd3..f53adadd 100644 Binary files a/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-4.pdf and b/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-4.pdf differ diff --git a/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-6.pdf b/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-6.pdf index 4431b5cf..b596ae29 
100644 Binary files a/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-6.pdf and b/docs/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-6.pdf differ diff --git a/docs/plot_surrogate.html b/docs/plot_surrogate.html index 84db5a84..45962ca2 100644 --- a/docs/plot_surrogate.html +++ b/docs/plot_surrogate.html @@ -963,7 +963,7 @@

      -

      +

      @@ -1001,14 +1001,14 @@

      -

      +

      -

      +

      diff --git a/docs/plot_surrogate_files/figure-html/basic-plot-surrogate-example-output-1.png b/docs/plot_surrogate_files/figure-html/basic-plot-surrogate-example-output-1.png index 33e07b38..6b76fd8e 100644 Binary files a/docs/plot_surrogate_files/figure-html/basic-plot-surrogate-example-output-1.png and b/docs/plot_surrogate_files/figure-html/basic-plot-surrogate-example-output-1.png differ diff --git a/docs/plot_surrogate_files/figure-html/custom-plot-surrogate-example-output-1.png b/docs/plot_surrogate_files/figure-html/custom-plot-surrogate-example-output-1.png index 6f1f341c..5d838a04 100644 Binary files a/docs/plot_surrogate_files/figure-html/custom-plot-surrogate-example-output-1.png and b/docs/plot_surrogate_files/figure-html/custom-plot-surrogate-example-output-1.png differ diff --git a/docs/plot_surrogate_files/figure-html/plot-surrogate-3d-example-all-output-1.png b/docs/plot_surrogate_files/figure-html/plot-surrogate-3d-example-all-output-1.png index 2998ec8e..1ce89d7f 100644 Binary files a/docs/plot_surrogate_files/figure-html/plot-surrogate-3d-example-all-output-1.png and b/docs/plot_surrogate_files/figure-html/plot-surrogate-3d-example-all-output-1.png differ diff --git a/docs/plot_surrogate_files/figure-html/plot-surrogate-3d-example-all-output-2.png b/docs/plot_surrogate_files/figure-html/plot-surrogate-3d-example-all-output-2.png index 3b1728ab..04ed45d9 100644 Binary files a/docs/plot_surrogate_files/figure-html/plot-surrogate-3d-example-all-output-2.png and b/docs/plot_surrogate_files/figure-html/plot-surrogate-3d-example-all-output-2.png differ diff --git a/docs/plot_surrogate_files/figure-html/plot-surrogate-3d-example-all-output-3.png b/docs/plot_surrogate_files/figure-html/plot-surrogate-3d-example-all-output-3.png index b23573a7..7a05aea4 100644 Binary files a/docs/plot_surrogate_files/figure-html/plot-surrogate-3d-example-all-output-3.png and b/docs/plot_surrogate_files/figure-html/plot-surrogate-3d-example-all-output-3.png differ 
diff --git a/docs/plot_surrogate_files/figure-html/plot-surrogate-4d-example-output-1.png b/docs/plot_surrogate_files/figure-html/plot-surrogate-4d-example-output-1.png index 92cc3577..af345d5f 100644 Binary files a/docs/plot_surrogate_files/figure-html/plot-surrogate-4d-example-output-1.png and b/docs/plot_surrogate_files/figure-html/plot-surrogate-4d-example-output-1.png differ diff --git a/docs/plot_surrogate_files/figure-html/plot-surrogate-4d-example-output-2.png b/docs/plot_surrogate_files/figure-html/plot-surrogate-4d-example-output-2.png index f22f8e20..4b1e39ba 100644 Binary files a/docs/plot_surrogate_files/figure-html/plot-surrogate-4d-example-output-2.png and b/docs/plot_surrogate_files/figure-html/plot-surrogate-4d-example-output-2.png differ diff --git a/docs/plot_surrogate_files/figure-html/plot-surrogate-kriging-example-output-1.png b/docs/plot_surrogate_files/figure-html/plot-surrogate-kriging-example-output-1.png index 651c472b..ea28e6ec 100644 Binary files a/docs/plot_surrogate_files/figure-html/plot-surrogate-kriging-example-output-1.png and b/docs/plot_surrogate_files/figure-html/plot-surrogate-kriging-example-output-1.png differ diff --git a/docs/plot_surrogate_files/figure-pdf/basic-plot-surrogate-example-output-1.pdf b/docs/plot_surrogate_files/figure-pdf/basic-plot-surrogate-example-output-1.pdf index 47fc2171..794e37d7 100644 Binary files a/docs/plot_surrogate_files/figure-pdf/basic-plot-surrogate-example-output-1.pdf and b/docs/plot_surrogate_files/figure-pdf/basic-plot-surrogate-example-output-1.pdf differ diff --git a/docs/plot_surrogate_files/figure-pdf/custom-plot-surrogate-example-output-1.pdf b/docs/plot_surrogate_files/figure-pdf/custom-plot-surrogate-example-output-1.pdf index 43f8a61b..a92b886d 100644 Binary files a/docs/plot_surrogate_files/figure-pdf/custom-plot-surrogate-example-output-1.pdf and b/docs/plot_surrogate_files/figure-pdf/custom-plot-surrogate-example-output-1.pdf differ diff --git 
a/docs/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-1.pdf b/docs/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-1.pdf index f4b9862d..7b192c68 100644 Binary files a/docs/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-1.pdf and b/docs/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-1.pdf differ diff --git a/docs/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-2.pdf b/docs/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-2.pdf index d3f78bef..1d6bd39d 100644 Binary files a/docs/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-2.pdf and b/docs/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-2.pdf differ diff --git a/docs/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-3.pdf b/docs/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-3.pdf index e6c96a5f..cbe9fa7c 100644 Binary files a/docs/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-3.pdf and b/docs/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-3.pdf differ diff --git a/docs/plot_surrogate_files/figure-pdf/plot-surrogate-4d-example-output-1.pdf b/docs/plot_surrogate_files/figure-pdf/plot-surrogate-4d-example-output-1.pdf index 3ca24986..bdcb6a92 100644 Binary files a/docs/plot_surrogate_files/figure-pdf/plot-surrogate-4d-example-output-1.pdf and b/docs/plot_surrogate_files/figure-pdf/plot-surrogate-4d-example-output-1.pdf differ diff --git a/docs/plot_surrogate_files/figure-pdf/plot-surrogate-4d-example-output-2.pdf b/docs/plot_surrogate_files/figure-pdf/plot-surrogate-4d-example-output-2.pdf index f8e0c7b2..af774d7d 100644 Binary files a/docs/plot_surrogate_files/figure-pdf/plot-surrogate-4d-example-output-2.pdf and b/docs/plot_surrogate_files/figure-pdf/plot-surrogate-4d-example-output-2.pdf differ diff --git 
a/docs/plot_surrogate_files/figure-pdf/plot-surrogate-kriging-example-output-1.pdf b/docs/plot_surrogate_files/figure-pdf/plot-surrogate-kriging-example-output-1.pdf index 10b30c0d..0e9bf3bd 100644 Binary files a/docs/plot_surrogate_files/figure-pdf/plot-surrogate-kriging-example-output-1.pdf and b/docs/plot_surrogate_files/figure-pdf/plot-surrogate-kriging-example-output-1.pdf differ diff --git a/docs/plot_surrogate_files/figure-pdf/plot-surrogate-rosenbrock-example-output-1.pdf b/docs/plot_surrogate_files/figure-pdf/plot-surrogate-rosenbrock-example-output-1.pdf index 26ffeea6..1ba6d643 100644 Binary files a/docs/plot_surrogate_files/figure-pdf/plot-surrogate-rosenbrock-example-output-1.pdf and b/docs/plot_surrogate_files/figure-pdf/plot-surrogate-rosenbrock-example-output-1.pdf differ diff --git a/docs/reproducibility.html b/docs/reproducibility.html index e2c117fb..3e7b9c64 100644 --- a/docs/reproducibility.html +++ b/docs/reproducibility.html @@ -733,7 +733,7 @@

      15.2.1 Making Optimization Reproducible

      To ensure reproducible results, simply specify the seed parameter when creating the optimizer:

      -
      +
      import numpy as np
       from spotoptim import SpotOptim
       
      @@ -781,7 +781,7 @@ 

      15.2.2 Running Independent Experiments

      If you don’t specify a seed, each optimization run will explore the search space differently:

      -
      +
      # Non-reproducible: different results each time
       optimizer = SpotOptim(
           fun=sphere,
      @@ -802,7 +802,7 @@ 

      15.3.1 Example 1: Comparing Different Configurations

      When comparing different optimizer settings, use the same seed for fair comparison:

      -
      +
      import numpy as np
       from spotoptim import SpotOptim
       
      @@ -843,7 +843,7 @@ 

      15.3.2 Example 2: Reproducible Research Experiment

      For scientific papers or reports, always use a fixed seed and document it:

      -
      +
      import numpy as np
       from spotoptim import SpotOptim
       
      @@ -961,7 +961,7 @@ 

      15.3.3 Example 3: Multiple Independent Runs

      To test robustness, run the same optimization with different seeds:

      -
      +
      import numpy as np
       from spotoptim import SpotOptim
       
      @@ -1016,7 +1016,7 @@ 

      15.3.4 Example 4: Reproducible Initial Design

      The seed ensures that even the initial design points are reproducible:

      -
      +
      import numpy as np
       from spotoptim import SpotOptim
       
      @@ -1064,7 +1064,7 @@ 

      15.3.5 Example 5: Custom Initial Design with Seed

      Even when providing a custom initial design, the seed ensures reproducible subsequent iterations:

      -
      +
      import numpy as np
       from spotoptim import SpotOptim
       
      @@ -1118,7 +1118,7 @@ 

      15.4.1 Seed and Noisy Functions

      When optimizing noisy functions with repeated evaluations, the seed ensures reproducible noise:

      -
      +
      import numpy as np
       from spotoptim import SpotOptim
       
      @@ -1151,7 +1151,7 @@ 

      15.4.2 Different Seeds for Different Exploration

      Use different seeds to explore different regions systematically:

      -
      +
      import numpy as np
       from spotoptim import SpotOptim
       
      @@ -1203,7 +1203,7 @@ 

      15.5 Best Practices

      15.5.1 1. Always Use Seeds for Production Code

      -
      +
      # Good: Reproducible
       optimizer = SpotOptim(fun=objective, bounds=bounds, seed=42)
       
      @@ -1213,7 +1213,7 @@ 

      15.5.2 2. Document Your Seeds

      -
      +
      # Configuration for experiment reported in Section 4.2
       EXPERIMENT_SEED = 2024
       MAX_ITERATIONS = 100
      @@ -1228,7 +1228,7 @@ 

      <

      15.5.3 3. Use Different Seeds for Different Experiments

      -
      +
      # Different experiments should use different seeds
       BASELINE_SEED = 100
       EXPERIMENT_A_SEED = 200
      @@ -1237,7 +1237,7 @@ 

      15.5.4 4. Test Robustness Across Multiple Seeds

      -
      +
      # Run same optimization with multiple seeds
       for seed in [42, 123, 456, 789, 1011]:
           optimizer = SpotOptim(fun=objective, bounds=bounds, seed=seed)
      diff --git a/docs/save_load.html b/docs/save_load.html
      index cefaeaf8..4ddb4de2 100644
      --- a/docs/save_load.html
      +++ b/docs/save_load.html
      @@ -1207,7 +1207,7 @@ 

      14.7.2 2. Use Meaningful Prefixes

      Organize your experiments with descriptive prefixes:

      -
      +
      # Good practice: descriptive prefixes
       optimizer.save_experiment(prefix="sphere_d10_seed42")
       optimizer.save_experiment(prefix="rosenbrock_n100_lhs")
      @@ -1220,7 +1220,7 @@ 

      14.7.3 3. Save Experiments Before Remote Execution

      -
      +
      # Define locally
       optimizer = SpotOptim(bounds=bounds, max_iter=20, seed=42)
       optimizer.save_experiment(prefix="remote_job")
      @@ -1233,7 +1233,7 @@ 

      14.7.4 4. Version Your Experiments

      -
      +
      import datetime
       
       # Add timestamp to prefix
      @@ -1246,7 +1246,7 @@ 

      14.7.5 5. Handle File Paths Robustly

      -
      +
      import os
       
       # Create directory structure
      @@ -1553,7 +1553,7 @@ 

      <

      14.9.4 File Format

      Files are saved using pickle’s highest protocol:

      -
      +
      with open(filename, "wb") as handle:
           pickle.dump(optimizer_state, handle, protocol=pickle.HIGHEST_PROTOCOL)
      @@ -1573,7 +1573,7 @@

      14.10.2 Issue: “FileNotFoundError: Experiment file not found”

      Cause: Incorrect file path or file doesn’t exist.

      Solution: Check file path and ensure file exists:

      -
      +
      import os
       
       filename = "experiment_exp.pkl"
      @@ -1587,7 +1587,7 @@ 

      14.10.3 Issue: “FileExistsError: File already exists”

      Cause: Attempting to save over an existing file without overwrite=True.

      Solution: Either use a different prefix or enable overwriting:

      -
      +
      # Option 1: Use different prefix
       optimizer.save_result(prefix="my_result_v2")
       
      @@ -1599,7 +1599,7 @@ 

      14.10.4 Issue: Results differ after loading

      Cause: Random state not preserved or function behavior changed.

      Solution: Ensure you’re using the same seed and function definition:

      -
      +
      # When saving
       optimizer = SpotOptim(..., seed=42)  # Use fixed seed
       
      diff --git a/docs/save_load_files/figure-html/analyze-results-locally-output-2.png b/docs/save_load_files/figure-html/analyze-results-locally-output-2.png
      deleted file mode 100644
      index 9fbf3b60..00000000
      Binary files a/docs/save_load_files/figure-html/analyze-results-locally-output-2.png and /dev/null differ
      diff --git a/docs/save_load_files/figure-pdf/analyze-results-locally-complete-output-2.pdf b/docs/save_load_files/figure-pdf/analyze-results-locally-complete-output-2.pdf
      index c179c148..76fb74cb 100644
      Binary files a/docs/save_load_files/figure-pdf/analyze-results-locally-complete-output-2.pdf and b/docs/save_load_files/figure-pdf/analyze-results-locally-complete-output-2.pdf differ
      diff --git a/docs/save_load_files/figure-pdf/fig-plot-progress-1-output-1.pdf b/docs/save_load_files/figure-pdf/fig-plot-progress-1-output-1.pdf
      index 87d71ade..e459aec1 100644
      Binary files a/docs/save_load_files/figure-pdf/fig-plot-progress-1-output-1.pdf and b/docs/save_load_files/figure-pdf/fig-plot-progress-1-output-1.pdf differ
      diff --git a/docs/search.json b/docs/search.json
      index cd3fb94b..4349013a 100644
      --- a/docs/search.json
      +++ b/docs/search.json
      @@ -135,7 +135,7 @@
           "href": "spotoptim_examples.html#main-optimization-method-optimize",
           "title": "3  SpotOptim Internal Methods Examples",
           "section": "3.2 1. Main Optimization Method: optimize()",
      -    "text": "3.2 1. Main Optimization Method: optimize()\nThe optimize() method is the main entry point for running the optimization process. It coordinates all other methods in the optimization workflow:\n\nInitial Design Phase: get_initial_design(), curate_initial_design(), rm_initial_design_NA_values(), check_size_initial_design(), get_best_xy_initial_design()\nMain Loop: Surrogate fitting, OCBA application, point suggestion, evaluation\nTermination: determine_termination()\n\nLet’s see a complete optimization example:\n\n# Define a simple quadratic function\ndef sphere(X):\n    \"\"\"Sphere function: f(x) = sum(x^2)\"\"\"\n    X = np.atleast_2d(X)\n    return np.sum(X**2, axis=1)\n\n# Create optimizer\nopt = SpotOptim(\n    fun=sphere,\n    bounds=[(-5, 5), (-5, 5)],\n    n_initial=5,\n    max_iter=20,\n    verbose=True\n)\n\n# Run optimization\nresult = opt.optimize()\n\nprint(f\"\\nBest point found: {result.x}\")\nprint(f\"Best value: {result.fun:.6f}\")\nprint(f\"Total evaluations: {result.nfev}\")\nprint(f\"Sequential iterations: {result.nit}\")\nprint(f\"Success: {result.success}\")\nprint(f\"Message: {result.message}\")\n\nTensorBoard logging disabled\nInitial best: f(x) = 1.463558\nIter 1 | Best: 1.092223 | Rate: 1.00 | Evals: 30.0%\nIter 2 | Best: 0.222011 | Rate: 1.00 | Evals: 35.0%\nIter 3 | Best: 0.015843 | Rate: 1.00 | Evals: 40.0%\nIter 4 | Best: 0.003055 | Rate: 1.00 | Evals: 45.0%\nIter 5 | Best: 0.000562 | Rate: 1.00 | Evals: 50.0%\nIter 6 | Best: 0.000161 | Rate: 1.00 | Evals: 55.0%\nIter 7 | Best: 0.000019 | Rate: 1.00 | Evals: 60.0%\nIter 8 | Best: 0.000008 | Rate: 1.00 | Evals: 65.0%\nIter 9 | Best: 0.000006 | Rate: 1.00 | Evals: 70.0%\nIter 10 | Best: 0.000006 | Rate: 1.00 | Evals: 75.0%\nIter 11 | Best: 0.000005 | Rate: 1.00 | Evals: 80.0%\nIter 12 | Best: 0.000005 | Rate: 1.00 | Evals: 85.0%\nIter 13 | Best: 0.000005 | Rate: 1.00 | Evals: 90.0%\nIter 14 | Best: 0.000005 | Curr: 0.000005 | Rate: 0.93 | Evals: 95.0%\nIter 15 | Best: 
0.000005 | Rate: 0.93 | Evals: 100.0%\n\nBest point found: [ 0.00110384 -0.00190381]\nBest value: 0.000005\nTotal evaluations: 20\nSequential iterations: 15\nSuccess: True\nMessage: Optimization terminated: maximum evaluations (20) reached\n         Current function value: 0.000005\n         Iterations: 15\n         Function evaluations: 20\n\n\n\n3.2.1 Changing the Optimizer\n\n# Create optimizer\nopt = SpotOptim(\n    fun=sphere,\n    bounds=[(-5, 5), (-5, 5)],\n    n_initial=5,\n    max_iter=20,\n    verbose=True,\n    acquisition_optimizer='tricands',    \n)\n\n# Run optimization\nresult = opt.optimize()\n\nprint(f\"\\nBest point found: {result.x}\")\nprint(f\"Best value: {result.fun:.6f}\")\nprint(f\"Total evaluations: {result.nfev}\")\nprint(f\"Sequential iterations: {result.nit}\")\nprint(f\"Success: {result.success}\")\nprint(f\"Message: {result.message}\")\n\nTensorBoard logging disabled\nInitial best: f(x) = 1.847955\nIter 1 | Best: 1.847955 | Curr: 2.775960 | Rate: 0.00 | Evals: 30.0%\nIter 2 | Best: 1.847955 | Curr: 3.161398 | Rate: 0.00 | Evals: 35.0%\nIter 3 | Best: 0.720748 | Rate: 0.33 | Evals: 40.0%\nIter 4 | Best: 0.300748 | Rate: 0.50 | Evals: 45.0%\nIter 5 | Best: 0.198922 | Rate: 0.60 | Evals: 50.0%\nIter 6 | Best: 0.066854 | Rate: 0.67 | Evals: 55.0%\nIter 7 | Best: 0.004573 | Rate: 0.71 | Evals: 60.0%\nIter 8 | Best: 0.004573 | Curr: 0.019856 | Rate: 0.62 | Evals: 65.0%\nIter 9 | Best: 0.004573 | Curr: 0.013083 | Rate: 0.56 | Evals: 70.0%\nIter 10 | Best: 0.004573 | Curr: 0.007611 | Rate: 0.50 | Evals: 75.0%\nIter 11 | Best: 0.004573 | Curr: 0.007171 | Rate: 0.45 | Evals: 80.0%\nIter 12 | Best: 0.004573 | Curr: 0.005684 | Rate: 0.42 | Evals: 85.0%\nIter 13 | Best: 0.004573 | Curr: 0.005130 | Rate: 0.38 | Evals: 90.0%\nIter 14 | Best: 0.004573 | Curr: 0.004954 | Rate: 0.36 | Evals: 95.0%\nIter 15 | Best: 0.004573 | Curr: 0.005546 | Rate: 0.33 | Evals: 100.0%\n\nBest point found: [-0.06745712 -0.0046995 ]\nBest value: 0.004573\nTotal 
evaluations: 20\nSequential iterations: 15\nSuccess: True\nMessage: Optimization terminated: maximum evaluations (20) reached\n         Current function value: 0.004573\n         Iterations: 15\n         Function evaluations: 20",
      +    "text": "3.2 1. Main Optimization Method: optimize()\nThe optimize() method is the main entry point for running the optimization process. It coordinates all other methods in the optimization workflow:\n\nInitial Design Phase: get_initial_design(), curate_initial_design(), rm_initial_design_NA_values(), check_size_initial_design(), get_best_xy_initial_design()\nMain Loop: Surrogate fitting, OCBA application, point suggestion, evaluation\nTermination: determine_termination()\n\nLet’s see a complete optimization example:\n\n# Define a simple quadratic function\ndef sphere(X):\n    \"\"\"Sphere function: f(x) = sum(x^2)\"\"\"\n    X = np.atleast_2d(X)\n    return np.sum(X**2, axis=1)\n\n# Create optimizer\nopt = SpotOptim(\n    fun=sphere,\n    bounds=[(-5, 5), (-5, 5)],\n    n_initial=5,\n    max_iter=20,\n    verbose=True\n)\n\n# Run optimization\nresult = opt.optimize()\n\nprint(f\"\\nBest point found: {result.x}\")\nprint(f\"Best value: {result.fun:.6f}\")\nprint(f\"Total evaluations: {result.nfev}\")\nprint(f\"Sequential iterations: {result.nit}\")\nprint(f\"Success: {result.success}\")\nprint(f\"Message: {result.message}\")\n\nTensorBoard logging disabled\nInitial best: f(x) = 2.901470\nIter 1 | Best: 1.599343 | Rate: 1.00 | Evals: 30.0%\nIter 2 | Best: 1.599343 | Curr: 2.054661 | Rate: 0.50 | Evals: 35.0%\nIter 3 | Best: 0.349058 | Rate: 0.67 | Evals: 40.0%\nIter 4 | Best: 0.178050 | Rate: 0.75 | Evals: 45.0%\nIter 5 | Best: 0.089616 | Rate: 0.80 | Evals: 50.0%\nIter 6 | Best: 0.000354 | Rate: 0.83 | Evals: 55.0%\nIter 7 | Best: 0.000047 | Rate: 0.86 | Evals: 60.0%\nIter 8 | Best: 0.000002 | Rate: 0.88 | Evals: 65.0%\nIter 9 | Best: 0.000002 | Curr: 0.000002 | Rate: 0.78 | Evals: 70.0%\nIter 10 | Best: 0.000002 | Curr: 0.000002 | Rate: 0.70 | Evals: 75.0%\nIter 11 | Best: 0.000002 | Curr: 0.000002 | Rate: 0.64 | Evals: 80.0%\nIter 12 | Best: 0.000002 | Curr: 0.000002 | Rate: 0.58 | Evals: 85.0%\nIter 13 | Best: 0.000001 | Rate: 0.62 | Evals: 
90.0%\nIter 14 | Best: 0.000001 | Rate: 0.64 | Evals: 95.0%\nIter 15 | Best: 0.000001 | Rate: 0.67 | Evals: 100.0%\n\nBest point found: [0.00066472 0.00053723]\nBest value: 0.000001\nTotal evaluations: 20\nSequential iterations: 15\nSuccess: True\nMessage: Optimization terminated: maximum evaluations (20) reached\n         Current function value: 0.000001\n         Iterations: 15\n         Function evaluations: 20\n\n\n\n3.2.1 Changing the Optimizer\n\n# Create optimizer\nopt = SpotOptim(\n    fun=sphere,\n    bounds=[(-5, 5), (-5, 5)],\n    n_initial=5,\n    max_iter=20,\n    verbose=True,\n    acquisition_optimizer='tricands',    \n)\n\n# Run optimization\nresult = opt.optimize()\n\nprint(f\"\\nBest point found: {result.x}\")\nprint(f\"Best value: {result.fun:.6f}\")\nprint(f\"Total evaluations: {result.nfev}\")\nprint(f\"Sequential iterations: {result.nit}\")\nprint(f\"Success: {result.success}\")\nprint(f\"Message: {result.message}\")\n\nTensorBoard logging disabled\nInitial best: f(x) = 3.988087\nIter 1 | Best: 3.988087 | Curr: 6.117423 | Rate: 0.00 | Evals: 30.0%\nIter 2 | Best: 3.988087 | Curr: 6.932585 | Rate: 0.00 | Evals: 35.0%\nIter 3 | Best: 2.237966 | Rate: 0.33 | Evals: 40.0%\nIter 4 | Best: 0.801531 | Rate: 0.50 | Evals: 45.0%\nIter 5 | Best: 0.090012 | Rate: 0.60 | Evals: 50.0%\nIter 6 | Best: 0.090012 | Curr: 0.138100 | Rate: 0.50 | Evals: 55.0%\nIter 7 | Best: 0.063386 | Rate: 0.57 | Evals: 60.0%\nIter 8 | Best: 0.037775 | Rate: 0.62 | Evals: 65.0%\nIter 9 | Best: 0.028391 | Rate: 0.67 | Evals: 70.0%\nIter 10 | Best: 0.026155 | Rate: 0.70 | Evals: 75.0%\nIter 11 | Best: 0.023670 | Rate: 0.73 | Evals: 80.0%\nIter 12 | Best: 0.023670 | Curr: 0.024301 | Rate: 0.67 | Evals: 85.0%\nIter 13 | Best: 0.023222 | Rate: 0.69 | Evals: 90.0%\nIter 14 | Best: 0.022894 | Rate: 0.71 | Evals: 95.0%\nIter 15 | Best: 0.022894 | Curr: 0.023553 | Rate: 0.67 | Evals: 100.0%\n\nBest point found: [0.15108317 0.00822385]\nBest value: 0.022894\nTotal evaluations: 
20\nSequential iterations: 15\nSuccess: True\nMessage: Optimization terminated: maximum evaluations (20) reached\n         Current function value: 0.022894\n         Iterations: 15\n         Function evaluations: 20",
           "crumbs": [
             "Sequential Parameter Optimization Toolbox (SPOT)",
             "3  SpotOptim Internal Methods Examples"
      @@ -289,7 +289,7 @@
           "href": "awwe_optimization.html#method-1-spotoptim-surrogate-model-based-optimization",
           "title": "4  Optimizing the Aircraft Wing Weight Example",
           "section": "4.4 Method 1: SpotOptim (Surrogate Model Based Optimization)",
      -    "text": "4.4 Method 1: SpotOptim (Surrogate Model Based Optimization)\n\n# Start timing\nstart_time = time.time()\n\n# Configure SpotOptim\noptimizer_spot = SpotOptim(\n    fun=wingwt,\n    bounds=bounds,\n    x0=None,\n    max_iter=max_evals,\n    n_initial=10,  # Initial design points\n    var_name=['Sw', 'Wfw', 'A', 'L', 'q', 'l', 'Rtc', 'Nz', 'Wdg'],\n    acquisition='y',  # ei: Expected Improvement\n    max_surrogate_points=100,\n    seed=42,\n    verbose=True,\n    tensorboard_log=True,\n    tensorboard_clean=True\n)\n\nRemoved old TensorBoard logs: runs/spotoptim_20260404_084500\nCleaned 1 old TensorBoard log directory\nTensorBoard logging enabled: runs/spotoptim_20260404_090741",
      +    "text": "4.4 Method 1: SpotOptim (Surrogate Model Based Optimization)\n\n# Start timing\nstart_time = time.time()\n\n# Configure SpotOptim\noptimizer_spot = SpotOptim(\n    fun=wingwt,\n    bounds=bounds,\n    x0=None,\n    max_iter=max_evals,\n    n_initial=10,  # Initial design points\n    var_name=['Sw', 'Wfw', 'A', 'L', 'q', 'l', 'Rtc', 'Nz', 'Wdg'],\n    acquisition='y',  # ei: Expected Improvement\n    max_surrogate_points=100,\n    seed=42,\n    verbose=True,\n    tensorboard_log=True,\n    tensorboard_clean=True\n)\n\nRemoved old TensorBoard logs: runs/spotoptim_20260405_232436\nCleaned 1 old TensorBoard log directory\nTensorBoard logging enabled: runs/spotoptim_20260411_210457",
           "crumbs": [
             "Sequential Parameter Optimization Toolbox (SPOT)",
             "4  Optimizing the Aircraft Wing Weight Example"
      @@ -311,7 +311,7 @@
           "href": "awwe_optimization.html#run-optimization",
           "title": "4  Optimizing the Aircraft Wing Weight Example",
           "section": "4.6 Run optimization",
      -    "text": "4.6 Run optimization\n\nresult_spot = optimizer_spot.optimize()\n\nInitial best: f(x) = 205.911302\nIter 1 | Best: 205.911302 | Curr: 207.213031 | Rate: 0.00 | Evals: 36.7%\nIter 2 | Best: 123.244947 | Rate: 0.50 | Evals: 40.0%\nIter 3 | Best: 123.049522 | Rate: 0.67 | Evals: 43.3%\nIter 4 | Best: 122.583435 | Rate: 0.75 | Evals: 46.7%\nIter 5 | Best: 121.856102 | Rate: 0.80 | Evals: 50.0%\nIter 6 | Best: 121.750731 | Rate: 0.83 | Evals: 53.3%\nIter 7 | Best: 121.581279 | Rate: 0.86 | Evals: 56.7%\nIter 8 | Best: 120.668434 | Rate: 0.88 | Evals: 60.0%\nIter 9 | Best: 120.668434 | Curr: 121.917692 | Rate: 0.78 | Evals: 63.3%\nIter 10 | Best: 120.668434 | Curr: 122.342894 | Rate: 0.70 | Evals: 66.7%\nIter 11 | Best: 120.136404 | Rate: 0.73 | Evals: 70.0%\nIter 12 | Best: 119.507981 | Rate: 0.75 | Evals: 73.3%\nIter 13 | Best: 119.503751 | Rate: 0.77 | Evals: 76.7%\nIter 14 | Best: 119.503672 | Rate: 0.79 | Evals: 80.0%\nOptimizer candidate 1/3 was duplicate/invalid.\nIter 15 | Best: 119.503672 | Curr: 120.365658 | Rate: 0.73 | Evals: 83.3%\nIter 16 | Best: 119.503672 | Curr: 119.528666 | Rate: 0.69 | Evals: 86.7%\nIter 17 | Best: 119.503672 | Curr: 119.504084 | Rate: 0.65 | Evals: 90.0%\nIter 18 | Best: 119.503672 | Curr: 119.511109 | Rate: 0.61 | Evals: 93.3%\nIter 19 | Best: 119.503672 | Rate: 0.63 | Evals: 96.7%\nIter 20 | Best: 119.503672 | Curr: 119.522520 | Rate: 0.60 | Evals: 100.0%\nTensorBoard writer closed. 
View logs with: tensorboard --logdir=runs/spotoptim_20260404_090741\n\n\n\n# End timing\nspot_time = time.time() - start_time\n\nprint(f\"\\nSpotOptim Results:\")\nprint(f\"  Best weight: {result_spot.fun:.4f} lb\")\nprint(f\"  Function evaluations: {result_spot.nfev}\")\nprint(f\"  Time elapsed: {spot_time:.2f} seconds\")\nprint(f\"  Success: {result_spot.success}\")\n\n\nSpotOptim Results:\n  Best weight: 119.5037 lb\n  Function evaluations: 30\n  Time elapsed: 10.98 seconds\n  Success: True\n\n\n\noptimizer_spot.print_best()\n\n\nBest Solution Found:\n--------------------------------------------------\n  Sw: 0.0000\n  Wfw: 0.0000\n  A: 0.0000\n  L: 0.5000\n  q: 0.0000\n  l: 0.0000\n  Rtc: 1.0000\n  Nz: 0.0000\n  Wdg: 0.0000\n  Objective Value: 119.5037\n  Total Evaluations: 30",
      +    "text": "4.6 Run optimization\n\nresult_spot = optimizer_spot.optimize()\n\nInitial best: f(x) = 205.911302\nIter 1 | Best: 205.911302 | Curr: 207.213031 | Rate: 0.00 | Evals: 36.7%\nIter 2 | Best: 123.244947 | Rate: 0.50 | Evals: 40.0%\nIter 3 | Best: 123.049522 | Rate: 0.67 | Evals: 43.3%\nIter 4 | Best: 122.583435 | Rate: 0.75 | Evals: 46.7%\nIter 5 | Best: 121.856102 | Rate: 0.80 | Evals: 50.0%\nIter 6 | Best: 121.750731 | Rate: 0.83 | Evals: 53.3%\nIter 7 | Best: 121.581279 | Rate: 0.86 | Evals: 56.7%\nIter 8 | Best: 120.668434 | Rate: 0.88 | Evals: 60.0%\nIter 9 | Best: 120.668434 | Curr: 121.917692 | Rate: 0.78 | Evals: 63.3%\nIter 10 | Best: 120.668434 | Curr: 122.342894 | Rate: 0.70 | Evals: 66.7%\nIter 11 | Best: 120.136404 | Rate: 0.73 | Evals: 70.0%\nIter 12 | Best: 119.507981 | Rate: 0.75 | Evals: 73.3%\nIter 13 | Best: 119.503751 | Rate: 0.77 | Evals: 76.7%\nIter 14 | Best: 119.503672 | Rate: 0.79 | Evals: 80.0%\nOptimizer candidate 1/3 was duplicate/invalid.\nIter 15 | Best: 119.503672 | Curr: 120.365658 | Rate: 0.73 | Evals: 83.3%\nIter 16 | Best: 119.503672 | Curr: 119.528666 | Rate: 0.69 | Evals: 86.7%\nIter 17 | Best: 119.503672 | Curr: 119.504084 | Rate: 0.65 | Evals: 90.0%\nIter 18 | Best: 119.503672 | Curr: 119.511109 | Rate: 0.61 | Evals: 93.3%\nIter 19 | Best: 119.503672 | Rate: 0.63 | Evals: 96.7%\nIter 20 | Best: 119.503672 | Curr: 119.522520 | Rate: 0.60 | Evals: 100.0%\nTensorBoard writer closed. 
View logs with: tensorboard --logdir=runs/spotoptim_20260411_210457\n\n\n\n# End timing\nspot_time = time.time() - start_time\n\nprint(f\"\\nSpotOptim Results:\")\nprint(f\"  Best weight: {result_spot.fun:.4f} lb\")\nprint(f\"  Function evaluations: {result_spot.nfev}\")\nprint(f\"  Time elapsed: {spot_time:.2f} seconds\")\nprint(f\"  Success: {result_spot.success}\")\n\n\nSpotOptim Results:\n  Best weight: 119.5037 lb\n  Function evaluations: 30\n  Time elapsed: 10.67 seconds\n  Success: True\n\n\n\noptimizer_spot.print_best()\n\n\nBest Solution Found:\n--------------------------------------------------\n  Sw: 0.0000\n  Wfw: 0.0000\n  A: 0.0000\n  L: 0.5000\n  q: 0.0000\n  l: 0.0000\n  Rtc: 1.0000\n  Nz: 0.0000\n  Wdg: 0.0000\n  Objective Value: 119.5037\n  Total Evaluations: 30",
           "crumbs": [
             "Sequential Parameter Optimization Toolbox (SPOT)",
             "4  Optimizing the Aircraft Wing Weight Example"
      @@ -377,7 +377,7 @@
           "href": "awwe_optimization.html#comparison-of-results",
           "title": "4  Optimizing the Aircraft Wing Weight Example",
           "section": "4.12 Comparison of Results",
      -    "text": "4.12 Comparison of Results\n\n# Create comparison DataFrame\ncomparison = pd.DataFrame({\n    'Method': ['Baseline', 'SpotOptim', 'Nelder-Mead', 'BFGS'],\n    'Best Weight (lb)': [\n        baseline_weight,\n        result_spot.fun,\n        result_nm.fun,\n        result_bfgs.fun\n    ],\n    'Improvement (%)': [\n        0.0,\n        (baseline_weight - result_spot.fun) / baseline_weight * 100,\n        (baseline_weight - result_nm.fun) / baseline_weight * 100,\n        (baseline_weight - result_bfgs.fun) / baseline_weight * 100\n    ],\n    'Function Evals': [\n        1,\n        result_spot.nfev,\n        result_nm.nfev,\n        result_bfgs.nfev\n    ],\n    'Time (s)': [\n        0.0,\n        spot_time,\n        nm_time,\n        bfgs_time\n    ],\n    'Success': [\n        True,\n        result_spot.success,\n        result_nm.success,\n        result_bfgs.success\n    ]\n})\n\nprint(\"\\n\" + \"=\" * 80)\nprint(\"OPTIMIZATION COMPARISON\")\nprint(\"=\" * 80)\nprint(comparison.to_string(index=False))\nprint(\"=\" * 80)\n\n\n================================================================================\nOPTIMIZATION COMPARISON\n================================================================================\n     Method  Best Weight (lb)  Improvement (%)  Function Evals  Time (s)  Success\n   Baseline        233.908405         0.000000               1  0.000000     True\n  SpotOptim        119.503672        48.910057              30 10.980436     True\nNelder-Mead        220.544928         5.713124              30  0.000857    False\n       BFGS        119.503672        48.910057              60  0.001501    False\n================================================================================",
      +    "text": "4.12 Comparison of Results\n\n# Create comparison DataFrame\ncomparison = pd.DataFrame({\n    'Method': ['Baseline', 'SpotOptim', 'Nelder-Mead', 'BFGS'],\n    'Best Weight (lb)': [\n        baseline_weight,\n        result_spot.fun,\n        result_nm.fun,\n        result_bfgs.fun\n    ],\n    'Improvement (%)': [\n        0.0,\n        (baseline_weight - result_spot.fun) / baseline_weight * 100,\n        (baseline_weight - result_nm.fun) / baseline_weight * 100,\n        (baseline_weight - result_bfgs.fun) / baseline_weight * 100\n    ],\n    'Function Evals': [\n        1,\n        result_spot.nfev,\n        result_nm.nfev,\n        result_bfgs.nfev\n    ],\n    'Time (s)': [\n        0.0,\n        spot_time,\n        nm_time,\n        bfgs_time\n    ],\n    'Success': [\n        True,\n        result_spot.success,\n        result_nm.success,\n        result_bfgs.success\n    ]\n})\n\nprint(\"\\n\" + \"=\" * 80)\nprint(\"OPTIMIZATION COMPARISON\")\nprint(\"=\" * 80)\nprint(comparison.to_string(index=False))\nprint(\"=\" * 80)\n\n\n================================================================================\nOPTIMIZATION COMPARISON\n================================================================================\n     Method  Best Weight (lb)  Improvement (%)  Function Evals  Time (s)  Success\n   Baseline        233.908405         0.000000               1  0.000000     True\n  SpotOptim        119.503672        48.910057              30 10.674473     True\nNelder-Mead        220.544928         5.713124              30  0.000850    False\n       BFGS        119.503672        48.910057              60  0.001405    False\n================================================================================",
           "crumbs": [
             "Sequential Parameter Optimization Toolbox (SPOT)",
             "4  Optimizing the Aircraft Wing Weight Example"
      @@ -432,7 +432,7 @@
           "href": "awwe_optimization.html#method-efficiency-comparison",
           "title": "4  Optimizing the Aircraft Wing Weight Example",
           "section": "4.17 Method Efficiency Comparison",
      -    "text": "4.17 Method Efficiency Comparison\n\n# Calculate efficiency metrics\nefficiency = pd.DataFrame({\n    'Method': ['SpotOptim', 'Nelder-Mead', 'BFGS'],\n    'Weight Reduction (lb)': [\n        baseline_weight - result_spot.fun,\n        baseline_weight - result_nm.fun,\n        baseline_weight - result_bfgs.fun\n    ],\n    'Evals to Best': [\n        np.argmin(optimizer_spot.y_) + 1,\n        result_nm.nfev,\n        result_bfgs.nfev\n    ],\n    'Time per Eval (ms)': [\n        spot_time / result_spot.nfev * 1000,\n        nm_time / result_nm.nfev * 1000,\n        bfgs_time / result_bfgs.nfev * 1000\n    ]\n})\n\nprint(\"\\n\" + \"=\" * 80)\nprint(\"METHOD EFFICIENCY METRICS\")\nprint(\"=\" * 80)\nprint(efficiency.to_string(index=False))\nprint(\"=\" * 80)\n\n\n================================================================================\nMETHOD EFFICIENCY METRICS\n================================================================================\n     Method  Weight Reduction (lb)  Evals to Best  Time per Eval (ms)\n  SpotOptim             114.404734             29          366.014528\nNelder-Mead              13.363478             30            0.028570\n       BFGS             114.404734             60            0.025018\n================================================================================",
      +    "text": "4.17 Method Efficiency Comparison\n\n# Calculate efficiency metrics\nefficiency = pd.DataFrame({\n    'Method': ['SpotOptim', 'Nelder-Mead', 'BFGS'],\n    'Weight Reduction (lb)': [\n        baseline_weight - result_spot.fun,\n        baseline_weight - result_nm.fun,\n        baseline_weight - result_bfgs.fun\n    ],\n    'Evals to Best': [\n        np.argmin(optimizer_spot.y_) + 1,\n        result_nm.nfev,\n        result_bfgs.nfev\n    ],\n    'Time per Eval (ms)': [\n        spot_time / result_spot.nfev * 1000,\n        nm_time / result_nm.nfev * 1000,\n        bfgs_time / result_bfgs.nfev * 1000\n    ]\n})\n\nprint(\"\\n\" + \"=\" * 80)\nprint(\"METHOD EFFICIENCY METRICS\")\nprint(\"=\" * 80)\nprint(efficiency.to_string(index=False))\nprint(\"=\" * 80)\n\n\n================================================================================\nMETHOD EFFICIENCY METRICS\n================================================================================\n     Method  Weight Reduction (lb)  Evals to Best  Time per Eval (ms)\n  SpotOptim             114.404734             29          355.815768\nNelder-Mead              13.363478             30            0.028324\n       BFGS             114.404734             60            0.023413\n================================================================================",
           "crumbs": [
             "Sequential Parameter Optimization Toolbox (SPOT)",
             "4  Optimizing the Aircraft Wing Weight Example"
      @@ -564,7 +564,7 @@
           "href": "success_rate.html#second-example",
           "title": "6  Success Rate Tracking in SpotOptim",
           "section": "6.3 Second Example",
      -    "text": "6.3 Second Example\n\nfrom spotoptim import SpotOptim\nimport numpy as np\n\ndef sphere(X):\n    \"\"\"Simple sphere function: f(x) = sum(x^2)\"\"\"\n    return np.sum(X**2, axis=1)\n\n# Create optimizer\noptimizer = SpotOptim(\n    fun=sphere,\n    bounds=[(-5, 5), (-5, 5), (-5, 5)],\n    max_iter=20,\n    n_initial=10,\n    verbose=True\n)\n\n# Run optimization\nresult = optimizer.optimize()\n\n# Check success rate\nprint(f\"Final success rate: {optimizer.success_rate:.2%}\")\nprint(f\"Total evaluations: {optimizer.counter}\")\n\nTensorBoard logging disabled\nInitial best: f(x) = 3.194459\nIter 1 | Best: 2.930449 | Rate: 1.00 | Evals: 55.0%\nIter 2 | Best: 2.114603 | Rate: 1.00 | Evals: 60.0%\nIter 3 | Best: 0.261093 | Rate: 1.00 | Evals: 65.0%\nIter 4 | Best: 0.114542 | Rate: 1.00 | Evals: 70.0%\nIter 5 | Best: 0.021557 | Rate: 1.00 | Evals: 75.0%\nIter 6 | Best: 0.000849 | Rate: 1.00 | Evals: 80.0%\nIter 7 | Best: 0.000589 | Rate: 1.00 | Evals: 85.0%\nIter 8 | Best: 0.000034 | Rate: 1.00 | Evals: 90.0%\nIter 9 | Best: 0.000000 | Rate: 1.00 | Evals: 95.0%\nIter 10 | Best: 0.000000 | Rate: 1.00 | Evals: 100.0%\nFinal success rate: 100.00%\nTotal evaluations: 20",
      +    "text": "6.3 Second Example\n\nfrom spotoptim import SpotOptim\nimport numpy as np\n\ndef sphere(X):\n    \"\"\"Simple sphere function: f(x) = sum(x^2)\"\"\"\n    return np.sum(X**2, axis=1)\n\n# Create optimizer\noptimizer = SpotOptim(\n    fun=sphere,\n    bounds=[(-5, 5), (-5, 5), (-5, 5)],\n    max_iter=20,\n    n_initial=10,\n    verbose=True\n)\n\n# Run optimization\nresult = optimizer.optimize()\n\n# Check success rate\nprint(f\"Final success rate: {optimizer.success_rate:.2%}\")\nprint(f\"Total evaluations: {optimizer.counter}\")\n\nTensorBoard logging disabled\nInitial best: f(x) = 12.204669\nIter 1 | Best: 12.204669 | Curr: 12.210079 | Rate: 0.00 | Evals: 55.0%\nIter 2 | Best: 11.418586 | Rate: 0.50 | Evals: 60.0%\nIter 3 | Best: 6.409408 | Rate: 0.67 | Evals: 65.0%\nIter 4 | Best: 2.374566 | Rate: 0.75 | Evals: 70.0%\nIter 5 | Best: 0.672796 | Rate: 0.80 | Evals: 75.0%\nIter 6 | Best: 0.352679 | Rate: 0.83 | Evals: 80.0%\nIter 7 | Best: 0.002272 | Rate: 0.86 | Evals: 85.0%\nIter 8 | Best: 0.001173 | Rate: 0.88 | Evals: 90.0%\nIter 9 | Best: 0.000027 | Rate: 0.89 | Evals: 95.0%\nIter 10 | Best: 0.000027 | Curr: 0.000039 | Rate: 0.80 | Evals: 100.0%\nFinal success rate: 80.00%\nTotal evaluations: 20",
           "crumbs": [
             "Sequential Parameter Optimization Toolbox (SPOT)",
             "6  Success Rate Tracking in SpotOptim"
      @@ -707,7 +707,7 @@
           "href": "spotoptim_parallel.html#benchmark-example",
           "title": "7  Parallelization",
           "section": "7.3 Benchmark Example",
      -    "text": "7.3 Benchmark Example\nThe following example demonstrates the speedup achieved by using parallelization on a simulated expensive objective function.\n\n7.3.1 Benchmark Script\nWe compare sequential execution (n_jobs=1) against parallel execution (n_jobs=4) for a task simulating 4 independent optimization runs.\n\nimport os\nimport time\nimport warnings\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom spotoptim import SpotOptim\nfrom sklearn.exceptions import ConvergenceWarning\n\nos.environ[\"PYTHONWARNINGS\"] = \"ignore\"\nwarnings.filterwarnings(\"ignore\")\nwarnings.filterwarnings(\"ignore\", category=ConvergenceWarning)\n\ndef expensive_objective(X):\n    import time\n    import numpy as np\n    # Simulate a computationally expensive function\n    # Sleep for 0.05 seconds per point\n    n_points = X.shape[0]\n    time.sleep(0.05 * n_points)\n    # Simple sphere function\n    return np.sum(X**2, axis=1)\n\ndef run_benchmark():\n    n_runs = 4\n    n_iter_per_run = 10\n    \n    print(f\"Benchmark Configuration:\")\n    print(f\"  Objective cost: 0.05s per evaluation\")\n    print(f\"  Runs: {n_runs}\")\n    print(f\"  Iters per run: {n_iter_per_run}\")\n\n    # --- Sequential Execution (n_jobs=1) ---\n    print(\"\\nStarting Sequential Benchmark (n_jobs=1)...\")\n    start_seq = time.time()\n    for i in range(n_runs):\n        optimizer = SpotOptim(\n            fun=expensive_objective,\n            bounds=[(-5, 5)] * 2,\n            max_iter=n_iter_per_run,\n            n_initial=5,\n            n_jobs=1,\n            seed=42 + i,\n            verbose=False\n        )\n        optimizer.optimize()\n    end_seq = time.time()\n    time_seq = end_seq - start_seq\n    print(f\"Sequential Total Time: {time_seq:.2f}s\")\n    \n    # --- Parallel Execution (n_jobs=4) ---\n    print(\"\\nStarting Parallel Benchmark (n_jobs=4)...\")\n    start_par = time.time()\n    optimizer_par = SpotOptim(\n        fun=expensive_objective,\n        
bounds=[(-5, 5)] * 2,\n        max_iter=n_iter_per_run, \n        n_initial=5,\n        n_jobs=n_runs,   # 4 parallel tasks\n        seed=42,\n        verbose=False\n    )\n    optimizer_par.optimize()\n    end_par = time.time()\n    time_par = end_par - start_par\n    print(f\"Parallel Total Time: {time_par:.2f}s\")\n    \n    # --- Results ---\n    speedup = time_seq / time_par\n    print(\"-\" * 30)\n    print(f\"Speedup: {speedup:.2f}x\")\n\n    # --- Plotting ---\n    labels = ['Sequential', 'Parallel (n_jobs=4)']\n    times = [time_seq, time_par]\n\n    plt.figure(figsize=(8, 6))\n    bars = plt.bar(labels, times, color=['skyblue', 'salmon'])\n    plt.ylabel('Total Time (s)')\n    plt.title(f'Optimization Time Comparison\\n(Speedup: {speedup:.2f}x)')\n    plt.grid(axis='y', linestyle='--', alpha=0.7)\n    \n    # Add text labels on bars\n    for bar in bars:\n        height = bar.get_height()\n        plt.text(bar.get_x() + bar.get_width()/2., height,\n                f'{height:.2f}s',\n                ha='center', va='bottom')\n                \n    plt.show()\n\nif __name__ == \"__main__\":\n    run_benchmark()\n\nBenchmark Configuration:\n  Objective cost: 0.05s per evaluation\n  Runs: 4\n  Iters per run: 10\n\nStarting Sequential Benchmark (n_jobs=1)...\nSequential Total Time: 8.91s\n\nStarting Parallel Benchmark (n_jobs=4)...\nParallel Total Time: 5.64s\n------------------------------\nSpeedup: 1.58x\n\n\n\n\n\n\n\n\n\n\n\n7.3.2 Results\nRunning the benchmark on a standard multi-core machine yields significant speedups. In our test case with a simulated delay of 0.05s per evaluation:\n\nSequential Time: ~7.56s\nParallel Time: ~4.13s\nSpeedup: 1.83x\n\nNote: Actual speedup depends on the overhead of process spawning and the nature of the objective function. For very fast objective functions, the overhead of parallelization might outweigh the benefits.",
      +    "text": "7.3 Benchmark Example\nThe following example demonstrates the speedup achieved by using parallelization on a simulated expensive objective function.\n\n7.3.1 Benchmark Script\nWe compare sequential execution (n_jobs=1) against parallel execution (n_jobs=4) for a task simulating 4 independent optimization runs.\n\nimport os\nimport time\nimport warnings\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom spotoptim import SpotOptim\nfrom sklearn.exceptions import ConvergenceWarning\n\nos.environ[\"PYTHONWARNINGS\"] = \"ignore\"\nwarnings.filterwarnings(\"ignore\")\nwarnings.filterwarnings(\"ignore\", category=ConvergenceWarning)\n\ndef expensive_objective(X):\n    import time\n    import numpy as np\n    # Simulate a computationally expensive function\n    # Sleep for 0.05 seconds per point\n    n_points = X.shape[0]\n    time.sleep(0.05 * n_points)\n    # Simple sphere function\n    return np.sum(X**2, axis=1)\n\ndef run_benchmark():\n    n_runs = 4\n    n_iter_per_run = 10\n    \n    print(f\"Benchmark Configuration:\")\n    print(f\"  Objective cost: 0.05s per evaluation\")\n    print(f\"  Runs: {n_runs}\")\n    print(f\"  Iters per run: {n_iter_per_run}\")\n\n    # --- Sequential Execution (n_jobs=1) ---\n    print(\"\\nStarting Sequential Benchmark (n_jobs=1)...\")\n    start_seq = time.time()\n    for i in range(n_runs):\n        optimizer = SpotOptim(\n            fun=expensive_objective,\n            bounds=[(-5, 5)] * 2,\n            max_iter=n_iter_per_run,\n            n_initial=5,\n            n_jobs=1,\n            seed=42 + i,\n            verbose=False\n        )\n        optimizer.optimize()\n    end_seq = time.time()\n    time_seq = end_seq - start_seq\n    print(f\"Sequential Total Time: {time_seq:.2f}s\")\n    \n    # --- Parallel Execution (n_jobs=4) ---\n    print(\"\\nStarting Parallel Benchmark (n_jobs=4)...\")\n    start_par = time.time()\n    optimizer_par = SpotOptim(\n        fun=expensive_objective,\n        
bounds=[(-5, 5)] * 2,\n        max_iter=n_iter_per_run, \n        n_initial=5,\n        n_jobs=n_runs,   # 4 parallel tasks\n        seed=42,\n        verbose=False\n    )\n    optimizer_par.optimize()\n    end_par = time.time()\n    time_par = end_par - start_par\n    print(f\"Parallel Total Time: {time_par:.2f}s\")\n    \n    # --- Results ---\n    speedup = time_seq / time_par\n    print(\"-\" * 30)\n    print(f\"Speedup: {speedup:.2f}x\")\n\n    # --- Plotting ---\n    labels = ['Sequential', 'Parallel (n_jobs=4)']\n    times = [time_seq, time_par]\n\n    plt.figure(figsize=(8, 6))\n    bars = plt.bar(labels, times, color=['skyblue', 'salmon'])\n    plt.ylabel('Total Time (s)')\n    plt.title(f'Optimization Time Comparison\\n(Speedup: {speedup:.2f}x)')\n    plt.grid(axis='y', linestyle='--', alpha=0.7)\n    \n    # Add text labels on bars\n    for bar in bars:\n        height = bar.get_height()\n        plt.text(bar.get_x() + bar.get_width()/2., height,\n                f'{height:.2f}s',\n                ha='center', va='bottom')\n                \n    plt.show()\n\nif __name__ == \"__main__\":\n    run_benchmark()\n\nBenchmark Configuration:\n  Objective cost: 0.05s per evaluation\n  Runs: 4\n  Iters per run: 10\n\nStarting Sequential Benchmark (n_jobs=1)...\nSequential Total Time: 8.87s\n\nStarting Parallel Benchmark (n_jobs=4)...\nParallel Total Time: 5.39s\n------------------------------\nSpeedup: 1.65x\n\n\n\n\n\n\n\n\n\n\n\n7.3.2 Results\nRunning the benchmark on a standard multi-core machine yields significant speedups. In our test case with a simulated delay of 0.05s per evaluation:\n\nSequential Time: ~7.56s\nParallel Time: ~4.13s\nSpeedup: 1.83x\n\nNote: Actual speedup depends on the overhead of process spawning and the nature of the objective function. For very fast objective functions, the overhead of parallelization might outweigh the benefits.",
           "crumbs": [
             "Sequential Parameter Optimization Toolbox (SPOT)",
             "7  Parallelization"
      @@ -916,7 +916,7 @@
           "href": "factor_variables.html#quick-start",
           "title": "10  Factor Variables for Categorical Hyperparameters",
           "section": "10.2 Quick Start",
      -    "text": "10.2 Quick Start\n\n10.2.1 Basic Factor Variable Usage\n\nfrom spotoptim import SpotOptim\nimport numpy as np\n\ndef objective_function(X):\n    \"\"\"Objective function receives string values.\"\"\"\n    results = []\n    for params in X:\n        activation = params[0]  # This is a string!\n        print(f\"Testing activation: {activation}\")\n        \n        # Simple scoring based on activation choice (for demonstration)\n        # In real use, you would train a model and return actual performance\n        scores = {\n            \"ReLU\": 3500.0,\n            \"Sigmoid\": 4200.0,\n            \"Tanh\": 3800.0,\n            \"LeakyReLU\": 3600.0\n        }\n        score = scores.get(activation, 5000.0) + np.random.normal(0, 100)\n        results.append(score)\n    return np.array(results)  # Return numpy array\n\n# Define bounds with factor variable\noptimizer = SpotOptim(\n    fun=objective_function,\n    bounds=[(\"ReLU\", \"Sigmoid\", \"Tanh\", \"LeakyReLU\")],\n    var_type=[\"factor\"],\n    max_iter=20,\n    seed=42\n)\n\nresult = optimizer.optimize()\nprint(f\"\\nBest activation: {result.x[0]}\")  # Returns string, e.g., \"ReLU\"\nprint(f\"Best score: {result.fun:.4f}\")\n\nTesting activation: ReLU\nTesting activation: Sigmoid\nTesting activation: Tanh\nTesting activation: LeakyReLU\nTesting activation: Tanh\nTesting activation: Tanh\nTesting activation: Sigmoid\nTesting activation: Tanh\nTesting activation: Tanh\nTesting activation: Tanh\nTesting activation: Sigmoid\nTesting activation: Sigmoid\nTesting activation: ReLU\nTesting activation: Tanh\nTesting activation: LeakyReLU\nTesting activation: LeakyReLU\nTesting activation: LeakyReLU\nTesting activation: Tanh\nTesting activation: Sigmoid\nTesting activation: LeakyReLU\n\nBest activation: LeakyReLU\nBest score: 3427.5082\n\n\n\n\n10.2.2 Neural Network Activation Function Optimization\n\nimport torch\nimport torch.nn as nn\nfrom spotoptim import SpotOptim\nfrom spotoptim.data 
import get_diabetes_dataloaders\nfrom spotoptim.nn.linear_regressor import LinearRegressor\nimport numpy as np\n\ndef train_and_evaluate(X):\n    \"\"\"Train models with different activation functions.\"\"\"\n    results = []\n    \n    for params in X:\n        activation = params[0]  # String: \"ReLU\", \"Sigmoid\", etc.\n        \n        # Load data\n        train_loader, test_loader, _ = get_diabetes_dataloaders()\n        \n        # Create model with the activation function\n        model = LinearRegressor(\n            input_dim=10,\n            output_dim=1,\n            l1=64,\n            num_hidden_layers=2,\n            activation=activation  # Pass string directly!\n        )\n        \n        # Train model\n        optimizer = model.get_optimizer(\"Adam\", lr=0.01)\n        criterion = nn.MSELoss()\n        \n        for epoch in range(50):\n            model.train()\n            for batch_X, batch_y in train_loader:\n                predictions = model(batch_X)\n                loss = criterion(predictions, batch_y)\n                optimizer.zero_grad()\n                loss.backward()\n                optimizer.step()\n        \n        # Evaluate\n        model.eval()\n        test_loss = 0.0\n        with torch.no_grad():\n            for batch_X, batch_y in test_loader:\n                predictions = model(batch_X)\n                test_loss += criterion(predictions, batch_y).item()\n        \n        avg_loss = test_loss / len(test_loader)\n        results.append(avg_loss)\n    \n    return np.array(results)  # Return numpy array\n\n# Optimize activation function choice\noptimizer = SpotOptim(\n    fun=train_and_evaluate,\n    bounds=[(\"ReLU\", \"Sigmoid\", \"Tanh\", \"LeakyReLU\", \"ELU\")],\n    var_type=[\"factor\"],\n    max_iter=30\n)\n\nresult = optimizer.optimize()\nprint(f\"Best activation function: {result.x[0]}\")\nprint(f\"Best test MSE: {result.fun:.4f}\")\n\nBest activation function: Sigmoid\nBest test MSE: 26493.4544",
      +    "text": "10.2 Quick Start\n\n10.2.1 Basic Factor Variable Usage\n\nfrom spotoptim import SpotOptim\nimport numpy as np\n\ndef objective_function(X):\n    \"\"\"Objective function receives string values.\"\"\"\n    results = []\n    for params in X:\n        activation = params[0]  # This is a string!\n        print(f\"Testing activation: {activation}\")\n        \n        # Simple scoring based on activation choice (for demonstration)\n        # In real use, you would train a model and return actual performance\n        scores = {\n            \"ReLU\": 3500.0,\n            \"Sigmoid\": 4200.0,\n            \"Tanh\": 3800.0,\n            \"LeakyReLU\": 3600.0\n        }\n        score = scores.get(activation, 5000.0) + np.random.normal(0, 100)\n        results.append(score)\n    return np.array(results)  # Return numpy array\n\n# Define bounds with factor variable\noptimizer = SpotOptim(\n    fun=objective_function,\n    bounds=[(\"ReLU\", \"Sigmoid\", \"Tanh\", \"LeakyReLU\")],\n    var_type=[\"factor\"],\n    max_iter=20,\n    seed=42\n)\n\nresult = optimizer.optimize()\nprint(f\"\\nBest activation: {result.x[0]}\")  # Returns string, e.g., \"ReLU\"\nprint(f\"Best score: {result.fun:.4f}\")\n\nTesting activation: ReLU\nTesting activation: Sigmoid\nTesting activation: Tanh\nTesting activation: LeakyReLU\nTesting activation: Tanh\nTesting activation: Tanh\nTesting activation: Sigmoid\nTesting activation: Tanh\nTesting activation: Tanh\nTesting activation: Tanh\nTesting activation: Sigmoid\nTesting activation: Sigmoid\nTesting activation: ReLU\nTesting activation: Tanh\nTesting activation: LeakyReLU\nTesting activation: LeakyReLU\nTesting activation: LeakyReLU\nTesting activation: Tanh\nTesting activation: Sigmoid\nTesting activation: LeakyReLU\n\nBest activation: LeakyReLU\nBest score: 3427.5082\n\n\n\n\n10.2.2 Neural Network Activation Function Optimization\n\nimport torch\nimport torch.nn as nn\nfrom spotoptim import SpotOptim\nfrom spotoptim.data 
import get_diabetes_dataloaders\nfrom spotoptim.nn.linear_regressor import LinearRegressor\nimport numpy as np\n\ndef train_and_evaluate(X):\n    \"\"\"Train models with different activation functions.\"\"\"\n    results = []\n    \n    for params in X:\n        activation = params[0]  # String: \"ReLU\", \"Sigmoid\", etc.\n        \n        # Load data\n        train_loader, test_loader, _ = get_diabetes_dataloaders()\n        \n        # Create model with the activation function\n        model = LinearRegressor(\n            input_dim=10,\n            output_dim=1,\n            l1=64,\n            num_hidden_layers=2,\n            activation=activation  # Pass string directly!\n        )\n        \n        # Train model\n        optimizer = model.get_optimizer(\"Adam\", lr=0.01)\n        criterion = nn.MSELoss()\n        \n        for epoch in range(50):\n            model.train()\n            for batch_X, batch_y in train_loader:\n                predictions = model(batch_X)\n                loss = criterion(predictions, batch_y)\n                optimizer.zero_grad()\n                loss.backward()\n                optimizer.step()\n        \n        # Evaluate\n        model.eval()\n        test_loss = 0.0\n        with torch.no_grad():\n            for batch_X, batch_y in test_loader:\n                predictions = model(batch_X)\n                test_loss += criterion(predictions, batch_y).item()\n        \n        avg_loss = test_loss / len(test_loader)\n        results.append(avg_loss)\n    \n    return np.array(results)  # Return numpy array\n\n# Optimize activation function choice\noptimizer = SpotOptim(\n    fun=train_and_evaluate,\n    bounds=[(\"ReLU\", \"Sigmoid\", \"Tanh\", \"LeakyReLU\", \"ELU\")],\n    var_type=[\"factor\"],\n    max_iter=30\n)\n\nresult = optimizer.optimize()\nprint(f\"Best activation function: {result.x[0]}\")\nprint(f\"Best test MSE: {result.fun:.4f}\")\n\nBest activation function: Sigmoid\nBest test MSE: 26315.9648",
           "crumbs": [
             "Variables and Hyperparameters",
             "10  Factor Variables for Categorical Hyperparameters"
      @@ -927,7 +927,7 @@
           "href": "factor_variables.html#mixed-variable-types",
           "title": "10  Factor Variables for Categorical Hyperparameters",
           "section": "10.3 Mixed Variable Types",
      -    "text": "10.3 Mixed Variable Types\n\n10.3.1 Combining Factor, Integer, and Continuous Variables\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom spotoptim import SpotOptim\nfrom spotoptim.data import get_diabetes_dataloaders\nfrom spotoptim.nn.linear_regressor import LinearRegressor\n\ndef comprehensive_optimization(X):\n    \"\"\"Optimize learning rate, layer size, depth, and activation.\"\"\"\n    results = []\n    \n    for params in X:\n        log_lr = params[0]      # Continuous (log scale)\n        l1 = int(params[1])     # Integer\n        n_layers = int(params[2])  # Integer\n        activation = params[3]   # Factor (string)\n        \n        lr = 10 ** log_lr  # Convert from log scale\n        \n        print(f\"lr={lr:.6f}, l1={l1}, layers={n_layers}, activation={activation}\")\n        \n        # Load data\n        train_loader, test_loader, _ = get_diabetes_dataloaders(\n            batch_size=32,\n            random_state=42\n        )\n        \n        # Create model\n        model = LinearRegressor(\n            input_dim=10,\n            output_dim=1,\n            l1=l1,\n            num_hidden_layers=n_layers,\n            activation=activation\n        )\n        \n        # Train\n        optimizer = model.get_optimizer(\"Adam\", lr=lr)\n        criterion = nn.MSELoss()\n        \n        for epoch in range(30):\n            model.train()\n            for batch_X, batch_y in train_loader:\n                predictions = model(batch_X)\n                loss = criterion(predictions, batch_y)\n                optimizer.zero_grad()\n                loss.backward()\n                optimizer.step()\n        \n        # Evaluate\n        model.eval()\n        test_loss = 0.0\n        with torch.no_grad():\n            for batch_X, batch_y in test_loader:\n                predictions = model(batch_X)\n                test_loss += criterion(predictions, batch_y).item()\n        \n        results.append(test_loss / 
len(test_loader))\n    \n    return np.array(results)\n\n# Optimize all four hyperparameters simultaneously\noptimizer = SpotOptim(\n    fun=comprehensive_optimization,\n    bounds=[\n        (-4, -2),                                    # log10(learning_rate)\n        (16, 128),                                   # l1 (neurons per layer)\n        (0, 4),                                      # num_hidden_layers\n        (\"ReLU\", \"Sigmoid\", \"Tanh\", \"LeakyReLU\")   # activation function\n    ],\n    var_type=[\"float\", \"int\", \"int\", \"factor\"],\n    max_iter=50\n)\n\nresult = optimizer.optimize()\n\n# Results contain original string values\nprint(\"\\nOptimization Results:\")\nprint(f\"Best learning rate: {10**result.x[0]:.6f}\")\nprint(f\"Best layer size: {int(result.x[1])}\")\nprint(f\"Best num layers: {int(result.x[2])}\")\nprint(f\"Best activation: {result.x[3]}\")  # String value!\nprint(f\"Best test MSE: {result.fun:.4f}\")\n\nlr=0.000112, l1=24, layers=2, activation=LeakyReLU\nlr=0.000274, l1=60, layers=4, activation=Sigmoid\nlr=0.003498, l1=84, layers=2, activation=Tanh\nlr=0.001025, l1=126, layers=3, activation=Tanh\nlr=0.004205, l1=40, layers=4, activation=Sigmoid\nlr=0.000226, l1=75, layers=0, activation=Tanh\nlr=0.000737, l1=104, layers=1, activation=Tanh\nlr=0.009249, l1=31, layers=1, activation=ReLU\nlr=0.002215, l1=114, layers=2, activation=Sigmoid\nlr=0.000611, l1=61, layers=1, activation=ReLU\nlr=0.002219, l1=114, layers=2, activation=Sigmoid\nlr=0.002576, l1=67, layers=1, activation=Sigmoid\nlr=0.000629, l1=58, layers=3, activation=Tanh\nlr=0.000411, l1=98, layers=4, activation=Sigmoid\nlr=0.000583, l1=59, layers=0, activation=LeakyReLU\nlr=0.000192, l1=67, layers=1, activation=ReLU\nlr=0.000932, l1=92, layers=2, activation=LeakyReLU\nlr=0.003979, l1=104, layers=1, activation=Sigmoid\nlr=0.002088, l1=60, layers=2, activation=ReLU\nlr=0.000104, l1=127, layers=3, activation=Tanh\nlr=0.000137, l1=61, layers=4, activation=ReLU\nlr=0.001209, 
l1=38, layers=4, activation=Sigmoid\nlr=0.000309, l1=77, layers=3, activation=ReLU\nlr=0.000203, l1=67, layers=3, activation=LeakyReLU\nlr=0.002571, l1=37, layers=4, activation=LeakyReLU\nlr=0.001055, l1=93, layers=1, activation=Tanh\nlr=0.000122, l1=34, layers=4, activation=ReLU\nlr=0.000116, l1=124, layers=3, activation=Sigmoid\nlr=0.000184, l1=35, layers=0, activation=Sigmoid\nlr=0.000101, l1=25, layers=2, activation=Tanh\nlr=0.009519, l1=95, layers=3, activation=Sigmoid\nlr=0.006033, l1=120, layers=3, activation=Tanh\nlr=0.003073, l1=72, layers=3, activation=Tanh\nlr=0.000986, l1=38, layers=1, activation=ReLU\nlr=0.004182, l1=97, layers=1, activation=Sigmoid\nlr=0.001686, l1=85, layers=4, activation=Sigmoid\nlr=0.000631, l1=77, layers=2, activation=LeakyReLU\nlr=0.003100, l1=71, layers=4, activation=ReLU\nlr=0.000980, l1=33, layers=2, activation=ReLU\nlr=0.000115, l1=76, layers=3, activation=ReLU\nlr=0.001067, l1=126, layers=2, activation=Tanh\nlr=0.001209, l1=70, layers=1, activation=ReLU\nlr=0.000643, l1=59, layers=0, activation=Tanh\nlr=0.000107, l1=82, layers=3, activation=Sigmoid\nlr=0.006051, l1=73, layers=2, activation=Sigmoid\nlr=0.004060, l1=123, layers=1, activation=Sigmoid\nlr=0.005555, l1=76, layers=2, activation=Sigmoid\nlr=0.002506, l1=74, layers=1, activation=ReLU\nlr=0.000172, l1=128, layers=2, activation=Sigmoid\nlr=0.001106, l1=72, layers=3, activation=Tanh\n\nOptimization Results:\nBest learning rate: 0.004060\nBest layer size: 123\nBest num layers: 1\nBest activation: Sigmoid\nBest test MSE: 26416.2917",
      +    "text": "10.3 Mixed Variable Types\n\n10.3.1 Combining Factor, Integer, and Continuous Variables\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom spotoptim import SpotOptim\nfrom spotoptim.data import get_diabetes_dataloaders\nfrom spotoptim.nn.linear_regressor import LinearRegressor\n\ndef comprehensive_optimization(X):\n    \"\"\"Optimize learning rate, layer size, depth, and activation.\"\"\"\n    results = []\n    \n    for params in X:\n        log_lr = params[0]      # Continuous (log scale)\n        l1 = int(params[1])     # Integer\n        n_layers = int(params[2])  # Integer\n        activation = params[3]   # Factor (string)\n        \n        lr = 10 ** log_lr  # Convert from log scale\n        \n        print(f\"lr={lr:.6f}, l1={l1}, layers={n_layers}, activation={activation}\")\n        \n        # Load data\n        train_loader, test_loader, _ = get_diabetes_dataloaders(\n            batch_size=32,\n            random_state=42\n        )\n        \n        # Create model\n        model = LinearRegressor(\n            input_dim=10,\n            output_dim=1,\n            l1=l1,\n            num_hidden_layers=n_layers,\n            activation=activation\n        )\n        \n        # Train\n        optimizer = model.get_optimizer(\"Adam\", lr=lr)\n        criterion = nn.MSELoss()\n        \n        for epoch in range(30):\n            model.train()\n            for batch_X, batch_y in train_loader:\n                predictions = model(batch_X)\n                loss = criterion(predictions, batch_y)\n                optimizer.zero_grad()\n                loss.backward()\n                optimizer.step()\n        \n        # Evaluate\n        model.eval()\n        test_loss = 0.0\n        with torch.no_grad():\n            for batch_X, batch_y in test_loader:\n                predictions = model(batch_X)\n                test_loss += criterion(predictions, batch_y).item()\n        \n        results.append(test_loss / 
len(test_loader))\n    \n    return np.array(results)\n\n# Optimize all four hyperparameters simultaneously\noptimizer = SpotOptim(\n    fun=comprehensive_optimization,\n    bounds=[\n        (-4, -2),                                    # log10(learning_rate)\n        (16, 128),                                   # l1 (neurons per layer)\n        (0, 4),                                      # num_hidden_layers\n        (\"ReLU\", \"Sigmoid\", \"Tanh\", \"LeakyReLU\")   # activation function\n    ],\n    var_type=[\"float\", \"int\", \"int\", \"factor\"],\n    max_iter=50\n)\n\nresult = optimizer.optimize()\n\n# Results contain original string values\nprint(\"\\nOptimization Results:\")\nprint(f\"Best learning rate: {10**result.x[0]:.6f}\")\nprint(f\"Best layer size: {int(result.x[1])}\")\nprint(f\"Best num layers: {int(result.x[2])}\")\nprint(f\"Best activation: {result.x[3]}\")  # String value!\nprint(f\"Best test MSE: {result.fun:.4f}\")\n\nlr=0.000957, l1=114, layers=3, activation=Sigmoid\nlr=0.003554, l1=43, layers=1, activation=ReLU\nlr=0.000222, l1=78, layers=2, activation=LeakyReLU\nlr=0.005857, l1=30, layers=3, activation=Tanh\nlr=0.000476, l1=64, layers=0, activation=Tanh\nlr=0.001121, l1=119, layers=1, activation=Sigmoid\nlr=0.009714, l1=17, layers=4, activation=ReLU\nlr=0.000317, l1=51, layers=2, activation=Sigmoid\nlr=0.001837, l1=93, layers=2, activation=LeakyReLU\nlr=0.000137, l1=97, layers=4, activation=Tanh\nlr=0.000263, l1=52, layers=2, activation=Sigmoid\nlr=0.000350, l1=50, layers=2, activation=Sigmoid\nlr=0.000316, l1=51, layers=2, activation=Sigmoid\nlr=0.004128, l1=120, layers=2, activation=Tanh\nlr=0.007577, l1=46, layers=2, activation=LeakyReLU\nlr=0.007774, l1=83, layers=3, activation=Tanh\nlr=0.000527, l1=62, layers=3, activation=Sigmoid\nlr=0.000321, l1=115, layers=1, activation=ReLU\nlr=0.006431, l1=51, layers=3, activation=Tanh\nlr=0.000318, l1=51, layers=2, activation=Sigmoid\nlr=0.005516, l1=67, layers=0, 
activation=Sigmoid\nlr=0.001699, l1=86, layers=2, activation=Sigmoid\nlr=0.003052, l1=26, layers=2, activation=LeakyReLU\nlr=0.000322, l1=84, layers=2, activation=Tanh\nlr=0.000107, l1=56, layers=4, activation=Sigmoid\nlr=0.000408, l1=26, layers=3, activation=Sigmoid\nlr=0.000114, l1=116, layers=2, activation=Sigmoid\nlr=0.004964, l1=55, layers=2, activation=LeakyReLU\nlr=0.000358, l1=65, layers=1, activation=LeakyReLU\nlr=0.000316, l1=51, layers=2, activation=Sigmoid\nlr=0.003022, l1=62, layers=2, activation=Tanh\nlr=0.000317, l1=51, layers=2, activation=Sigmoid\nlr=0.000120, l1=104, layers=4, activation=LeakyReLU\nlr=0.002331, l1=109, layers=3, activation=Tanh\nlr=0.000106, l1=48, layers=2, activation=Tanh\nlr=0.000198, l1=57, layers=2, activation=LeakyReLU\nlr=0.000317, l1=51, layers=2, activation=Sigmoid\nlr=0.000437, l1=38, layers=1, activation=Tanh\nlr=0.001433, l1=86, layers=1, activation=LeakyReLU\nlr=0.000211, l1=127, layers=2, activation=Sigmoid\nlr=0.001690, l1=53, layers=2, activation=Tanh\nlr=0.000586, l1=86, layers=4, activation=ReLU\nlr=0.005586, l1=125, layers=3, activation=LeakyReLU\nlr=0.000144, l1=106, layers=3, activation=ReLU\nlr=0.006845, l1=117, layers=2, activation=LeakyReLU\nlr=0.005839, l1=63, layers=1, activation=LeakyReLU\nlr=0.003284, l1=124, layers=2, activation=ReLU\nlr=0.000968, l1=41, layers=3, activation=LeakyReLU\nlr=0.001697, l1=39, layers=2, activation=Tanh\nlr=0.002121, l1=91, layers=3, activation=Sigmoid\n\nOptimization Results:\nBest learning rate: 0.000317\nBest layer size: 51\nBest num layers: 2\nBest activation: Sigmoid\nBest test MSE: 26490.0690",
           "crumbs": [
             "Variables and Hyperparameters",
             "10  Factor Variables for Categorical Hyperparameters"
      @@ -938,7 +938,7 @@
           "href": "factor_variables.html#multiple-factor-variables",
           "title": "10  Factor Variables for Categorical Hyperparameters",
           "section": "10.4 Multiple Factor Variables",
      -    "text": "10.4 Multiple Factor Variables\n\n10.4.1 Optimizing Both Activation and Optimizer\n\nfrom spotoptim import SpotOptim\nfrom spotoptim.data import get_diabetes_dataloaders\nfrom spotoptim.nn.linear_regressor import LinearRegressor\nimport torch.nn as nn\nimport numpy as np\n\ndef optimize_activation_and_optimizer(X):\n    \"\"\"Optimize both activation function and optimizer choice.\"\"\"\n    results = []\n    \n    for params in X:\n        activation = params[0]      # Factor variable 1\n        optimizer_name = params[1]  # Factor variable 2\n        lr = 10 ** params[2]        # Continuous variable\n        \n        train_loader, test_loader, _ = get_diabetes_dataloaders()\n        \n        model = LinearRegressor(\n            input_dim=10,\n            output_dim=1,\n            l1=64,\n            num_hidden_layers=2,\n            activation=activation\n        )\n        \n        # Use the optimizer string\n        optimizer = model.get_optimizer(optimizer_name, lr=lr)\n        criterion = nn.MSELoss()\n        \n        # Train\n        for epoch in range(30):\n            model.train()\n            for batch_X, batch_y in train_loader:\n                predictions = model(batch_X)\n                loss = criterion(predictions, batch_y)\n                optimizer.zero_grad()\n                loss.backward()\n                optimizer.step()\n        \n        # Evaluate\n        model.eval()\n        test_loss = 0.0\n        with torch.no_grad():\n            for batch_X, batch_y in test_loader:\n                predictions = model(batch_X)\n                test_loss += criterion(predictions, batch_y).item()\n        \n        results.append(test_loss / len(test_loader))\n    \n    return np.array(results)  # Return numpy array\n\n# Two factor variables + one continuous\nopt = SpotOptim(\n    fun=optimize_activation_and_optimizer,\n    bounds=[\n        (\"ReLU\", \"Tanh\", \"Sigmoid\", \"LeakyReLU\"),    # Activation\n        
(\"Adam\", \"SGD\", \"RMSprop\", \"AdamW\"),         # Optimizer\n        (-4, -2)                                      # log10(lr)\n    ],\n    var_type=[\"factor\", \"factor\", \"float\"],\n    max_iter=40\n)\n\nresult = opt.optimize()\nprint(f\"Best activation: {result.x[0]}\")\nprint(f\"Best optimizer: {result.x[1]}\")\nprint(f\"Best learning rate: {10**result.x[2]:.6f}\")\n\nBest activation: ReLU\nBest optimizer: SGD\nBest learning rate: 0.006053",
      +    "text": "10.4 Multiple Factor Variables\n\n10.4.1 Optimizing Both Activation and Optimizer\n\nfrom spotoptim import SpotOptim\nfrom spotoptim.data import get_diabetes_dataloaders\nfrom spotoptim.nn.linear_regressor import LinearRegressor\nimport torch.nn as nn\nimport numpy as np\n\ndef optimize_activation_and_optimizer(X):\n    \"\"\"Optimize both activation function and optimizer choice.\"\"\"\n    results = []\n    \n    for params in X:\n        activation = params[0]      # Factor variable 1\n        optimizer_name = params[1]  # Factor variable 2\n        lr = 10 ** params[2]        # Continuous variable\n        \n        train_loader, test_loader, _ = get_diabetes_dataloaders()\n        \n        model = LinearRegressor(\n            input_dim=10,\n            output_dim=1,\n            l1=64,\n            num_hidden_layers=2,\n            activation=activation\n        )\n        \n        # Use the optimizer string\n        optimizer = model.get_optimizer(optimizer_name, lr=lr)\n        criterion = nn.MSELoss()\n        \n        # Train\n        for epoch in range(30):\n            model.train()\n            for batch_X, batch_y in train_loader:\n                predictions = model(batch_X)\n                loss = criterion(predictions, batch_y)\n                optimizer.zero_grad()\n                loss.backward()\n                optimizer.step()\n        \n        # Evaluate\n        model.eval()\n        test_loss = 0.0\n        with torch.no_grad():\n            for batch_X, batch_y in test_loader:\n                predictions = model(batch_X)\n                test_loss += criterion(predictions, batch_y).item()\n        \n        results.append(test_loss / len(test_loader))\n    \n    return np.array(results)  # Return numpy array\n\n# Two factor variables + one continuous\nopt = SpotOptim(\n    fun=optimize_activation_and_optimizer,\n    bounds=[\n        (\"ReLU\", \"Tanh\", \"Sigmoid\", \"LeakyReLU\"),    # Activation\n        
(\"Adam\", \"SGD\", \"RMSprop\", \"AdamW\"),         # Optimizer\n        (-4, -2)                                      # log10(lr)\n    ],\n    var_type=[\"factor\", \"factor\", \"float\"],\n    max_iter=40\n)\n\nresult = opt.optimize()\nprint(f\"Best activation: {result.x[0]}\")\nprint(f\"Best optimizer: {result.x[1]}\")\nprint(f\"Best learning rate: {10**result.x[2]:.6f}\")\n\nBest activation: LeakyReLU\nBest optimizer: SGD\nBest learning rate: 0.007715",
           "crumbs": [
             "Variables and Hyperparameters",
             "10  Factor Variables for Categorical Hyperparameters"
      @@ -1092,7 +1092,7 @@
           "href": "transformations.html#viewing-transformations-in-tables",
           "title": "11  Variable Transformations for Search Space Scaling",
           "section": "11.6 Viewing Transformations in Tables",
      -    "text": "11.6 Viewing Transformations in Tables\nThe transformation type is displayed in the “trans” column of both design and results tables:\n\n11.6.1 Design Table (Before Optimization)\n\nfrom spotoptim import SpotOptim\nimport numpy as np\n\noptimizer = SpotOptim(\n    fun=lambda X: np.sum(X**2, axis=1),\n    bounds=[\n        (0.001, 1.0),\n        (0.01, 10.0),\n        (10, 1000),\n        (-5, 5)\n    ],\n    var_trans=[\"log10\", \"log\", \"sqrt\", None],\n    var_name=[\"lr\", \"alpha\", \"neurons\", \"bias\"],\n    max_iter=10\n)\n\n# Display design table\nprint(optimizer.get_design_table())\n\n|    name |   type |   lower |   upper |   default |   transform |\n|---------|--------|---------|---------|-----------|-------------|\n|      lr |  float |   0.001 |       1 |    0.5005 |       log10 |\n|   alpha |  float |    0.01 |      10 |     5.005 |         log |\n| neurons |  float |      10 |    1000 |       505 |        sqrt |\n|    bias |  float |      -5 |       5 |         0 |           - |\n\n\nOutput:\n| name    | type   |    lower |    upper |   default | trans   |\n|---------|--------|----------|----------|-----------|---------|\n| lr      | num    |   0.0010 |   1.0000 |    0.5005 | log10   |\n| alpha   | num    |   0.0100 |  10.0000 |    5.0050 | log     |\n| neurons | num    |  10.0000 | 1000.0000 |  505.0000 | sqrt    |\n| bias    | num    |  -5.0000 |   5.0000 |    0.0000 | -       |\n\n\n11.6.2 Results Table (After Optimization)\n\nresult = optimizer.optimize()\n\n# Display results with transformations\nprint(optimizer.get_results_table())\n\n|    name |   type |   default |   lower |   upper |   tuned |   transform |\n|---------|--------|-----------|---------|---------|---------|-------------|\n|      lr |  float |    0.5005 |   0.001 |       1 |  0.0085 |       log10 |\n|   alpha |  float |     5.005 |    0.01 |      10 |  0.1281 |         log |\n| neurons |  float |       505 |      10 |    1000 | 31.1461 |        sqrt |\n|    
bias |  float |         0 |      -5 |       5 |  0.0908 |           - |\n\n\nOutput shows the “trans” column with transformation types, helping you understand which parameters were optimized in which scale.",
      +    "text": "11.6 Viewing Transformations in Tables\nThe transformation type is displayed in the “trans” column of both design and results tables:\n\n11.6.1 Design Table (Before Optimization)\n\nfrom spotoptim import SpotOptim\nimport numpy as np\n\noptimizer = SpotOptim(\n    fun=lambda X: np.sum(X**2, axis=1),\n    bounds=[\n        (0.001, 1.0),\n        (0.01, 10.0),\n        (10, 1000),\n        (-5, 5)\n    ],\n    var_trans=[\"log10\", \"log\", \"sqrt\", None],\n    var_name=[\"lr\", \"alpha\", \"neurons\", \"bias\"],\n    max_iter=10\n)\n\n# Display design table\nprint(optimizer.get_design_table())\n\n|    name |   type |   lower |   upper |   default |   transform |\n|---------|--------|---------|---------|-----------|-------------|\n|      lr |  float |   0.001 |       1 |    0.5005 |       log10 |\n|   alpha |  float |    0.01 |      10 |     5.005 |         log |\n| neurons |  float |      10 |    1000 |       505 |        sqrt |\n|    bias |  float |      -5 |       5 |         0 |           - |\n\n\nOutput:\n| name    | type   |    lower |    upper |   default | trans   |\n|---------|--------|----------|----------|-----------|---------|\n| lr      | num    |   0.0010 |   1.0000 |    0.5005 | log10   |\n| alpha   | num    |   0.0100 |  10.0000 |    5.0050 | log     |\n| neurons | num    |  10.0000 | 1000.0000 |  505.0000 | sqrt    |\n| bias    | num    |  -5.0000 |   5.0000 |    0.0000 | -       |\n\n\n11.6.2 Results Table (After Optimization)\n\nresult = optimizer.optimize()\n\n# Display results with transformations\nprint(optimizer.get_results_table())\n\n|    name |   type |   default |   lower |   upper |   tuned |   transform |\n|---------|--------|-----------|---------|---------|---------|-------------|\n|      lr |  float |    0.5005 |   0.001 |       1 |  0.0041 |       log10 |\n|   alpha |  float |     5.005 |    0.01 |      10 |  0.1308 |         log |\n| neurons |  float |       505 |      10 |    1000 | 21.3053 |        sqrt |\n|    
bias |  float |         0 |      -5 |       5 | -1.2417 |           - |\n\n\nOutput shows the “trans” column with transformation types, helping you understand which parameters were optimized in which scale.",
           "crumbs": [
             "Variables and Hyperparameters",
             "11  Variable Transformations for Search Space Scaling"
      @@ -1290,7 +1290,7 @@
           "href": "tensorboard.html#quick-start",
           "title": "13  TensorBoard Logging in SpotOptim",
           "section": "",
      -    "text": "13.1.1 Enable TensorBoard Logging\n\nfrom spotoptim import SpotOptim\nimport numpy as np\n\ndef sphere(X):\n    return np.sum(X**2, axis=1)\n\noptimizer = SpotOptim(\n    fun=sphere,\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=20,\n    n_initial=10,\n    tensorboard_log=True,  # Enable logging\n    tensorboard_clean=True,\n    verbose=True,\n    seed=42\n)\n\nresult = optimizer.optimize()\nprint(f\"Best value: {result.fun:.6f}\")\nprint(f\"Logs saved to: runs/{optimizer.tensorboard_path}\")\n\nRemoved old TensorBoard logs: runs/spotoptim_20260404_093122\nCleaned 1 old TensorBoard log directory\nTensorBoard logging enabled: runs/spotoptim_20260404_094042\nInitial best: f(x) = 3.925721\nIter 1 | Best: 0.020506 | Rate: 1.00 | Evals: 55.0%\nIter 2 | Best: 0.000751 | Rate: 1.00 | Evals: 60.0%\nIter 3 | Best: 0.000024 | Rate: 1.00 | Evals: 65.0%\nIter 4 | Best: 0.000000 | Rate: 1.00 | Evals: 70.0%\nIter 5 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.80 | Evals: 75.0%\nIter 6 | Best: 0.000000 | Rate: 0.83 | Evals: 80.0%\nIter 7 | Best: 0.000000 | Rate: 0.86 | Evals: 85.0%\nIter 8 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.75 | Evals: 90.0%\nIter 9 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.67 | Evals: 95.0%\nIter 10 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.60 | Evals: 100.0%\nTensorBoard writer closed. 
View logs with: tensorboard --logdir=runs/spotoptim_20260404_094042\nBest value: 0.000000\nLogs saved to: runs/runs/spotoptim_20260404_094042\n\n\n\n\n13.1.2 View Logs in TensorBoard\nIn a separate terminal, run:\ntensorboard --logdir=runs\nThen open your browser to http://localhost:6006\n\n\n13.1.3 Cleaning Old Logs\nYou can automatically remove old TensorBoard logs before starting a new optimization:\noptimizer = SpotOptim(\n    fun=objective,\n    bounds=[(-5, 5), (-5, 5)],\n    tensorboard_log=True,\n    tensorboard_clean=True,  # Remove old logs from 'runs' directory\n    verbose=True\n)\n\n\n\n\n\n\nNoteWarning\n\n\n\nThis permanently deletes all subdirectories in the runs folder. Make sure to save important logs elsewhere before enabling this feature.\n\n\n\n\n13.1.4 Use Cases\n\nClean Start - Remove old logs and create new one:\ntensorboard_log=True, tensorboard_clean=True\nPreserve History - Keep old logs and add new one (default):\ntensorboard_log=True, tensorboard_clean=False\nJust Clean - Remove old logs without new logging:\ntensorboard_log=False, tensorboard_clean=True\n\n\n\n13.1.5 Custom Log Directory\nSpecify a custom path for TensorBoard logs:\noptimizer = SpotOptim(\n    fun=objective,\n    bounds=[(-5, 5), (-5, 5)],\n    tensorboard_log=True,\n    tensorboard_path=\"my_experiments/run_001\",\n    ...\n)\n\n\n13.1.6 What Gets Logged\n\n13.1.6.1 Scalar Metrics\nFor Deterministic Functions:\n\ny_values/min: Best (minimum) y value found so far\ny_values/last: Most recently evaluated y value\nX_best/x0, X_best/x1, ...: Coordinates of the best point\n\nFor Noisy Functions (repeats > 1):\n\ny_values/min: Best single evaluation\ny_values/mean_best: Best mean y value\ny_values/last: Most recent evaluation\ny_variance_at_best: Variance at the best mean point\nX_mean_best/x0, X_mean_best/x1, ...: Coordinates of best mean point\n\n\n\n13.1.6.2 Hyperparameters\nEach function evaluation is logged with:\n\nInput coordinates (x0, x1, x2, …)\nFunction value 
(hp_metric)\n\nThis allows you to explore the relationship between hyperparameters and objective values in the HPARAMS tab.\n\n\n\n13.1.7 Examples\n\nExample 13.1 (Basic Tensorboard Usage)  \n\nimport numpy as np\nfrom spotoptim import SpotOptim\n\noptimizer = SpotOptim(\n    fun=lambda X: np.sum(X**2, axis=1),\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=20,\n    n_initial=10,\n    tensorboard_log=True,\n    tensorboard_clean=True,\n    verbose=True,\n    seed=42\n)\nresult = optimizer.optimize()\nprint(f\"Best value: {result.fun:.6f}\")\n\nRemoved old TensorBoard logs: runs/spotoptim_20260404_094042\nCleaned 1 old TensorBoard log directory\nTensorBoard logging enabled: runs/spotoptim_20260404_094050\nInitial best: f(x) = 3.925721\nIter 1 | Best: 0.020506 | Rate: 1.00 | Evals: 55.0%\nIter 2 | Best: 0.000751 | Rate: 1.00 | Evals: 60.0%\nIter 3 | Best: 0.000024 | Rate: 1.00 | Evals: 65.0%\nIter 4 | Best: 0.000000 | Rate: 1.00 | Evals: 70.0%\nIter 5 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.80 | Evals: 75.0%\nIter 6 | Best: 0.000000 | Rate: 0.83 | Evals: 80.0%\nIter 7 | Best: 0.000000 | Rate: 0.86 | Evals: 85.0%\nIter 8 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.75 | Evals: 90.0%\nIter 9 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.67 | Evals: 95.0%\nIter 10 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.60 | Evals: 100.0%\nTensorBoard writer closed. 
View logs with: tensorboard --logdir=runs/spotoptim_20260404_094050\nBest value: 0.000000\n\n\n\n\nExample 13.2 (Noisy Optimization)  \n\nimport numpy as np\nfrom spotoptim import SpotOptim\n\ndef noisy_objective(X):\n    base = np.sum(X**2, axis=1)\n    noise = np.random.normal(0, 0.1, size=base.shape)\n    return base + noise\n\noptimizer = SpotOptim(\n    fun=noisy_objective,\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=20,\n    n_initial=10,\n    repeats_initial=3,\n    repeats_surrogate=2,\n    tensorboard_log=True,\n    tensorboard_clean=True,\n    tensorboard_path=\"runs/noisy_exp\",\n    seed=42\n)\nresult = optimizer.optimize()\nprint(f\"Best value: {result.fun:.6f}\")\n\nBest value: 3.784490\n\n\n\n\nExample 13.3 (With OCBA)  \n\nimport numpy as np\nfrom spotoptim import SpotOptim\n\ndef noisy_objective(X):\n    base = np.sum(X**2, axis=1)\n    noise = np.random.normal(0, 0.1, size=base.shape)\n    return base + noise\n\noptimizer = SpotOptim(\n    fun=noisy_objective,\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=20,\n    n_initial=10,\n    repeats_initial=2,\n    ocba_delta=3,  # Re-evaluate 3 promising points per iteration\n    tensorboard_log=True,\n    tensorboard_clean=True,\n    seed=42\n)\nresult = optimizer.optimize()\nprint(f\"Best value: {result.fun:.6f}\")\n\nBest value: 3.734393\n\n\n\n\nExample 13.4 (Comparing Multiple Runs) Run multiple optimizations with different settings:\n# Run 1: Standard\nopt1 = SpotOptim(..., tensorboard_path=\"runs/standard\")\nopt1.optimize()\n\n# Run 2: With OCBA\nopt2 = SpotOptim(..., ocba_delta=3, tensorboard_path=\"runs/with_ocba\")\nopt2.optimize()\n\n# Run 3: More initial points\nopt3 = SpotOptim(..., n_initial=20, tensorboard_path=\"runs/more_initial\")\nopt3.optimize()\nThen view all runs together:\ntensorboard --logdir=runs",
      +    "text": "13.1.1 Enable TensorBoard Logging\n\nfrom spotoptim import SpotOptim\nimport numpy as np\n\ndef sphere(X):\n    return np.sum(X**2, axis=1)\n\noptimizer = SpotOptim(\n    fun=sphere,\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=20,\n    n_initial=10,\n    tensorboard_log=True,  # Enable logging\n    tensorboard_clean=True,\n    verbose=True,\n    seed=42\n)\n\nresult = optimizer.optimize()\nprint(f\"Best value: {result.fun:.6f}\")\nprint(f\"Logs saved to: runs/{optimizer.tensorboard_path}\")\n\nRemoved old TensorBoard logs: runs/spotoptim_20260411_212846\nCleaned 1 old TensorBoard log directory\nTensorBoard logging enabled: runs/spotoptim_20260411_213759\nInitial best: f(x) = 3.925721\nIter 1 | Best: 0.020506 | Rate: 1.00 | Evals: 55.0%\nIter 2 | Best: 0.000751 | Rate: 1.00 | Evals: 60.0%\nIter 3 | Best: 0.000024 | Rate: 1.00 | Evals: 65.0%\nIter 4 | Best: 0.000000 | Rate: 1.00 | Evals: 70.0%\nIter 5 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.80 | Evals: 75.0%\nIter 6 | Best: 0.000000 | Rate: 0.83 | Evals: 80.0%\nIter 7 | Best: 0.000000 | Rate: 0.86 | Evals: 85.0%\nIter 8 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.75 | Evals: 90.0%\nIter 9 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.67 | Evals: 95.0%\nIter 10 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.60 | Evals: 100.0%\nTensorBoard writer closed. 
View logs with: tensorboard --logdir=runs/spotoptim_20260411_213759\nBest value: 0.000000\nLogs saved to: runs/runs/spotoptim_20260411_213759\n\n\n\n\n13.1.2 View Logs in TensorBoard\nIn a separate terminal, run:\ntensorboard --logdir=runs\nThen open your browser to http://localhost:6006\n\n\n13.1.3 Cleaning Old Logs\nYou can automatically remove old TensorBoard logs before starting a new optimization:\noptimizer = SpotOptim(\n    fun=objective,\n    bounds=[(-5, 5), (-5, 5)],\n    tensorboard_log=True,\n    tensorboard_clean=True,  # Remove old logs from 'runs' directory\n    verbose=True\n)\n\n\n\n\n\n\nNoteWarning\n\n\n\nThis permanently deletes all subdirectories in the runs folder. Make sure to save important logs elsewhere before enabling this feature.\n\n\n\n\n13.1.4 Use Cases\n\nClean Start - Remove old logs and create new one:\ntensorboard_log=True, tensorboard_clean=True\nPreserve History - Keep old logs and add new one (default):\ntensorboard_log=True, tensorboard_clean=False\nJust Clean - Remove old logs without new logging:\ntensorboard_log=False, tensorboard_clean=True\n\n\n\n13.1.5 Custom Log Directory\nSpecify a custom path for TensorBoard logs:\noptimizer = SpotOptim(\n    fun=objective,\n    bounds=[(-5, 5), (-5, 5)],\n    tensorboard_log=True,\n    tensorboard_path=\"my_experiments/run_001\",\n    ...\n)\n\n\n13.1.6 What Gets Logged\n\n13.1.6.1 Scalar Metrics\nFor Deterministic Functions:\n\ny_values/min: Best (minimum) y value found so far\ny_values/last: Most recently evaluated y value\nX_best/x0, X_best/x1, ...: Coordinates of the best point\n\nFor Noisy Functions (repeats > 1):\n\ny_values/min: Best single evaluation\ny_values/mean_best: Best mean y value\ny_values/last: Most recent evaluation\ny_variance_at_best: Variance at the best mean point\nX_mean_best/x0, X_mean_best/x1, ...: Coordinates of best mean point\n\n\n\n13.1.6.2 Hyperparameters\nEach function evaluation is logged with:\n\nInput coordinates (x0, x1, x2, …)\nFunction value 
(hp_metric)\n\nThis allows you to explore the relationship between hyperparameters and objective values in the HPARAMS tab.\n\n\n\n13.1.7 Examples\n\nExample 13.1 (Basic Tensorboard Usage)  \n\nimport numpy as np\nfrom spotoptim import SpotOptim\n\noptimizer = SpotOptim(\n    fun=lambda X: np.sum(X**2, axis=1),\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=20,\n    n_initial=10,\n    tensorboard_log=True,\n    tensorboard_clean=True,\n    verbose=True,\n    seed=42\n)\nresult = optimizer.optimize()\nprint(f\"Best value: {result.fun:.6f}\")\n\nRemoved old TensorBoard logs: runs/spotoptim_20260411_213759\nCleaned 1 old TensorBoard log directory\nTensorBoard logging enabled: runs/spotoptim_20260411_213807\nInitial best: f(x) = 3.925721\nIter 1 | Best: 0.020506 | Rate: 1.00 | Evals: 55.0%\nIter 2 | Best: 0.000751 | Rate: 1.00 | Evals: 60.0%\nIter 3 | Best: 0.000024 | Rate: 1.00 | Evals: 65.0%\nIter 4 | Best: 0.000000 | Rate: 1.00 | Evals: 70.0%\nIter 5 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.80 | Evals: 75.0%\nIter 6 | Best: 0.000000 | Rate: 0.83 | Evals: 80.0%\nIter 7 | Best: 0.000000 | Rate: 0.86 | Evals: 85.0%\nIter 8 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.75 | Evals: 90.0%\nIter 9 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.67 | Evals: 95.0%\nIter 10 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.60 | Evals: 100.0%\nTensorBoard writer closed. 
View logs with: tensorboard --logdir=runs/spotoptim_20260411_213807\nBest value: 0.000000\n\n\n\n\nExample 13.2 (Noisy Optimization)  \n\nimport numpy as np\nfrom spotoptim import SpotOptim\n\ndef noisy_objective(X):\n    base = np.sum(X**2, axis=1)\n    noise = np.random.normal(0, 0.1, size=base.shape)\n    return base + noise\n\noptimizer = SpotOptim(\n    fun=noisy_objective,\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=20,\n    n_initial=10,\n    repeats_initial=3,\n    repeats_surrogate=2,\n    tensorboard_log=True,\n    tensorboard_clean=True,\n    tensorboard_path=\"runs/noisy_exp\",\n    seed=42\n)\nresult = optimizer.optimize()\nprint(f\"Best value: {result.fun:.6f}\")\n\nBest value: 3.784490\n\n\n\n\nExample 13.3 (With OCBA)  \n\nimport numpy as np\nfrom spotoptim import SpotOptim\n\ndef noisy_objective(X):\n    base = np.sum(X**2, axis=1)\n    noise = np.random.normal(0, 0.1, size=base.shape)\n    return base + noise\n\noptimizer = SpotOptim(\n    fun=noisy_objective,\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=20,\n    n_initial=10,\n    repeats_initial=2,\n    ocba_delta=3,  # Re-evaluate 3 promising points per iteration\n    tensorboard_log=True,\n    tensorboard_clean=True,\n    seed=42\n)\nresult = optimizer.optimize()\nprint(f\"Best value: {result.fun:.6f}\")\n\nBest value: 3.734393\n\n\n\n\nExample 13.4 (Comparing Multiple Runs) Run multiple optimizations with different settings:\n# Run 1: Standard\nopt1 = SpotOptim(..., tensorboard_path=\"runs/standard\")\nopt1.optimize()\n\n# Run 2: With OCBA\nopt2 = SpotOptim(..., ocba_delta=3, tensorboard_path=\"runs/with_ocba\")\nopt2.optimize()\n\n# Run 3: More initial points\nopt3 = SpotOptim(..., n_initial=20, tensorboard_path=\"runs/more_initial\")\nopt3.optimize()\nThen view all runs together:\ntensorboard --logdir=runs",
           "crumbs": [
             "Visualization",
             "13  TensorBoard Logging in SpotOptim"
      @@ -1312,7 +1312,7 @@
           "href": "tensorboard.html#tensorboard-log-cleaning-feature-in-spotoptim",
           "title": "13  TensorBoard Logging in SpotOptim",
           "section": "13.3 TensorBoard Log Cleaning Feature in SpotOptim",
      -    "text": "13.3 TensorBoard Log Cleaning Feature in SpotOptim\nAutomatic cleaning of old TensorBoard log directories with the tensorboard_clean parameter.\n\n13.3.1 Basic Usage\n\nimport numpy as np\nfrom spotoptim import SpotOptim\n\ndef sphere(X):\n    \"\"\"Simple sphere function\"\"\"\n    return np.sum(X**2, axis=1)\n\n# Remove old logs and create new log directory\noptimizer = SpotOptim(\n    fun=sphere,\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=20,\n    n_initial=10,\n    tensorboard_log=True,\n    tensorboard_clean=True,  # Removes all subdirectories in 'runs'\n    verbose=True,\n    seed=42\n)\n\nresult = optimizer.optimize()\nprint(f\"Best value: {result.fun:.6f}\")\nprint(f\"Logs saved to: runs/{optimizer.tensorboard_path}\")\n\nRemoved old TensorBoard logs: runs/spotoptim_20260404_094059\nCleaned 1 old TensorBoard log directory\nTensorBoard logging enabled: runs/spotoptim_20260404_094059\nInitial best: f(x) = 3.925721\nIter 1 | Best: 0.020506 | Rate: 1.00 | Evals: 55.0%\nIter 2 | Best: 0.000751 | Rate: 1.00 | Evals: 60.0%\nIter 3 | Best: 0.000024 | Rate: 1.00 | Evals: 65.0%\nIter 4 | Best: 0.000000 | Rate: 1.00 | Evals: 70.0%\nIter 5 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.80 | Evals: 75.0%\nIter 6 | Best: 0.000000 | Rate: 0.83 | Evals: 80.0%\nIter 7 | Best: 0.000000 | Rate: 0.86 | Evals: 85.0%\nIter 8 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.75 | Evals: 90.0%\nIter 9 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.67 | Evals: 95.0%\nIter 10 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.60 | Evals: 100.0%\nTensorBoard writer closed. 
View logs with: tensorboard --logdir=runs/spotoptim_20260404_094059\nBest value: 0.000000\nLogs saved to: runs/runs/spotoptim_20260404_094059\n\n\n\n\n13.3.2 Use Cases\n\n\n\ntensorboard_log\ntensorboard_clean\nBehavior\n\n\n\n\nTrue\nTrue\nClean old logs, create new log directory\n\n\nTrue\nFalse\nPreserve old logs, create new log directory\n\n\nFalse\nTrue\nClean old logs, no new logging\n\n\nFalse\nFalse\nNo logging, no cleaning (default)\n\n\n\n\n\n13.3.3 Implementation Details\n\n13.3.3.1 Cleaning Method\ndef _clean_tensorboard_logs(self) -> None:\n    \"\"\"Clean old TensorBoard log directories from the runs folder.\"\"\"\n    if self.tensorboard_clean:\n        runs_dir = \"runs\"\n        if os.path.exists(runs_dir) and os.path.isdir(runs_dir):\n            # Get all subdirectories in runs\n            subdirs = [\n                os.path.join(runs_dir, d)\n                for d in os.listdir(runs_dir)\n                if os.path.isdir(os.path.join(runs_dir, d))\n            ]\n            \n            # Remove each subdirectory\n            for subdir in subdirs:\n                try:\n                    shutil.rmtree(subdir)\n                    if self.verbose:\n                        print(f\"Removed old TensorBoard logs: {subdir}\")\n                except Exception as e:\n                    if self.verbose:\n                        print(f\"Warning: Could not remove {subdir}: {e}\")\n\n\n\n13.3.4 Execution Flow\n\nUser creates SpotOptim instance with tensorboard_clean=True\nDuring initialization, _clean_tensorboard_logs() is called\nMethod checks if ‘runs’ directory exists\nRemoves all subdirectories (but preserves files)\nIf tensorboard_log=True, a new log directory is created\nOptimization proceeds normally",
      +    "text": "13.3 TensorBoard Log Cleaning Feature in SpotOptim\nAutomatic cleaning of old TensorBoard log directories with the tensorboard_clean parameter.\n\n13.3.1 Basic Usage\n\nimport numpy as np\nfrom spotoptim import SpotOptim\n\ndef sphere(X):\n    \"\"\"Simple sphere function\"\"\"\n    return np.sum(X**2, axis=1)\n\n# Remove old logs and create new log directory\noptimizer = SpotOptim(\n    fun=sphere,\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=20,\n    n_initial=10,\n    tensorboard_log=True,\n    tensorboard_clean=True,  # Removes all subdirectories in 'runs'\n    verbose=True,\n    seed=42\n)\n\nresult = optimizer.optimize()\nprint(f\"Best value: {result.fun:.6f}\")\nprint(f\"Logs saved to: runs/{optimizer.tensorboard_path}\")\n\nRemoved old TensorBoard logs: runs/spotoptim_20260411_213816\nCleaned 1 old TensorBoard log directory\nTensorBoard logging enabled: runs/spotoptim_20260411_213816\nInitial best: f(x) = 3.925721\nIter 1 | Best: 0.020506 | Rate: 1.00 | Evals: 55.0%\nIter 2 | Best: 0.000751 | Rate: 1.00 | Evals: 60.0%\nIter 3 | Best: 0.000024 | Rate: 1.00 | Evals: 65.0%\nIter 4 | Best: 0.000000 | Rate: 1.00 | Evals: 70.0%\nIter 5 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.80 | Evals: 75.0%\nIter 6 | Best: 0.000000 | Rate: 0.83 | Evals: 80.0%\nIter 7 | Best: 0.000000 | Rate: 0.86 | Evals: 85.0%\nIter 8 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.75 | Evals: 90.0%\nIter 9 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.67 | Evals: 95.0%\nIter 10 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.60 | Evals: 100.0%\nTensorBoard writer closed. 
View logs with: tensorboard --logdir=runs/spotoptim_20260411_213816\nBest value: 0.000000\nLogs saved to: runs/runs/spotoptim_20260411_213816\n\n\n\n\n13.3.2 Use Cases\n\n\n\ntensorboard_log\ntensorboard_clean\nBehavior\n\n\n\n\nTrue\nTrue\nClean old logs, create new log directory\n\n\nTrue\nFalse\nPreserve old logs, create new log directory\n\n\nFalse\nTrue\nClean old logs, no new logging\n\n\nFalse\nFalse\nNo logging, no cleaning (default)\n\n\n\n\n\n13.3.3 Implementation Details\n\n13.3.3.1 Cleaning Method\ndef _clean_tensorboard_logs(self) -> None:\n    \"\"\"Clean old TensorBoard log directories from the runs folder.\"\"\"\n    if self.tensorboard_clean:\n        runs_dir = \"runs\"\n        if os.path.exists(runs_dir) and os.path.isdir(runs_dir):\n            # Get all subdirectories in runs\n            subdirs = [\n                os.path.join(runs_dir, d)\n                for d in os.listdir(runs_dir)\n                if os.path.isdir(os.path.join(runs_dir, d))\n            ]\n            \n            # Remove each subdirectory\n            for subdir in subdirs:\n                try:\n                    shutil.rmtree(subdir)\n                    if self.verbose:\n                        print(f\"Removed old TensorBoard logs: {subdir}\")\n                except Exception as e:\n                    if self.verbose:\n                        print(f\"Warning: Could not remove {subdir}: {e}\")\n\n\n\n13.3.4 Execution Flow\n\nUser creates SpotOptim instance with tensorboard_clean=True\nDuring initialization, _clean_tensorboard_logs() is called\nMethod checks if ‘runs’ directory exists\nRemoves all subdirectories (but preserves files)\nIf tensorboard_log=True, a new log directory is created\nOptimization proceeds normally",
           "crumbs": [
             "Visualization",
             "13  TensorBoard Logging in SpotOptim"
      @@ -1675,7 +1675,7 @@
           "href": "surrogate_selection.html#default-surrogate-gaussian-process-with-matern-kernel",
           "title": "16  Surrogate Model Selection in SpotOptim",
           "section": "16.4 1. Default Surrogate: Gaussian Process with Matern Kernel",
      -    "text": "16.4 1. Default Surrogate: Gaussian Process with Matern Kernel\nSpotOptim’s default surrogate is a Gaussian Process with a Matern kernel (\\(\\nu\\)=2.5), which provides twice-differentiable sample paths and good performance for most optimization problems.\n\nstart_time = time.time()\n\n# Default GP (no surrogate specified)\noptimizer_default = SpotOptim(\n    fun=wingwt,\n    bounds=bounds,\n    max_iter=max_iter,\n    n_initial=n_initial,\n    var_name=param_names,\n    acquisition='ei',\n    seed=seed,\n    verbose=False\n)\n\nresult_default = optimizer_default.optimize()\ntime_default = time.time() - start_time\n\nprint(f\"\\nResults:\")\nprint(f\"  Best weight: {result_default.fun:.4f} lb\")\nprint(f\"  Function evaluations: {result_default.nfev}\")\nprint(f\"  Time: {time_default:.2f}s\")\nprint(f\"  Success: {result_default.success}\")\n\n# Store for comparison\nresults_comparison = [{\n    'Surrogate': 'GP Matern nu=2.5 (Default)',\n    'Best Weight': result_default.fun,\n    'Evaluations': result_default.nfev,\n    'Time (s)': time_default,\n    'Success': result_default.success\n}]\n\n\nResults:\n  Best weight: 119.8664 lb\n  Function evaluations: 20\n  Time: 1.65s\n  Success: True\n\n\n\n16.4.1 Visualization: Default Surrogate\n\n# Plot convergence\noptimizer_default.plot_progress(log_y=False, figsize=(10, 5))\n\n\n\n\n\n\n\n\n\n# Plot most important hyperparameters\noptimizer_default.plot_important_hyperparameter_contour(max_imp=3)\nplt.suptitle('Default GP Matern nu=2.5: Most Important Parameters', y=1.02)\nplt.show()\n\nPlotting surrogate contours for top 3 most important parameters:\n  Nz: importance = 22.46% (type: float)\n  A: importance = 20.27% (type: float)\n  Rtc: importance = 17.83% (type: float)\n\nGenerating 3 surrogate plots...\n  Plotting Nz vs A\n\n\n\n\n\n\n\n\n\n  Plotting Nz vs Rtc\n\n\n\n\n\n\n\n\n\n  Plotting A vs Rtc\n\n\n\n\n\n\n\n\n\n<Figure size 672x480 with 0 Axes>",
      +    "text": "16.4 1. Default Surrogate: Gaussian Process with Matern Kernel\nSpotOptim’s default surrogate is a Gaussian Process with a Matern kernel (\\(\\nu\\)=2.5), which provides twice-differentiable sample paths and good performance for most optimization problems.\n\nstart_time = time.time()\n\n# Default GP (no surrogate specified)\noptimizer_default = SpotOptim(\n    fun=wingwt,\n    bounds=bounds,\n    max_iter=max_iter,\n    n_initial=n_initial,\n    var_name=param_names,\n    acquisition='ei',\n    seed=seed,\n    verbose=False\n)\n\nresult_default = optimizer_default.optimize()\ntime_default = time.time() - start_time\n\nprint(f\"\\nResults:\")\nprint(f\"  Best weight: {result_default.fun:.4f} lb\")\nprint(f\"  Function evaluations: {result_default.nfev}\")\nprint(f\"  Time: {time_default:.2f}s\")\nprint(f\"  Success: {result_default.success}\")\n\n# Store for comparison\nresults_comparison = [{\n    'Surrogate': 'GP Matern nu=2.5 (Default)',\n    'Best Weight': result_default.fun,\n    'Evaluations': result_default.nfev,\n    'Time (s)': time_default,\n    'Success': result_default.success\n}]\n\n\nResults:\n  Best weight: 119.8664 lb\n  Function evaluations: 20\n  Time: 1.71s\n  Success: True\n\n\n\n16.4.1 Visualization: Default Surrogate\n\n# Plot convergence\noptimizer_default.plot_progress(log_y=False, figsize=(10, 5))\n\n\n\n\n\n\n\n\n\n# Plot most important hyperparameters\noptimizer_default.plot_important_hyperparameter_contour(max_imp=3)\nplt.suptitle('Default GP Matern nu=2.5: Most Important Parameters', y=1.02)\nplt.show()\n\nPlotting surrogate contours for top 3 most important parameters:\n  Nz: importance = 22.46% (type: float)\n  A: importance = 20.27% (type: float)\n  Rtc: importance = 17.83% (type: float)\n\nGenerating 3 surrogate plots...\n  Plotting Nz vs A\n\n\n\n\n\n\n\n\n\n  Plotting Nz vs Rtc\n\n\n\n\n\n\n\n\n\n  Plotting A vs Rtc\n\n\n\n\n\n\n\n\n\n<Figure size 672x480 with 0 Axes>",
           "crumbs": [
             "Surrogate Handling",
             "16  Surrogate Model Selection in SpotOptim"
      @@ -1719,7 +1719,7 @@
           "href": "surrogate_selection.html#spotoptim-kriging-model",
           "title": "16  Surrogate Model Selection in SpotOptim",
           "section": "16.8 5. SpotOptim Kriging Model",
      -    "text": "16.8 5. SpotOptim Kriging Model\nSpotOptim includes its own Kriging implementation optimized for sequential design. It uses Gaussian correlation function and optimizes hyperparameters via differential evolution.\n\nstart_time = time.time()\n\n# Configure Kriging model\nkriging_model = Kriging(\n    noise=1e-10,          # Regularization parameter\n    kernel='gauss',       # Gaussian/RBF kernel\n    n_theta=None,         # Auto: use number of dimensions\n    min_theta=-3.0,       # Min log10(theta) bound\n    max_theta=2.0,        # Max log10(theta) bound\n    seed=seed\n)\n\noptimizer_kriging = SpotOptim(\n    fun=wingwt,\n    bounds=bounds,\n    surrogate=kriging_model,\n    max_iter=max_iter,\n    n_initial=n_initial,\n    var_name=param_names,\n    acquisition='ei',\n    seed=seed,\n    verbose=False\n)\n\nresult_kriging = optimizer_kriging.optimize()\ntime_kriging = time.time() - start_time\n\nprint(f\"\\nResults:\")\nprint(f\"  Best weight: {result_kriging.fun:.4f} lb\")\nprint(f\"  Function evaluations: {result_kriging.nfev}\")\nprint(f\"  Time: {time_kriging:.2f}s\")\nprint(f\"  Success: {result_kriging.success}\")\n\nresults_comparison.append({\n    'Surrogate': 'SpotOptim Kriging',\n    'Best Weight': result_kriging.fun,\n    'Evaluations': result_kriging.nfev,\n    'Time (s)': time_kriging,\n    'Success': result_kriging.success\n})\n\n\nResults:\n  Best weight: 121.2932 lb\n  Function evaluations: 20\n  Time: 8.08s\n  Success: True\n\n\n\n16.8.1 Visualization: Kriging Model\n\noptimizer_kriging.plot_progress(log_y=False, figsize=(10, 5))\n\n\n\n\n\n\n\n\n\noptimizer_kriging.plot_important_hyperparameter_contour(max_imp=3)\nplt.suptitle('SpotOptim Kriging: Most Important Parameters', y=1.02)\nplt.show()\n\nPlotting surrogate contours for top 3 most important parameters:\n  Nz: importance = 20.85% (type: float)\n  A: importance = 18.80% (type: float)\n  Rtc: importance = 16.51% (type: float)\n\nGenerating 3 surrogate plots...\n  
Plotting Nz vs A\n\n\n\n\n\n\n\n\n\n  Plotting Nz vs Rtc\n\n\n\n\n\n\n\n\n\n  Plotting A vs Rtc\n\n\n\n\n\n\n\n\n\n<Figure size 672x480 with 0 Axes>",
      +    "text": "16.8 5. SpotOptim Kriging Model\nSpotOptim includes its own Kriging implementation optimized for sequential design. It uses Gaussian correlation function and optimizes hyperparameters via differential evolution.\n\nstart_time = time.time()\n\n# Configure Kriging model\nkriging_model = Kriging(\n    noise=1e-10,          # Regularization parameter\n    kernel='gauss',       # Gaussian/RBF kernel\n    n_theta=None,         # Auto: use number of dimensions\n    min_theta=-3.0,       # Min log10(theta) bound\n    max_theta=2.0,        # Max log10(theta) bound\n    seed=seed\n)\n\noptimizer_kriging = SpotOptim(\n    fun=wingwt,\n    bounds=bounds,\n    surrogate=kriging_model,\n    max_iter=max_iter,\n    n_initial=n_initial,\n    var_name=param_names,\n    acquisition='ei',\n    seed=seed,\n    verbose=False\n)\n\nresult_kriging = optimizer_kriging.optimize()\ntime_kriging = time.time() - start_time\n\nprint(f\"\\nResults:\")\nprint(f\"  Best weight: {result_kriging.fun:.4f} lb\")\nprint(f\"  Function evaluations: {result_kriging.nfev}\")\nprint(f\"  Time: {time_kriging:.2f}s\")\nprint(f\"  Success: {result_kriging.success}\")\n\nresults_comparison.append({\n    'Surrogate': 'SpotOptim Kriging',\n    'Best Weight': result_kriging.fun,\n    'Evaluations': result_kriging.nfev,\n    'Time (s)': time_kriging,\n    'Success': result_kriging.success\n})\n\n\nResults:\n  Best weight: 121.2932 lb\n  Function evaluations: 20\n  Time: 8.06s\n  Success: True\n\n\n\n16.8.1 Visualization: Kriging Model\n\noptimizer_kriging.plot_progress(log_y=False, figsize=(10, 5))\n\n\n\n\n\n\n\n\n\noptimizer_kriging.plot_important_hyperparameter_contour(max_imp=3)\nplt.suptitle('SpotOptim Kriging: Most Important Parameters', y=1.02)\nplt.show()\n\nPlotting surrogate contours for top 3 most important parameters:\n  Nz: importance = 20.85% (type: float)\n  A: importance = 18.80% (type: float)\n  Rtc: importance = 16.51% (type: float)\n\nGenerating 3 surrogate plots...\n  
Plotting Nz vs A\n\n\n\n\n\n\n\n\n\n  Plotting Nz vs Rtc\n\n\n\n\n\n\n\n\n\n  Plotting A vs Rtc\n\n\n\n\n\n\n\n\n\n<Figure size 672x480 with 0 Axes>",
           "crumbs": [
             "Surrogate Handling",
             "16  Surrogate Model Selection in SpotOptim"
      @@ -1741,7 +1741,7 @@
           "href": "surrogate_selection.html#xgboost-regressor",
           "title": "16  Surrogate Model Selection in SpotOptim",
           "section": "16.10 7. XGBoost Regressor",
      -    "text": "16.10 7. XGBoost Regressor\nXGBoost is a gradient boosting implementation known for excellent performance on structured data and fast training/prediction times.\n\nif XGBOOST_AVAILABLE:\n    start_time = time.time()\n    \n    # Configure XGBoost\n    xgb_model = xgb.XGBRegressor(\n        n_estimators=100,\n        max_depth=6,\n        learning_rate=0.1,\n        subsample=0.8,\n        colsample_bytree=0.8,\n        random_state=seed,\n        nthread=1,\n        verbosity=0,\n    )\n    \n    optimizer_xgb = SpotOptim(\n        fun=wingwt,\n        bounds=bounds,\n        surrogate=xgb_model,\n        max_iter=max_iter,\n        n_initial=n_initial,\n        var_name=param_names,\n        acquisition='y',  # Use 'y' (greedy) since XGBoost doesn't provide std\n        seed=seed,\n        verbose=False\n    )\n    \n    result_xgb = optimizer_xgb.optimize()\n    time_xgb = time.time() - start_time\n    \n    print(f\"\\nResults:\")\n    print(f\"  Best weight: {result_xgb.fun:.4f} lb\")\n    print(f\"  Function evaluations: {result_xgb.nfev}\")\n    print(f\"  Time: {time_xgb:.2f}s\")\n    print(f\"  Success: {result_xgb.success}\")\n    print(f\"  Note: Using acquisition='y' (greedy) since XGBoost doesn't provide uncertainty\")\n    \n    results_comparison.append({\n        'Surrogate': 'XGBoost',\n        'Best Weight': result_xgb.fun,\n        'Evaluations': result_xgb.nfev,\n        'Time (s)': time_xgb,\n        'Success': result_xgb.success\n    })\n    \n    # Visualization\n    optimizer_xgb.plot_progress(log_y=False, figsize=(10, 5))\n    plt.title('XGBoost: Convergence')\n    plt.show()\n    \n    optimizer_xgb.plot_important_hyperparameter_contour(max_imp=3)\n    plt.suptitle('XGBoost: Most Important Parameters', y=1.02)\n    plt.show()\nelse:\n    print(\"=\" * 80)\n    print(\"7. 
XGBoost Regressor - SKIPPED (not installed)\")\n    print(\"=\" * 80)\n    print(\"Install XGBoost with: pip install xgboost\")\n\n\nResults:\n  Best weight: 165.3064 lb\n  Function evaluations: 20\n  Time: 0.10s\n  Success: True\n  Note: Using acquisition='y' (greedy) since XGBoost doesn't provide uncertainty\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nPlotting surrogate contours for top 3 most important parameters:\n  Nz: importance = 20.61% (type: float)\n  A: importance = 18.24% (type: float)\n  Rtc: importance = 14.07% (type: float)\n\nGenerating 3 surrogate plots...\n  Plotting Nz vs A\n\n\n\n\n\n\n\n\n\n  Plotting Nz vs Rtc\n\n\n\n\n\n\n\n\n\n  Plotting A vs Rtc\n\n\n\n\n\n\n\n\n\n<Figure size 672x480 with 0 Axes>",
      +    "text": "16.10 7. XGBoost Regressor\nXGBoost is a gradient boosting implementation known for excellent performance on structured data and fast training/prediction times.\n\nif XGBOOST_AVAILABLE:\n    start_time = time.time()\n    \n    # Configure XGBoost\n    xgb_model = xgb.XGBRegressor(\n        n_estimators=100,\n        max_depth=6,\n        learning_rate=0.1,\n        subsample=0.8,\n        colsample_bytree=0.8,\n        random_state=seed,\n        nthread=1,\n        verbosity=0,\n    )\n    \n    optimizer_xgb = SpotOptim(\n        fun=wingwt,\n        bounds=bounds,\n        surrogate=xgb_model,\n        max_iter=max_iter,\n        n_initial=n_initial,\n        var_name=param_names,\n        acquisition='y',  # Use 'y' (greedy) since XGBoost doesn't provide std\n        seed=seed,\n        verbose=False\n    )\n    \n    result_xgb = optimizer_xgb.optimize()\n    time_xgb = time.time() - start_time\n    \n    print(f\"\\nResults:\")\n    print(f\"  Best weight: {result_xgb.fun:.4f} lb\")\n    print(f\"  Function evaluations: {result_xgb.nfev}\")\n    print(f\"  Time: {time_xgb:.2f}s\")\n    print(f\"  Success: {result_xgb.success}\")\n    print(f\"  Note: Using acquisition='y' (greedy) since XGBoost doesn't provide uncertainty\")\n    \n    results_comparison.append({\n        'Surrogate': 'XGBoost',\n        'Best Weight': result_xgb.fun,\n        'Evaluations': result_xgb.nfev,\n        'Time (s)': time_xgb,\n        'Success': result_xgb.success\n    })\n    \n    # Visualization\n    optimizer_xgb.plot_progress(log_y=False, figsize=(10, 5))\n    plt.title('XGBoost: Convergence')\n    plt.show()\n    \n    optimizer_xgb.plot_important_hyperparameter_contour(max_imp=3)\n    plt.suptitle('XGBoost: Most Important Parameters', y=1.02)\n    plt.show()\nelse:\n    print(\"=\" * 80)\n    print(\"7. 
XGBoost Regressor - SKIPPED (not installed)\")\n    print(\"=\" * 80)\n    print(\"Install XGBoost with: pip install xgboost\")\n\n\nResults:\n  Best weight: 165.3064 lb\n  Function evaluations: 20\n  Time: 0.11s\n  Success: True\n  Note: Using acquisition='y' (greedy) since XGBoost doesn't provide uncertainty\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nPlotting surrogate contours for top 3 most important parameters:\n  Nz: importance = 20.61% (type: float)\n  A: importance = 18.24% (type: float)\n  Rtc: importance = 14.07% (type: float)\n\nGenerating 3 surrogate plots...\n  Plotting Nz vs A\n\n\n\n\n\n\n\n\n\n  Plotting Nz vs Rtc\n\n\n\n\n\n\n\n\n\n  Plotting A vs Rtc\n\n\n\n\n\n\n\n\n\n<Figure size 672x480 with 0 Axes>",
           "crumbs": [
             "Surrogate Handling",
             "16  Surrogate Model Selection in SpotOptim"
      @@ -1774,7 +1774,7 @@
           "href": "surrogate_selection.html#comprehensive-comparison",
           "title": "16  Surrogate Model Selection in SpotOptim",
           "section": "16.13 Comprehensive Comparison",
      -    "text": "16.13 Comprehensive Comparison\nNow let’s compare all surrogate models side-by-side.\n\n# Create comparison DataFrame\ndf_comparison = pd.DataFrame(results_comparison)\n\n# Calculate improvement from best\nbest_weight = df_comparison['Best Weight'].min()\ndf_comparison['Gap to Best (%)'] = (\n    (df_comparison['Best Weight'] - best_weight) / best_weight * 100\n)\n\n# Sort by best weight\ndf_comparison = df_comparison.sort_values('Best Weight')\n\nprint(\"\\n\" + \"=\" * 100)\nprint(\"SURROGATE MODEL COMPARISON\")\nprint(\"=\" * 100)\nprint(df_comparison.to_string(index=False))\nprint(\"=\" * 100)\n\n\n====================================================================================================\nSURROGATE MODEL COMPARISON\n====================================================================================================\n                 Surrogate  Best Weight  Evaluations  Time (s)  Success  Gap to Best (%)\n          GP Matern nu=1.5   119.546454           20  0.449904     True         0.000000\nGP Matern nu=2.5 (Default)   119.866409           20  1.646203     True         0.267641\n     GP Rational Quadratic   120.107282           20  0.615868     True         0.469130\n                    GP RBF   120.109251           20  0.467476     True         0.470777\n         SpotOptim Kriging   121.293180           20  8.080324     True         1.461127\n         Gradient Boosting   134.515037           20  0.184076     True        12.521144\n             Random Forest   145.979231           20  0.869794     True        22.110883\n                 SVR (RBF)   156.707097           20  0.071585     True        31.084689\n                   XGBoost   165.306404           20  0.096069     True        38.277965\n====================================================================================================\n\n\n\n16.13.1 Visualization: Performance Comparison\n\nfig, axes = plt.subplots(2, 2, figsize=(16, 12))\n\n# Plot 1: Best weight 
comparison\nax1 = axes[0, 0]\ncolors = ['green' if i == 0 else 'steelblue' for i in range(len(df_comparison))]\nax1.barh(df_comparison['Surrogate'], df_comparison['Best Weight'], color=colors)\nax1.set_xlabel('Best Weight (lb)')\nax1.set_title('Best Weight Found by Each Surrogate')\nax1.axvline(x=best_weight, color='red', linestyle='--', linewidth=2, label='Best Overall')\nax1.legend()\nax1.grid(True, alpha=0.3, axis='x')\n\n# Plot 2: Computational time\nax2 = axes[0, 1]\nax2.barh(df_comparison['Surrogate'], df_comparison['Time (s)'], color='coral')\nax2.set_xlabel('Time (seconds)')\nax2.set_title('Computational Time')\nax2.grid(True, alpha=0.3, axis='x')\n\n# Plot 3: Gap to best\nax3 = axes[1, 0]\ncolors_gap = ['green' if gap < 0.1 else 'orange' if gap < 1.0 else 'red' \n              for gap in df_comparison['Gap to Best (%)']]\nax3.barh(df_comparison['Surrogate'], df_comparison['Gap to Best (%)'], color=colors_gap)\nax3.set_xlabel('Gap to Best Solution (%)')\nax3.set_title('Solution Quality (Lower is Better)')\nax3.axvline(x=1.0, color='black', linestyle='--', alpha=0.5, linewidth=1)\nax3.grid(True, alpha=0.3, axis='x')\n\n# Plot 4: Efficiency (weight reduction per second)\nax4 = axes[1, 1]\nbaseline_weight = wingwt(np.array([[0.48, 0.4, 0.38, 0.5, 0.62, 0.344, 0.4, 0.37, 0.38]]))[0]\ndf_comparison['Efficiency'] = (baseline_weight - df_comparison['Best Weight']) / df_comparison['Time (s)']\nax4.barh(df_comparison['Surrogate'], df_comparison['Efficiency'], color='mediumseagreen')\nax4.set_xlabel('Weight Reduction per Second (lb/s)')\nax4.set_title('Optimization Efficiency')\nax4.grid(True, alpha=0.3, axis='x')\n\nplt.tight_layout()\nplt.show()",
      +    "text": "16.13 Comprehensive Comparison\nNow let’s compare all surrogate models side-by-side.\n\n# Create comparison DataFrame\ndf_comparison = pd.DataFrame(results_comparison)\n\n# Calculate improvement from best\nbest_weight = df_comparison['Best Weight'].min()\ndf_comparison['Gap to Best (%)'] = (\n    (df_comparison['Best Weight'] - best_weight) / best_weight * 100\n)\n\n# Sort by best weight\ndf_comparison = df_comparison.sort_values('Best Weight')\n\nprint(\"\\n\" + \"=\" * 100)\nprint(\"SURROGATE MODEL COMPARISON\")\nprint(\"=\" * 100)\nprint(df_comparison.to_string(index=False))\nprint(\"=\" * 100)\n\n\n====================================================================================================\nSURROGATE MODEL COMPARISON\n====================================================================================================\n                 Surrogate  Best Weight  Evaluations  Time (s)  Success  Gap to Best (%)\n          GP Matern nu=1.5   119.546454           20  0.452622     True         0.000000\nGP Matern nu=2.5 (Default)   119.866409           20  1.705245     True         0.267641\n     GP Rational Quadratic   120.107282           20  0.615395     True         0.469130\n                    GP RBF   120.109251           20  0.471836     True         0.470777\n         SpotOptim Kriging   121.293180           20  8.057606     True         1.461127\n         Gradient Boosting   134.515037           20  0.184088     True        12.521144\n             Random Forest   145.979231           20  0.865507     True        22.110883\n                 SVR (RBF)   156.707097           20  0.071420     True        31.084689\n                   XGBoost   165.306404           20  0.109269     True        38.277965\n====================================================================================================\n\n\n\n16.13.1 Visualization: Performance Comparison\n\nfig, axes = plt.subplots(2, 2, figsize=(16, 12))\n\n# Plot 1: Best weight 
comparison\nax1 = axes[0, 0]\ncolors = ['green' if i == 0 else 'steelblue' for i in range(len(df_comparison))]\nax1.barh(df_comparison['Surrogate'], df_comparison['Best Weight'], color=colors)\nax1.set_xlabel('Best Weight (lb)')\nax1.set_title('Best Weight Found by Each Surrogate')\nax1.axvline(x=best_weight, color='red', linestyle='--', linewidth=2, label='Best Overall')\nax1.legend()\nax1.grid(True, alpha=0.3, axis='x')\n\n# Plot 2: Computational time\nax2 = axes[0, 1]\nax2.barh(df_comparison['Surrogate'], df_comparison['Time (s)'], color='coral')\nax2.set_xlabel('Time (seconds)')\nax2.set_title('Computational Time')\nax2.grid(True, alpha=0.3, axis='x')\n\n# Plot 3: Gap to best\nax3 = axes[1, 0]\ncolors_gap = ['green' if gap < 0.1 else 'orange' if gap < 1.0 else 'red' \n              for gap in df_comparison['Gap to Best (%)']]\nax3.barh(df_comparison['Surrogate'], df_comparison['Gap to Best (%)'], color=colors_gap)\nax3.set_xlabel('Gap to Best Solution (%)')\nax3.set_title('Solution Quality (Lower is Better)')\nax3.axvline(x=1.0, color='black', linestyle='--', alpha=0.5, linewidth=1)\nax3.grid(True, alpha=0.3, axis='x')\n\n# Plot 4: Efficiency (weight reduction per second)\nax4 = axes[1, 1]\nbaseline_weight = wingwt(np.array([[0.48, 0.4, 0.38, 0.5, 0.62, 0.344, 0.4, 0.37, 0.38]]))[0]\ndf_comparison['Efficiency'] = (baseline_weight - df_comparison['Best Weight']) / df_comparison['Time (s)']\nax4.barh(df_comparison['Surrogate'], df_comparison['Efficiency'], color='mediumseagreen')\nax4.set_xlabel('Weight Reduction per Second (lb/s)')\nax4.set_title('Optimization Efficiency')\nax4.grid(True, alpha=0.3, axis='x')\n\nplt.tight_layout()\nplt.show()",
           "crumbs": [
             "Surrogate Handling",
             "16  Surrogate Model Selection in SpotOptim"
      @@ -1796,7 +1796,7 @@
           "href": "surrogate_selection.html#key-insights-and-recommendations",
           "title": "16  Surrogate Model Selection in SpotOptim",
           "section": "16.15 Key Insights and Recommendations",
      -    "text": "16.15 Key Insights and Recommendations\n\nprint(\"\\n\" + \"=\" * 100)\nprint(\"KEY INSIGHTS AND RECOMMENDATIONS\")\nprint(\"=\" * 100)\n\n# Find best surrogate\nbest_surrogate = df_comparison.iloc[0]['Surrogate']\nbest_value = df_comparison.iloc[0]['Best Weight']\nbest_time = df_comparison.iloc[0]['Time (s)']\n\nprint(f\"\\n1. BEST OVERALL PERFORMANCE:\")\nprint(f\"   Surrogate: {best_surrogate}\")\nprint(f\"   Best Weight: {best_value:.4f} lb\")\nprint(f\"   Computation Time: {best_time:.2f}s\")\n\n# Find fastest\nfastest_idx = df_comparison['Time (s)'].idxmin()\nfastest_surrogate = df_comparison.loc[fastest_idx, 'Surrogate']\nfastest_time = df_comparison.loc[fastest_idx, 'Time (s)']\n\nprint(f\"\\n2. FASTEST OPTIMIZATION:\")\nprint(f\"   Surrogate: {fastest_surrogate}\")\nprint(f\"   Time: {fastest_time:.2f}s\")\nprint(f\"   Best Weight: {df_comparison.loc[fastest_idx, 'Best Weight']:.4f} lb\")\n\n# Find most efficient\nmost_efficient_idx = df_comparison['Efficiency'].idxmax()\nmost_efficient = df_comparison.loc[most_efficient_idx, 'Surrogate']\n\nprint(f\"\\n3. MOST EFFICIENT (weight reduction per second):\")\nprint(f\"   Surrogate: {most_efficient}\")\nprint(f\"   Efficiency: {df_comparison.loc[most_efficient_idx, 'Efficiency']:.4f} lb/s\")\n\nprint(f\"\\n4. RECOMMENDATIONS BY PROBLEM TYPE:\")\nprint(f\"   - Smooth, continuous functions: Gaussian Process with RBF or Matern nu=2.5\")\nprint(f\"   - Functions with noise: Random Forest or Gradient Boosting\")\nprint(f\"   - High-dimensional problems (>20D): XGBoost or Random Forest\")\nprint(f\"   - Limited budget (<50 evals): Gaussian Process with Expected Improvement\")\nprint(f\"   - Fast evaluation needed: XGBoost or Random Forest\")\nprint(f\"   - Need uncertainty estimates: Gaussian Process or Kriging\")\nprint(f\"   - Non-smooth/discontinuous: Random Forest or Gradient Boosting\")\n\nprint(f\"\\n5. 
KERNEL COMPARISON (Gaussian Process):\")\ngp_results = df_comparison[df_comparison['Surrogate'].str.contains('GP')]\nprint(gp_results[['Surrogate', 'Best Weight', 'Time (s)']].to_string(index=False))\n\nprint(\"\\n\" + \"=\" * 100)\n\n\n====================================================================================================\nKEY INSIGHTS AND RECOMMENDATIONS\n====================================================================================================\n\n1. BEST OVERALL PERFORMANCE:\n   Surrogate: GP Matern nu=1.5\n   Best Weight: 119.5465 lb\n   Computation Time: 0.45s\n\n2. FASTEST OPTIMIZATION:\n   Surrogate: SVR (RBF)\n   Time: 0.07s\n   Best Weight: 156.7071 lb\n\n3. MOST EFFICIENT (weight reduction per second):\n   Surrogate: SVR (RBF)\n   Efficiency: 1078.4574 lb/s\n\n4. RECOMMENDATIONS BY PROBLEM TYPE:\n   - Smooth, continuous functions: Gaussian Process with RBF or Matern nu=2.5\n   - Functions with noise: Random Forest or Gradient Boosting\n   - High-dimensional problems (>20D): XGBoost or Random Forest\n   - Limited budget (<50 evals): Gaussian Process with Expected Improvement\n   - Fast evaluation needed: XGBoost or Random Forest\n   - Need uncertainty estimates: Gaussian Process or Kriging\n   - Non-smooth/discontinuous: Random Forest or Gradient Boosting\n\n5. KERNEL COMPARISON (Gaussian Process):\n                 Surrogate  Best Weight  Time (s)\n          GP Matern nu=1.5   119.546454  0.449904\nGP Matern nu=2.5 (Default)   119.866409  1.646203\n     GP Rational Quadratic   120.107282  0.615868\n                    GP RBF   120.109251  0.467476\n\n====================================================================================================",
      +    "text": "16.15 Key Insights and Recommendations\n\nprint(\"\\n\" + \"=\" * 100)\nprint(\"KEY INSIGHTS AND RECOMMENDATIONS\")\nprint(\"=\" * 100)\n\n# Find best surrogate\nbest_surrogate = df_comparison.iloc[0]['Surrogate']\nbest_value = df_comparison.iloc[0]['Best Weight']\nbest_time = df_comparison.iloc[0]['Time (s)']\n\nprint(f\"\\n1. BEST OVERALL PERFORMANCE:\")\nprint(f\"   Surrogate: {best_surrogate}\")\nprint(f\"   Best Weight: {best_value:.4f} lb\")\nprint(f\"   Computation Time: {best_time:.2f}s\")\n\n# Find fastest\nfastest_idx = df_comparison['Time (s)'].idxmin()\nfastest_surrogate = df_comparison.loc[fastest_idx, 'Surrogate']\nfastest_time = df_comparison.loc[fastest_idx, 'Time (s)']\n\nprint(f\"\\n2. FASTEST OPTIMIZATION:\")\nprint(f\"   Surrogate: {fastest_surrogate}\")\nprint(f\"   Time: {fastest_time:.2f}s\")\nprint(f\"   Best Weight: {df_comparison.loc[fastest_idx, 'Best Weight']:.4f} lb\")\n\n# Find most efficient\nmost_efficient_idx = df_comparison['Efficiency'].idxmax()\nmost_efficient = df_comparison.loc[most_efficient_idx, 'Surrogate']\n\nprint(f\"\\n3. MOST EFFICIENT (weight reduction per second):\")\nprint(f\"   Surrogate: {most_efficient}\")\nprint(f\"   Efficiency: {df_comparison.loc[most_efficient_idx, 'Efficiency']:.4f} lb/s\")\n\nprint(f\"\\n4. RECOMMENDATIONS BY PROBLEM TYPE:\")\nprint(f\"   - Smooth, continuous functions: Gaussian Process with RBF or Matern nu=2.5\")\nprint(f\"   - Functions with noise: Random Forest or Gradient Boosting\")\nprint(f\"   - High-dimensional problems (>20D): XGBoost or Random Forest\")\nprint(f\"   - Limited budget (<50 evals): Gaussian Process with Expected Improvement\")\nprint(f\"   - Fast evaluation needed: XGBoost or Random Forest\")\nprint(f\"   - Need uncertainty estimates: Gaussian Process or Kriging\")\nprint(f\"   - Non-smooth/discontinuous: Random Forest or Gradient Boosting\")\n\nprint(f\"\\n5. 
KERNEL COMPARISON (Gaussian Process):\")\ngp_results = df_comparison[df_comparison['Surrogate'].str.contains('GP')]\nprint(gp_results[['Surrogate', 'Best Weight', 'Time (s)']].to_string(index=False))\n\nprint(\"\\n\" + \"=\" * 100)\n\n\n====================================================================================================\nKEY INSIGHTS AND RECOMMENDATIONS\n====================================================================================================\n\n1. BEST OVERALL PERFORMANCE:\n   Surrogate: GP Matern nu=1.5\n   Best Weight: 119.5465 lb\n   Computation Time: 0.45s\n\n2. FASTEST OPTIMIZATION:\n   Surrogate: SVR (RBF)\n   Time: 0.07s\n   Best Weight: 156.7071 lb\n\n3. MOST EFFICIENT (weight reduction per second):\n   Surrogate: SVR (RBF)\n   Efficiency: 1080.9451 lb/s\n\n4. RECOMMENDATIONS BY PROBLEM TYPE:\n   - Smooth, continuous functions: Gaussian Process with RBF or Matern nu=2.5\n   - Functions with noise: Random Forest or Gradient Boosting\n   - High-dimensional problems (>20D): XGBoost or Random Forest\n   - Limited budget (<50 evals): Gaussian Process with Expected Improvement\n   - Fast evaluation needed: XGBoost or Random Forest\n   - Need uncertainty estimates: Gaussian Process or Kriging\n   - Non-smooth/discontinuous: Random Forest or Gradient Boosting\n\n5. KERNEL COMPARISON (Gaussian Process):\n                 Surrogate  Best Weight  Time (s)\n          GP Matern nu=1.5   119.546454  0.452622\nGP Matern nu=2.5 (Default)   119.866409  1.705245\n     GP Rational Quadratic   120.107282  0.615395\n                    GP RBF   120.109251  0.471836\n\n====================================================================================================",
           "crumbs": [
             "Surrogate Handling",
             "16  Surrogate Model Selection in SpotOptim"
      @@ -1807,7 +1807,7 @@
           "href": "surrogate_selection.html#summary-statistics",
           "title": "16  Surrogate Model Selection in SpotOptim",
           "section": "16.16 Summary Statistics",
      -    "text": "16.16 Summary Statistics\n\n# Summary statistics\nprint(\"\\n\" + \"=\" * 100)\nprint(\"SUMMARY STATISTICS\")\nprint(\"=\" * 100)\n\nsummary_stats = pd.DataFrame({\n    'Metric': [\n        'Best Weight Found',\n        'Worst Weight Found',\n        'Average Weight',\n        'Std Dev Weight',\n        'Fastest Time',\n        'Slowest Time',\n        'Average Time',\n    ],\n    'Value': [\n        f\"{df_comparison['Best Weight'].min():.4f} lb\",\n        f\"{df_comparison['Best Weight'].max():.4f} lb\",\n        f\"{df_comparison['Best Weight'].mean():.4f} lb\",\n        f\"{df_comparison['Best Weight'].std():.4f} lb\",\n        f\"{df_comparison['Time (s)'].min():.2f} s\",\n        f\"{df_comparison['Time (s)'].max():.2f} s\",\n        f\"{df_comparison['Time (s)'].mean():.2f} s\",\n    ]\n})\n\nprint(summary_stats.to_string(index=False))\nprint(\"=\" * 100)\n\n\n====================================================================================================\nSUMMARY STATISTICS\n====================================================================================================\n            Metric       Value\n Best Weight Found 119.5465 lb\nWorst Weight Found 165.3064 lb\n    Average Weight 133.7145 lb\n    Std Dev Weight  18.0098 lb\n      Fastest Time      0.07 s\n      Slowest Time      8.08 s\n      Average Time      1.39 s\n====================================================================================================",
      +    "text": "16.16 Summary Statistics\n\n# Summary statistics\nprint(\"\\n\" + \"=\" * 100)\nprint(\"SUMMARY STATISTICS\")\nprint(\"=\" * 100)\n\nsummary_stats = pd.DataFrame({\n    'Metric': [\n        'Best Weight Found',\n        'Worst Weight Found',\n        'Average Weight',\n        'Std Dev Weight',\n        'Fastest Time',\n        'Slowest Time',\n        'Average Time',\n    ],\n    'Value': [\n        f\"{df_comparison['Best Weight'].min():.4f} lb\",\n        f\"{df_comparison['Best Weight'].max():.4f} lb\",\n        f\"{df_comparison['Best Weight'].mean():.4f} lb\",\n        f\"{df_comparison['Best Weight'].std():.4f} lb\",\n        f\"{df_comparison['Time (s)'].min():.2f} s\",\n        f\"{df_comparison['Time (s)'].max():.2f} s\",\n        f\"{df_comparison['Time (s)'].mean():.2f} s\",\n    ]\n})\n\nprint(summary_stats.to_string(index=False))\nprint(\"=\" * 100)\n\n\n====================================================================================================\nSUMMARY STATISTICS\n====================================================================================================\n            Metric       Value\n Best Weight Found 119.5465 lb\nWorst Weight Found 165.3064 lb\n    Average Weight 133.7145 lb\n    Std Dev Weight  18.0098 lb\n      Fastest Time      0.07 s\n      Slowest Time      8.06 s\n      Average Time      1.39 s\n====================================================================================================",
           "crumbs": [
             "Surrogate Handling",
             "16  Surrogate Model Selection in SpotOptim"
      @@ -1851,7 +1851,7 @@
           "href": "acquisition_failure.html#fallback-strategies",
           "title": "17  Acquisition Failure Handling in SpotOptim",
           "section": "17.2 Fallback Strategies",
      -    "text": "17.2 Fallback Strategies\nSpotOptim uses a fallback strategy to propose an alternative point. The acquisition_failure_strategy parameter controls this behavior, defaulting to \"random\".\n\n17.2.1 Random Space-Filling Design (Default)\nStrategy name: \"random\"\nThis strategy uses Latin Hypercube Sampling (LHS) to generate a new space-filling point. LHS ensures good coverage of the search space by dividing each dimension into equal-probability intervals.\nWhen to use:\n\nGeneral-purpose optimization\nWhen you want simplicity and good space-filling properties\nDefault choice for most problems\n\nExample:\n\nfrom spotoptim import SpotOptim\nimport numpy as np\n\ndef sphere(X):\n    return np.sum(X**2, axis=1)\n\noptimizer = SpotOptim(\n    fun=sphere,\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=50,\n    n_initial=10,\n    acquisition_failure_strategy=\"random\",  # Default\n    verbose=True\n)\n\nresult = optimizer.optimize()\n\nTensorBoard logging disabled\nInitial best: f(x) = 1.420785\nIter 1 | Best: 0.023695 | Rate: 1.00 | Evals: 22.0%\nIter 2 | Best: 0.000476 | Rate: 1.00 | Evals: 24.0%\nIter 3 | Best: 0.000076 | Rate: 1.00 | Evals: 26.0%\nIter 4 | Best: 0.000000 | Rate: 1.00 | Evals: 28.0%\nIter 5 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.80 | Evals: 30.0%\nIter 6 | Best: 0.000000 | Curr: 0.000001 | Rate: 0.67 | Evals: 32.0%\nIter 7 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.57 | Evals: 34.0%\nIter 8 | Best: 0.000000 | Rate: 0.62 | Evals: 36.0%\nIter 9 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.56 | Evals: 38.0%\nIter 10 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.50 | Evals: 40.0%\nIter 11 | Best: 0.000000 | Rate: 0.55 | Evals: 42.0%\nIter 12 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.50 | Evals: 44.0%\nIter 13 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.46 | Evals: 46.0%\nIter 14 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.43 | Evals: 48.0%\nIter 15 | Best: 0.000000 | Rate: 0.47 | Evals: 50.0%\nIter 16 | Best: 0.000000 | Curr: 
0.000000 | Rate: 0.44 | Evals: 52.0%\nIter 17 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.41 | Evals: 54.0%\nIter 18 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.39 | Evals: 56.0%\nIter 19 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.37 | Evals: 58.0%\nIter 20 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.35 | Evals: 60.0%\nIter 21 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.33 | Evals: 62.0%\nIter 22 | Best: 0.000000 | Rate: 0.36 | Evals: 64.0%\nIter 23 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.35 | Evals: 66.0%\nIter 24 | Best: 0.000000 | Curr: 0.000001 | Rate: 0.33 | Evals: 68.0%\nIter 25 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.32 | Evals: 70.0%\nIter 26 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.31 | Evals: 72.0%\nIter 27 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.30 | Evals: 74.0%\nIter 28 | Best: 0.000000 | Curr: 0.000001 | Rate: 0.29 | Evals: 76.0%\nIter 29 | Best: 0.000000 | Curr: 0.000001 | Rate: 0.28 | Evals: 78.0%\nIter 30 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.27 | Evals: 80.0%\nIter 31 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.26 | Evals: 82.0%\nIter 32 | Best: 0.000000 | Curr: 0.000001 | Rate: 0.25 | Evals: 84.0%\nIter 33 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.24 | Evals: 86.0%\nIter 34 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.24 | Evals: 88.0%\nIter 35 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.23 | Evals: 90.0%\nIter 36 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.22 | Evals: 92.0%\nIter 37 | Best: 0.000000 | Rate: 0.24 | Evals: 94.0%\nIter 38 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.24 | Evals: 96.0%\nIter 39 | Best: 0.000000 | Curr: 0.000001 | Rate: 0.23 | Evals: 98.0%\nIter 40 | Best: 0.000000 | Curr: 0.000005 | Rate: 0.23 | Evals: 100.0%",
      +    "text": "17.2 Fallback Strategies\nSpotOptim uses a fallback strategy to propose an alternative point. The acquisition_failure_strategy parameter controls this behavior, defaulting to \"random\".\n\n17.2.1 Random Space-Filling Design (Default)\nStrategy name: \"random\"\nThis strategy uses Latin Hypercube Sampling (LHS) to generate a new space-filling point. LHS ensures good coverage of the search space by dividing each dimension into equal-probability intervals.\nWhen to use:\n\nGeneral-purpose optimization\nWhen you want simplicity and good space-filling properties\nDefault choice for most problems\n\nExample:\n\nfrom spotoptim import SpotOptim\nimport numpy as np\n\ndef sphere(X):\n    return np.sum(X**2, axis=1)\n\noptimizer = SpotOptim(\n    fun=sphere,\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=50,\n    n_initial=10,\n    acquisition_failure_strategy=\"random\",  # Default\n    verbose=True\n)\n\nresult = optimizer.optimize()\n\nTensorBoard logging disabled\nInitial best: f(x) = 1.440556\nIter 1 | Best: 0.019454 | Rate: 1.00 | Evals: 22.0%\nIter 2 | Best: 0.000172 | Rate: 1.00 | Evals: 24.0%\nIter 3 | Best: 0.000044 | Rate: 1.00 | Evals: 26.0%\nIter 4 | Best: 0.000000 | Rate: 1.00 | Evals: 28.0%\nIter 5 | Best: 0.000000 | Rate: 1.00 | Evals: 30.0%\nIter 6 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.83 | Evals: 32.0%\nIter 7 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.71 | Evals: 34.0%\nIter 8 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.62 | Evals: 36.0%\nIter 9 | Best: 0.000000 | Rate: 0.67 | Evals: 38.0%\nIter 10 | Best: 0.000000 | Rate: 0.70 | Evals: 40.0%\nIter 11 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.64 | Evals: 42.0%\nIter 12 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.58 | Evals: 44.0%\nIter 13 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.54 | Evals: 46.0%\nIter 14 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.50 | Evals: 48.0%\nIter 15 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.47 | Evals: 50.0%\nIter 16 | Best: 0.000000 | Curr: 
0.000000 | Rate: 0.44 | Evals: 52.0%\nIter 17 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.41 | Evals: 54.0%\nIter 18 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.39 | Evals: 56.0%\nIter 19 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.37 | Evals: 58.0%\nIter 20 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.35 | Evals: 60.0%\nIter 21 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.33 | Evals: 62.0%\nIter 22 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.32 | Evals: 64.0%\nIter 23 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.30 | Evals: 66.0%\nIter 24 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.29 | Evals: 68.0%\nIter 25 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.28 | Evals: 70.0%\nIter 26 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.27 | Evals: 72.0%\nIter 27 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.26 | Evals: 74.0%\nIter 28 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.25 | Evals: 76.0%\nIter 29 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.24 | Evals: 78.0%\nIter 30 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.23 | Evals: 80.0%\nIter 31 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.23 | Evals: 82.0%\nIter 32 | Best: 0.000000 | Rate: 0.25 | Evals: 84.0%\nIter 33 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.24 | Evals: 86.0%\nIter 34 | Best: 0.000000 | Rate: 0.26 | Evals: 88.0%\nIter 35 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.26 | Evals: 90.0%\nIter 36 | Best: 0.000000 | Curr: 0.000002 | Rate: 0.25 | Evals: 92.0%\nIter 37 | Best: 0.000000 | Curr: 0.000001 | Rate: 0.24 | Evals: 94.0%\nIter 38 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.24 | Evals: 96.0%\nIter 39 | Best: 0.000000 | Curr: 0.000001 | Rate: 0.23 | Evals: 98.0%\nIter 40 | Best: 0.000000 | Curr: 0.000000 | Rate: 0.23 | Evals: 100.0%",
           "crumbs": [
             "Surrogate Handling",
             "17  Acquisition Failure Handling in SpotOptim"
      @@ -2027,7 +2027,7 @@
           "href": "acquisition_optimization.html#examples",
           "title": "19  Optimization on the Surrogate",
           "section": "19.2 Examples",
      -    "text": "19.2 Examples\nThe following examples demonstrate how to configure these parameters.\n\n19.2.1 1. Default Configuration (Differential Evolution)\nBy default, SpotOptim uses Differential Evolution (scipy.optimize.differential_evolution).\n\nimport numpy as np\nfrom spotoptim import SpotOptim\n\ndef obj_fun(X):\n    return np.sum(X**2, axis=1)\n\n# Default behavior\nspot = SpotOptim(\n    fun=obj_fun,\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=10,\n    n_initial=2,\n    acquisition=\"EI\",\n    # Default: acquisition_optimizer=\"differential_evolution\"\n)\nspot.optimize()\nprint(\"Best y:\", spot.best_y_)\n\nBest y: 0.011893909296361216\n\n\n\n\n19.2.2 2. Customizing Differential Evolution\nYou can use acquisition_optimizer_kwargs to adjust Differential Evolution parameters, such as increasing maxiter or changing the popsize.\n\nimport numpy as np\nfrom spotoptim import SpotOptim\n\ndef obj_fun(X):\n    return np.sum(X**2, axis=1)\n\n# Configure DE parameters\nde_kwargs = {\n    \"maxiter\": 200,    # Increase max iterations\n    \"popsize\": 30,     # Increase population size\n    \"mutation\": (0.6, 1.1)\n}\n\nspot = SpotOptim(\n    fun=obj_fun,\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=10,\n    n_initial=2,\n    acquisition=\"EI\",\n    acquisition_optimizer=\"differential_evolution\",\n    acquisition_optimizer_kwargs=de_kwargs\n)\nspot.optimize()\nprint(\"Best y with Custom DE:\", spot.best_y_)\n\nBest y with Custom DE: 0.05032941308851764\n\n\n\n\n19.2.3 3. Using Gradient-Based Optimization (L-BFGS-B)\nYou can switch to a gradient-based optimizer like L-BFGS-B by specifying it in acquisition_optimizer. 
Note that for minimize-based methods, parameters are usually passed via an options dictionary within acquisition_optimizer_kwargs.\n\nimport numpy as np\nfrom spotoptim import SpotOptim\n\ndef obj_fun(X):\n    return np.sum(X**2, axis=1)\n\n# Configure L-BFGS-B parameters\nlbfgs_kwargs = {\n    \"method\": \"L-BFGS-B\",  # Explicitly state method (good practice)\n    \"options\": {\n        \"maxiter\": 100,\n        \"ftol\": 1e-9\n    }\n}\n\nspot = SpotOptim(\n    fun=obj_fun,\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=10,\n    n_initial=2,\n    acquisition=\"EI\",\n    acquisition_optimizer=\"L-BFGS-B\",\n    acquisition_optimizer_kwargs=lbfgs_kwargs\n)\nspot.optimize()\nprint(\"Best y with L-BFGS-B:\", spot.best_y_)\n\nBest y with L-BFGS-B: 0.2641559097007588\n\n\n\n\n19.2.4 4. Using Gradient-Free Optimization (Nelder-Mead)\nFor non-smooth acquisition landscapes or when robustness is needed without gradients, Nelder-Mead is a good choice. SpotOptim automatically handles the interface to ensure compatibility.\n\nimport numpy as np\nfrom spotoptim import SpotOptim\n\ndef obj_fun(X):\n    return np.sum(X**2, axis=1)\n\n# Configure Nelder-Mead\nnm_kwargs = {\n    \"method\": \"Nelder-Mead\",\n    \"options\": {\n        \"maxiter\": 500,\n        \"adaptive\": True\n    }\n}\n\nspot = SpotOptim(\n    fun=obj_fun,\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=10,\n    n_initial=2,\n    acquisition=\"EI\",\n    acquisition_optimizer=\"Nelder-Mead\",\n    acquisition_optimizer_kwargs=nm_kwargs\n)\nspot.optimize()\nprint(\"Best y with Nelder-Mead:\", spot.best_y_)\n\nBest y with Nelder-Mead: 0.5883781258898368\n\n\n\n\n19.2.5 5. 
Returning Multiple Candidates\nSetting acquisition_fun_return_size > 1 forces the optimizer to return multiple candidate points (e.g., the top N from the final population).\n\nimport numpy as np\nfrom spotoptim import SpotOptim\n\ndef obj_fun(X):\n    return np.sum(X**2, axis=1)\n\nspot = SpotOptim(\n    fun=obj_fun,\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=5, # Short run just to demo config\n    n_initial=2,\n    acquisition=\"EI\",\n    acquisition_fun_return_size=5  # Return top 5 candidates\n)\n# The internal optimization loop handles these candidates automatically\nspot.optimize()\n\n message: Optimization terminated: maximum evaluations (5) reached\n                   Current function value: 7.568267\n                   Iterations: 3\n                   Function evaluations: 5\n success: True\n     fun: 7.568266507987052\n       x: [ 1.437e+00  2.346e+00]\n       X: [[ 1.437e+00  2.346e+00]\n           [-1.993e+00 -2.151e+00]\n           [ 1.387e+00  2.696e+00]\n           [-2.391e+00 -1.403e+00]\n           [-2.399e+00 -1.405e+00]]\n     nit: 3\n    nfev: 5\n       y: [ 7.568e+00  8.600e+00  9.193e+00  7.687e+00  7.728e+00]",
      +    "text": "19.2 Examples\nThe following examples demonstrate how to configure these parameters.\n\n19.2.1 1. Default Configuration (Differential Evolution)\nBy default, SpotOptim uses Differential Evolution (scipy.optimize.differential_evolution).\n\nimport numpy as np\nfrom spotoptim import SpotOptim\n\ndef obj_fun(X):\n    return np.sum(X**2, axis=1)\n\n# Default behavior\nspot = SpotOptim(\n    fun=obj_fun,\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=10,\n    n_initial=2,\n    acquisition=\"EI\",\n    # Default: acquisition_optimizer=\"differential_evolution\"\n)\nspot.optimize()\nprint(\"Best y:\", spot.best_y_)\n\nBest y: 0.020703279400656156\n\n\n\n\n19.2.2 2. Customizing Differential Evolution\nYou can use acquisition_optimizer_kwargs to adjust Differential Evolution parameters, such as increasing maxiter or changing the popsize.\n\nimport numpy as np\nfrom spotoptim import SpotOptim\n\ndef obj_fun(X):\n    return np.sum(X**2, axis=1)\n\n# Configure DE parameters\nde_kwargs = {\n    \"maxiter\": 200,    # Increase max iterations\n    \"popsize\": 30,     # Increase population size\n    \"mutation\": (0.6, 1.1)\n}\n\nspot = SpotOptim(\n    fun=obj_fun,\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=10,\n    n_initial=2,\n    acquisition=\"EI\",\n    acquisition_optimizer=\"differential_evolution\",\n    acquisition_optimizer_kwargs=de_kwargs\n)\nspot.optimize()\nprint(\"Best y with Custom DE:\", spot.best_y_)\n\nBest y with Custom DE: 0.0014445206408396619\n\n\n\n\n19.2.3 3. Using Gradient-Based Optimization (L-BFGS-B)\nYou can switch to a gradient-based optimizer like L-BFGS-B by specifying it in acquisition_optimizer. 
Note that for minimize-based methods, parameters are usually passed via an options dictionary within acquisition_optimizer_kwargs.\n\nimport numpy as np\nfrom spotoptim import SpotOptim\n\ndef obj_fun(X):\n    return np.sum(X**2, axis=1)\n\n# Configure L-BFGS-B parameters\nlbfgs_kwargs = {\n    \"method\": \"L-BFGS-B\",  # Explicitly state method (good practice)\n    \"options\": {\n        \"maxiter\": 100,\n        \"ftol\": 1e-9\n    }\n}\n\nspot = SpotOptim(\n    fun=obj_fun,\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=10,\n    n_initial=2,\n    acquisition=\"EI\",\n    acquisition_optimizer=\"L-BFGS-B\",\n    acquisition_optimizer_kwargs=lbfgs_kwargs\n)\nspot.optimize()\nprint(\"Best y with L-BFGS-B:\", spot.best_y_)\n\nBest y with L-BFGS-B: 0.013698181964238821\n\n\n\n\n19.2.4 4. Using Gradient-Free Optimization (Nelder-Mead)\nFor non-smooth acquisition landscapes or when robustness is needed without gradients, Nelder-Mead is a good choice. SpotOptim automatically handles the interface to ensure compatibility.\n\nimport numpy as np\nfrom spotoptim import SpotOptim\n\ndef obj_fun(X):\n    return np.sum(X**2, axis=1)\n\n# Configure Nelder-Mead\nnm_kwargs = {\n    \"method\": \"Nelder-Mead\",\n    \"options\": {\n        \"maxiter\": 500,\n        \"adaptive\": True\n    }\n}\n\nspot = SpotOptim(\n    fun=obj_fun,\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=10,\n    n_initial=2,\n    acquisition=\"EI\",\n    acquisition_optimizer=\"Nelder-Mead\",\n    acquisition_optimizer_kwargs=nm_kwargs\n)\nspot.optimize()\nprint(\"Best y with Nelder-Mead:\", spot.best_y_)\n\nBest y with Nelder-Mead: 0.03305715511489834\n\n\n\n\n19.2.5 5. 
Returning Multiple Candidates\nSetting acquisition_fun_return_size > 1 forces the optimizer to return multiple candidate points (e.g., the top N from the final population).\n\nimport numpy as np\nfrom spotoptim import SpotOptim\n\ndef obj_fun(X):\n    return np.sum(X**2, axis=1)\n\nspot = SpotOptim(\n    fun=obj_fun,\n    bounds=[(-5, 5), (-5, 5)],\n    max_iter=5, # Short run just to demo config\n    n_initial=2,\n    acquisition=\"EI\",\n    acquisition_fun_return_size=5  # Return top 5 candidates\n)\n# The internal optimization loop handles these candidates automatically\nspot.optimize()\n\n message: Optimization terminated: maximum evaluations (5) reached\n                   Current function value: 5.221754\n                   Iterations: 3\n                   Function evaluations: 5\n success: True\n     fun: 5.221754123644815\n       x: [ 1.770e+00  1.446e+00]\n       X: [[-4.921e+00 -4.794e+00]\n           [ 2.306e+00  4.720e+00]\n           [ 2.209e+00  4.434e+00]\n           [ 1.893e+00  3.502e+00]\n           [ 1.770e+00  1.446e+00]]\n     nit: 3\n    nfev: 5\n       y: [ 4.720e+01  2.760e+01  2.454e+01  1.585e+01  5.222e+00]",
           "crumbs": [
             "Optimization on the Surrogate",
             "19  Optimization on the Surrogate"
      @@ -2676,7 +2676,7 @@
           "href": "mlp.html#hyperparameter-tuning",
           "title": "26  The MLP Class",
           "section": "26.5 Hyperparameter Tuning",
      -    "text": "26.5 Hyperparameter Tuning\nOne of the key features of the MLP class is its ability to suggest a default ParameterSet for tuning. This provides a great starting point for finding the best architecture.\n\nfrom spotoptim.hyperparameters import ParameterSet\n\n# Get default search space\nparams = MLP.get_default_parameters()\nprint(\"Default tunable parameters:\", params.names())\n\nDefault tunable parameters: ['l1', 'num_hidden_layers', 'activation', 'lr', 'optimizer']\n\n\n\n26.5.1 Example: Tuning with SpotOptim\nHere is how you can use the MLP class in a full SpotOptim tuning loop using TorchObjective.\n\nfrom spotoptim import SpotOptim\nfrom spotoptim.core.experiment import ExperimentControl\nfrom spotoptim.core.data import SpotDataFromArray\nfrom spotoptim.function.torch_objective import TorchObjective\nimport numpy as np\n\n# 1. Dummy Data\nX = np.random.rand(100, 10)\ny = np.random.rand(100, 1)\ndata = SpotDataFromArray(X, y)\n\n# 2. Get Default Parameters & Add Custom Ones\nparams = MLP.get_default_parameters()\n\n# Customize: fix the optimizer to Adam, but tune epochs\nparams.add_int(\"epochs\", 5, 20, default=10)\n\n# 3. Setup Experiment\nexperiment = ExperimentControl(\n    experiment_name=\"mlp_tuning_demo\",\n    model_class=MLP,\n    dataset=data,\n    hyperparameters=params,\n    metrics=[\"val_loss\"], \n    device=\"cpu\",\n    batch_size=16\n)\n\n# 4. Create Objective\nobjective = TorchObjective(experiment)\n\n# 5. 
Optimize\noptimizer = SpotOptim(\n    fun=objective,\n    bounds=objective.bounds,\n    var_type=objective.var_type,\n    var_name=objective.var_name,\n    var_trans=objective.var_trans,\n    n_initial=3,\n    max_iter=5,\n    seed=42,\n    verbose=False\n)\n\nres = optimizer.optimize()\n\nprint(\"Best Parameters:\")\nprint(objective._get_hyperparameters(res.x))\n\nBest Parameters:\n{'l1': 64, 'num_hidden_layers': 2, 'activation': 'LeakyReLU', 'lr': 3.878308634750335, 'optimizer': 'Adam', 'epochs': 14}\n\n\nThis setup automatically tunes the architecture (l1, num_hidden_layers), usage of activation functions (activation), learning rate (lr), and optimization method (optimizer) if left in the parameter set.",
      +    "text": "26.5 Hyperparameter Tuning\nOne of the key features of the MLP class is its ability to suggest a default ParameterSet for tuning. This provides a great starting point for finding the best architecture.\n\nfrom spotoptim.hyperparameters import ParameterSet\n\n# Get default search space\nparams = MLP.get_default_parameters()\nprint(\"Default tunable parameters:\", params.names())\n\nDefault tunable parameters: ['l1', 'num_hidden_layers', 'activation', 'lr', 'optimizer']\n\n\n\n26.5.1 Example: Tuning with SpotOptim\nHere is how you can use the MLP class in a full SpotOptim tuning loop using TorchObjective.\n\nfrom spotoptim import SpotOptim\nfrom spotoptim.core.experiment import ExperimentControl\nfrom spotoptim.core.data import SpotDataFromArray\nfrom spotoptim.function.torch_objective import TorchObjective\nimport numpy as np\n\n# 1. Dummy Data\nX = np.random.rand(100, 10)\ny = np.random.rand(100, 1)\ndata = SpotDataFromArray(X, y)\n\n# 2. Get Default Parameters & Add Custom Ones\nparams = MLP.get_default_parameters()\n\n# Customize: fix the optimizer to Adam, but tune epochs\nparams.add_int(\"epochs\", 5, 20, default=10)\n\n# 3. Setup Experiment\nexperiment = ExperimentControl(\n    experiment_name=\"mlp_tuning_demo\",\n    model_class=MLP,\n    dataset=data,\n    hyperparameters=params,\n    metrics=[\"val_loss\"], \n    device=\"cpu\",\n    batch_size=16\n)\n\n# 4. Create Objective\nobjective = TorchObjective(experiment)\n\n# 5. 
Optimize\noptimizer = SpotOptim(\n    fun=objective,\n    bounds=objective.bounds,\n    var_type=objective.var_type,\n    var_name=objective.var_name,\n    var_trans=objective.var_trans,\n    n_initial=3,\n    max_iter=5,\n    seed=42,\n    verbose=False\n)\n\nres = optimizer.optimize()\n\nprint(\"Best Parameters:\")\nprint(objective._get_hyperparameters(res.x))\n\nBest Parameters:\n{'l1': 64, 'num_hidden_layers': 2, 'activation': 'LeakyReLU', 'lr': 0.39507990921803654, 'optimizer': 'Adam', 'epochs': 14}\n\n\nThis setup automatically tunes the architecture (l1, num_hidden_layers), usage of activation functions (activation), learning rate (lr), and optimization method (optimizer) if left in the parameter set.",
           "crumbs": [
             "Hyperparameter Tuning",
             "26  The MLP Class"
      @@ -2852,7 +2852,7 @@
           "href": "pinns_2_hyperparameter_tuning.html#run-the-optimization",
           "title": "28  Hyperparameter Tuning for Physics-Informed Neural Networks",
           "section": "34.2 Run the Optimization",
      -    "text": "34.2 Run the Optimization\nUse tensorboard --logdir=runs from a shell in the current directory (where this notebook is located) to visualize the optimization process.\nSetting the bounds for the search space:\n\n# Define search space with var_trans for automatic log-scale handling\nbounds = [\n    (16, 128),                                      # l1: neurons per layer (16 to 128)\n    (1, 4),                                         # num_layers: 1 to 4 hidden layers\n    (\"Tanh\", \"ReLU\", \"Sigmoid\", \"GELU\"),         # activation: activation function\n    (\"Adam\", \"SGD\", \"RMSprop\", \"AdamW\"),          # optimizer: optimizer algorithm\n    (0.1, 10.0),                                    # lr_unified: learning rate (0.1 to 10)\n    (0.01, 1.0)                                     # alpha: physics weight (0.01 to 1.0)\n]\n\nSpecify the variable types and transformations. Use var_trans to handle log-scale transformations automatically, factor variables don’t need transformations (None):\n\nvar_type = [\"int\", \"int\", \"factor\", \"factor\", \"float\", \"float\"]\nvar_name = [\"l1\", \"num_layers\", \"activation\", \"optimizer\", \"lr_unified\", \"alpha\"]\nvar_trans = [None, None, None, None, \"log10\", \"log10\"]\n\n\n# Create optimizer\noptimizer = SpotOptim(\n    fun=objective_pinn,\n    bounds=bounds,\n    var_type=var_type,\n    var_name=var_name,\n    var_trans=var_trans,  # Automatic log-scale handling!\n    max_iter=MAX_ITER,\n    n_initial=N_INITIAL,\n    seed=42,\n    verbose=True,\n    tensorboard_clean=True,\n    tensorboard_log=True\n)\n\nFactor variable at dimension 2:\n  Levels: ['Tanh', 'ReLU', 'Sigmoid', 'GELU']\n  Mapped to integers: 0 to 3\nFactor variable at dimension 3:\n  Levels: ['Adam', 'SGD', 'RMSprop', 'AdamW']\n  Mapped to integers: 0 to 3\nRemoved old TensorBoard logs: runs/spotoptim_20260404_094129\nCleaned 1 old TensorBoard log directory\nTensorBoard logging enabled: 
runs/spotoptim_20260404_101907\n\n\nDisplay search space configuration. The transcolumn shows applied transformations. lr_unified and alpha use log10 transformation internally. This enables efficient exploration of log-scale parameters. All values shown are in original scale (not transformed).\n\ndesign_table = optimizer.get_design_table(tablefmt=\"github\")\nprint(design_table)\n\n|       name |   type |   lower |    upper |   default |   transform |\n|------------|--------|---------|----------|-----------|-------------|\n|         l1 |    int | 16.0000 | 128.0000 |        72 |           - |\n| num_layers |    int |  1.0000 |   4.0000 |         2 |           - |\n| activation | factor |       - |        - |   Sigmoid |           - |\n|  optimizer | factor |       - |        - |   RMSprop |           - |\n| lr_unified |  float |  0.1000 |  10.0000 |    5.0500 |       log10 |\n|      alpha |  float |  0.0100 |   1.0000 |    0.5050 |       log10 |\n\n\nRun optimization\n\nresult = optimizer.optimize()\n\n\nConfiguration 1/10:\n  l1=28, num_layers=2, activation=Sigmoid, \n  optimizer=RMSprop, lr_unified=1.0211, alpha=0.0923\n  Validation MSE: 0.192541\n\nConfiguration 2/10:\n  l1=117, num_layers=3, activation=ReLU, \n  optimizer=SGD, lr_unified=0.4832, alpha=0.0146\n  Validation MSE: 0.205611\n\nConfiguration 3/10:\n  l1=92, num_layers=1, activation=Tanh, \n  optimizer=SGD, lr_unified=6.5194, alpha=0.6267\n  Validation MSE: nan\n\nConfiguration 4/10:\n  l1=41, num_layers=4, activation=ReLU, \n  optimizer=Adam, lr_unified=0.2742, alpha=0.1697\n  Validation MSE: 0.185978\n\nConfiguration 5/10:\n  l1=105, num_layers=2, activation=ReLU, \n  optimizer=AdamW, lr_unified=0.1136, alpha=0.0176\n  Validation MSE: 0.202467\n\nConfiguration 6/10:\n  l1=24, num_layers=1, activation=Sigmoid, \n  optimizer=RMSprop, lr_unified=4.4408, alpha=0.0293\n  Validation MSE: 0.189705\n\nConfiguration 7/10:\n  l1=83, num_layers=3, activation=GELU, \n  optimizer=AdamW, lr_unified=0.2277, 
alpha=0.2652\n  Validation MSE: 0.181881\n\nConfiguration 8/10:\n  l1=65, num_layers=2, activation=Tanh, \n  optimizer=SGD, lr_unified=3.4389, alpha=0.9830\n  Validation MSE: 0.195487\n\nConfiguration 9/10:\n  l1=116, num_layers=3, activation=Sigmoid, \n  optimizer=RMSprop, lr_unified=0.8521, alpha=0.1020\n  Validation MSE: 0.180443\n\nConfiguration 10/10:\n  l1=51, num_layers=4, activation=Sigmoid, \n  optimizer=Adam, lr_unified=1.6729, alpha=0.0458\n  Validation MSE: 0.205772\nWarning: 1 initial design point(s) returned NaN/inf and will be ignored (reduced from 10 to 9 points)\nNote: Initial design size (9) is smaller than requested (10) due to NaN/inf values\nInitial best: f(x) = 0.180443\n\nConfiguration 1/1:\n  l1=100, num_layers=2, activation=Sigmoid, \n  optimizer=RMSprop, lr_unified=0.1469, alpha=0.0131\n  Validation MSE: 0.196912\nIter 1 | Best: 0.180443 | Curr: 0.196912 | Rate: 0.00 | Evals: 66.7%\n\nConfiguration 1/1:\n  l1=94, num_layers=1, activation=Sigmoid, \n  optimizer=SGD, lr_unified=3.6048, alpha=0.0374\n  Validation MSE: 0.196974\nIter 2 | Best: 0.180443 | Curr: 0.196974 | Rate: 0.00 | Evals: 73.3%\n\nConfiguration 1/1:\n  l1=96, num_layers=3, activation=GELU, \n  optimizer=RMSprop, lr_unified=0.2996, alpha=0.1593\n  Validation MSE: 0.169332\nIter 3 | Best: 0.169332 | Rate: 0.33 | Evals: 80.0%\n\nConfiguration 1/1:\n  l1=127, num_layers=1, activation=Sigmoid, \n  optimizer=SGD, lr_unified=1.9791, alpha=0.2477\n  Validation MSE: 0.208434\nIter 4 | Best: 0.169332 | Curr: 0.208434 | Rate: 0.25 | Evals: 86.7%\n\nConfiguration 1/1:\n  l1=78, num_layers=3, activation=Sigmoid, \n  optimizer=SGD, lr_unified=0.5950, alpha=0.4743\n  Validation MSE: 0.228979\nIter 5 | Best: 0.169332 | Curr: 0.228979 | Rate: 0.20 | Evals: 93.3%\n\nConfiguration 1/1:\n  l1=81, num_layers=2, activation=Sigmoid, \n  optimizer=Adam, lr_unified=0.2781, alpha=0.0168\n  Validation MSE: 0.202393\nIter 6 | Best: 0.169332 | Curr: 0.202393 | Rate: 0.17 | Evals: 100.0%\nTensorBoard 
writer closed. View logs with: tensorboard --logdir=runs/spotoptim_20260404_101907",
      +    "text": "34.2 Run the Optimization\nUse tensorboard --logdir=runs from a shell in the current directory (where this notebook is located) to visualize the optimization process.\nSetting the bounds for the search space:\n\n# Define search space with var_trans for automatic log-scale handling\nbounds = [\n    (16, 128),                                      # l1: neurons per layer (16 to 128)\n    (1, 4),                                         # num_layers: 1 to 4 hidden layers\n    (\"Tanh\", \"ReLU\", \"Sigmoid\", \"GELU\"),         # activation: activation function\n    (\"Adam\", \"SGD\", \"RMSprop\", \"AdamW\"),          # optimizer: optimizer algorithm\n    (0.1, 10.0),                                    # lr_unified: learning rate (0.1 to 10)\n    (0.01, 1.0)                                     # alpha: physics weight (0.01 to 1.0)\n]\n\nSpecify the variable types and transformations. Use var_trans to handle log-scale transformations automatically, factor variables don’t need transformations (None):\n\nvar_type = [\"int\", \"int\", \"factor\", \"factor\", \"float\", \"float\"]\nvar_name = [\"l1\", \"num_layers\", \"activation\", \"optimizer\", \"lr_unified\", \"alpha\"]\nvar_trans = [None, None, None, None, \"log10\", \"log10\"]\n\n\n# Create optimizer\noptimizer = SpotOptim(\n    fun=objective_pinn,\n    bounds=bounds,\n    var_type=var_type,\n    var_name=var_name,\n    var_trans=var_trans,  # Automatic log-scale handling!\n    max_iter=MAX_ITER,\n    n_initial=N_INITIAL,\n    seed=42,\n    verbose=True,\n    tensorboard_clean=True,\n    tensorboard_log=True\n)\n\nFactor variable at dimension 2:\n  Levels: ['Tanh', 'ReLU', 'Sigmoid', 'GELU']\n  Mapped to integers: 0 to 3\nFactor variable at dimension 3:\n  Levels: ['Adam', 'SGD', 'RMSprop', 'AdamW']\n  Mapped to integers: 0 to 3\nRemoved old TensorBoard logs: runs/spotoptim_20260411_213846\nCleaned 1 old TensorBoard log directory\nTensorBoard logging enabled: 
runs/spotoptim_20260411_221608\n\n\nDisplay search space configuration. The transcolumn shows applied transformations. lr_unified and alpha use log10 transformation internally. This enables efficient exploration of log-scale parameters. All values shown are in original scale (not transformed).\n\ndesign_table = optimizer.get_design_table(tablefmt=\"github\")\nprint(design_table)\n\n|       name |   type |   lower |    upper |   default |   transform |\n|------------|--------|---------|----------|-----------|-------------|\n|         l1 |    int | 16.0000 | 128.0000 |        72 |           - |\n| num_layers |    int |  1.0000 |   4.0000 |         2 |           - |\n| activation | factor |       - |        - |   Sigmoid |           - |\n|  optimizer | factor |       - |        - |   RMSprop |           - |\n| lr_unified |  float |  0.1000 |  10.0000 |    5.0500 |       log10 |\n|      alpha |  float |  0.0100 |   1.0000 |    0.5050 |       log10 |\n\n\nRun optimization\n\nresult = optimizer.optimize()\n\n\nConfiguration 1/10:\n  l1=28, num_layers=2, activation=Sigmoid, \n  optimizer=RMSprop, lr_unified=1.0211, alpha=0.0923\n  Validation MSE: 0.192541\n\nConfiguration 2/10:\n  l1=117, num_layers=3, activation=ReLU, \n  optimizer=SGD, lr_unified=0.4832, alpha=0.0146\n  Validation MSE: 0.205611\n\nConfiguration 3/10:\n  l1=92, num_layers=1, activation=Tanh, \n  optimizer=SGD, lr_unified=6.5194, alpha=0.6267\n  Validation MSE: nan\n\nConfiguration 4/10:\n  l1=41, num_layers=4, activation=ReLU, \n  optimizer=Adam, lr_unified=0.2742, alpha=0.1697\n  Validation MSE: 0.185978\n\nConfiguration 5/10:\n  l1=105, num_layers=2, activation=ReLU, \n  optimizer=AdamW, lr_unified=0.1136, alpha=0.0176\n  Validation MSE: 0.202467\n\nConfiguration 6/10:\n  l1=24, num_layers=1, activation=Sigmoid, \n  optimizer=RMSprop, lr_unified=4.4408, alpha=0.0293\n  Validation MSE: 0.189705\n\nConfiguration 7/10:\n  l1=83, num_layers=3, activation=GELU, \n  optimizer=AdamW, lr_unified=0.2277, 
alpha=0.2652\n  Validation MSE: 0.181881\n\nConfiguration 8/10:\n  l1=65, num_layers=2, activation=Tanh, \n  optimizer=SGD, lr_unified=3.4389, alpha=0.9830\n  Validation MSE: 0.195487\n\nConfiguration 9/10:\n  l1=116, num_layers=3, activation=Sigmoid, \n  optimizer=RMSprop, lr_unified=0.8521, alpha=0.1020\n  Validation MSE: 0.180443\n\nConfiguration 10/10:\n  l1=51, num_layers=4, activation=Sigmoid, \n  optimizer=Adam, lr_unified=1.6729, alpha=0.0458\n  Validation MSE: 0.205772\nWarning: 1 initial design point(s) returned NaN/inf and will be ignored (reduced from 10 to 9 points)\nNote: Initial design size (9) is smaller than requested (10) due to NaN/inf values\nInitial best: f(x) = 0.180443\n\nConfiguration 1/1:\n  l1=100, num_layers=2, activation=Sigmoid, \n  optimizer=RMSprop, lr_unified=0.1469, alpha=0.0131\n  Validation MSE: 0.196912\nIter 1 | Best: 0.180443 | Curr: 0.196912 | Rate: 0.00 | Evals: 66.7%\n\nConfiguration 1/1:\n  l1=94, num_layers=1, activation=Sigmoid, \n  optimizer=SGD, lr_unified=3.6048, alpha=0.0374\n  Validation MSE: 0.196974\nIter 2 | Best: 0.180443 | Curr: 0.196974 | Rate: 0.00 | Evals: 73.3%\n\nConfiguration 1/1:\n  l1=96, num_layers=3, activation=GELU, \n  optimizer=RMSprop, lr_unified=0.2996, alpha=0.1593\n  Validation MSE: 0.169332\nIter 3 | Best: 0.169332 | Rate: 0.33 | Evals: 80.0%\n\nConfiguration 1/1:\n  l1=127, num_layers=1, activation=Sigmoid, \n  optimizer=SGD, lr_unified=1.9791, alpha=0.2477\n  Validation MSE: 0.208434\nIter 4 | Best: 0.169332 | Curr: 0.208434 | Rate: 0.25 | Evals: 86.7%\n\nConfiguration 1/1:\n  l1=78, num_layers=3, activation=Sigmoid, \n  optimizer=SGD, lr_unified=0.5950, alpha=0.4743\n  Validation MSE: 0.228979\nIter 5 | Best: 0.169332 | Curr: 0.228979 | Rate: 0.20 | Evals: 93.3%\n\nConfiguration 1/1:\n  l1=81, num_layers=2, activation=Sigmoid, \n  optimizer=Adam, lr_unified=0.2781, alpha=0.0168\n  Validation MSE: 0.202393\nIter 6 | Best: 0.169332 | Curr: 0.202393 | Rate: 0.17 | Evals: 100.0%\nTensorBoard 
writer closed. View logs with: tensorboard --logdir=runs/spotoptim_20260411_221608",
           "crumbs": [
             "Hyperparameter Tuning",
             "28  Hyperparameter Tuning for Physics-Informed Neural Networks"
      @@ -3171,7 +3171,7 @@
           "href": "diabetes_dataset.html#quick-start",
           "title": "31  Diabetes Dataset Utilities",
           "section": "31.2 Quick Start",
      -    "text": "31.2 Quick Start\n\n31.2.1 Basic Usage\n\nfrom spotoptim.data import get_diabetes_dataloaders\nfrom sklearn.datasets import load_diabetes\nfrom spotoptim.data.diabetes import DiabetesDataset\nimport numpy as np\n\n# Load data\ndiabetes = load_diabetes()\nX = diabetes.data\ny = diabetes.target.reshape(-1, 1)\n\n# Now create the dataset\ndataset = DiabetesDataset(X, y, transform=None, target_transform=None)\n# Load data with default settings\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders()\n\n# Iterate through batches\nfor batch_X, batch_y in train_loader:\n    print(f\"Batch features: {batch_X.shape}\")  # (32, 10)\n    print(f\"Batch targets: {batch_y.shape}\")   # (32, 1)\n    break\n\nBatch features: torch.Size([32, 10])\nBatch targets: torch.Size([32, 1])\n\n\n\n\n31.2.2 Training a Model\n\nimport torch\nimport torch.nn as nn\nfrom spotoptim.data import get_diabetes_dataloaders\nfrom spotoptim.nn.linear_regressor import LinearRegressor\n\n# Load data\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    test_size=0.2,\n    batch_size=32,\n    scale_features=True,\n    random_state=42\n)\n\n# Create model\nmodel = LinearRegressor(\n    input_dim=10,\n    output_dim=1,\n    l1=64,\n    num_hidden_layers=2,\n    activation=\"ReLU\"\n)\n\n# Setup training\ncriterion = nn.MSELoss()\noptimizer = model.get_optimizer(\"Adam\", lr=0.01)\n\n# Training loop\nnum_epochs = 100\nfor epoch in range(num_epochs):\n    model.train()\n    train_loss = 0.0\n    \n    for batch_X, batch_y in train_loader:\n        # Forward pass\n        predictions = model(batch_X)\n        loss = criterion(predictions, batch_y)\n        \n        # Backward pass\n        optimizer.zero_grad()\n        loss.backward()\n        optimizer.step()\n        \n        train_loss += loss.item()\n    \n    avg_train_loss = train_loss / len(train_loader)\n    \n    if (epoch + 1) % 20 == 0:\n        print(f\"Epoch {epoch+1}/{num_epochs}: Loss = 
{avg_train_loss:.4f}\")\n\n# Evaluation\nmodel.eval()\ntest_loss = 0.0\n\nwith torch.no_grad():\n    for batch_X, batch_y in test_loader:\n        predictions = model(batch_X)\n        loss = criterion(predictions, batch_y)\n        test_loss += loss.item()\n\navg_test_loss = test_loss / len(test_loader)\nprint(f\"Test MSE: {avg_test_loss:.4f}\")\n\nEpoch 20/100: Loss = 28259.4506\nEpoch 40/100: Loss = 34347.1385\nEpoch 60/100: Loss = 32919.4453\nEpoch 80/100: Loss = 29039.2554\nEpoch 100/100: Loss = 30956.3480\nTest MSE: 26513.4225",
      +    "text": "31.2 Quick Start\n\n31.2.1 Basic Usage\n\nfrom spotoptim.data import get_diabetes_dataloaders\nfrom sklearn.datasets import load_diabetes\nfrom spotoptim.data.diabetes import DiabetesDataset\nimport numpy as np\n\n# Load data\ndiabetes = load_diabetes()\nX = diabetes.data\ny = diabetes.target.reshape(-1, 1)\n\n# Now create the dataset\ndataset = DiabetesDataset(X, y, transform=None, target_transform=None)\n# Load data with default settings\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders()\n\n# Iterate through batches\nfor batch_X, batch_y in train_loader:\n    print(f\"Batch features: {batch_X.shape}\")  # (32, 10)\n    print(f\"Batch targets: {batch_y.shape}\")   # (32, 1)\n    break\n\nBatch features: torch.Size([32, 10])\nBatch targets: torch.Size([32, 1])\n\n\n\n\n31.2.2 Training a Model\n\nimport torch\nimport torch.nn as nn\nfrom spotoptim.data import get_diabetes_dataloaders\nfrom spotoptim.nn.linear_regressor import LinearRegressor\n\n# Load data\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    test_size=0.2,\n    batch_size=32,\n    scale_features=True,\n    random_state=42\n)\n\n# Create model\nmodel = LinearRegressor(\n    input_dim=10,\n    output_dim=1,\n    l1=64,\n    num_hidden_layers=2,\n    activation=\"ReLU\"\n)\n\n# Setup training\ncriterion = nn.MSELoss()\noptimizer = model.get_optimizer(\"Adam\", lr=0.01)\n\n# Training loop\nnum_epochs = 100\nfor epoch in range(num_epochs):\n    model.train()\n    train_loss = 0.0\n    \n    for batch_X, batch_y in train_loader:\n        # Forward pass\n        predictions = model(batch_X)\n        loss = criterion(predictions, batch_y)\n        \n        # Backward pass\n        optimizer.zero_grad()\n        loss.backward()\n        optimizer.step()\n        \n        train_loss += loss.item()\n    \n    avg_train_loss = train_loss / len(train_loader)\n    \n    if (epoch + 1) % 20 == 0:\n        print(f\"Epoch {epoch+1}/{num_epochs}: Loss = 
{avg_train_loss:.4f}\")\n\n# Evaluation\nmodel.eval()\ntest_loss = 0.0\n\nwith torch.no_grad():\n    for batch_X, batch_y in test_loader:\n        predictions = model(batch_X)\n        loss = criterion(predictions, batch_y)\n        test_loss += loss.item()\n\navg_test_loss = test_loss / len(test_loader)\nprint(f\"Test MSE: {avg_test_loss:.4f}\")\n\nEpoch 20/100: Loss = 30604.4867\nEpoch 40/100: Loss = 27473.2434\nEpoch 60/100: Loss = 27584.4352\nEpoch 80/100: Loss = 30723.6564\nEpoch 100/100: Loss = 27506.1375\nTest MSE: 26480.3236",
           "crumbs": [
             "Data Sets",
             "31  Diabetes Dataset Utilities"
      @@ -3182,7 +3182,7 @@
           "href": "diabetes_dataset.html#function-reference",
           "title": "31  Diabetes Dataset Utilities",
           "section": "31.3 Function Reference",
      -    "text": "31.3 Function Reference\n\n31.3.1 get_diabetes_dataloaders()\nLoads the sklearn diabetes dataset and returns configured PyTorch DataLoaders.\nSignature:\n\nget_diabetes_dataloaders(\n    test_size=0.2,\n    batch_size=32,\n    shuffle_train=True,\n    shuffle_test=False,\n    random_state=42,\n    scale_features=True,\n    num_workers=0,\n    pin_memory=False\n)\n\n(<torch.utils.data.dataloader.DataLoader at 0x124e86d70>,\n <torch.utils.data.dataloader.DataLoader at 0x124ec7e30>,\n StandardScaler())\n\n\nParameters:\n\n\n\n\n\n\n\n\n\nParameter\nType\nDefault\nDescription\n\n\n\n\ntest_size\nfloat\n0.2\nProportion of dataset for testing (0.0 to 1.0)\n\n\nbatch_size\nint\n32\nNumber of samples per batch\n\n\nshuffle_train\nbool\nTrue\nWhether to shuffle training data\n\n\nshuffle_test\nbool\nFalse\nWhether to shuffle test data\n\n\nrandom_state\nint\n42\nRandom seed for train/test split\n\n\nscale_features\nbool\nTrue\nWhether to standardize features\n\n\nnum_workers\nint\n0\nNumber of subprocesses for data loading\n\n\npin_memory\nbool\nFalse\nWhether to pin memory (useful for GPU)\n\n\n\nReturns:\n\ntrain_loader (DataLoader): Training data loader\ntest_loader (DataLoader): Test data loader\nscaler (StandardScaler or None): Fitted scaler if scale_features=True, else None\n\nExample:\n\nfrom spotoptim.data import get_diabetes_dataloaders\n\n# Custom configuration\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    test_size=0.3,\n    batch_size=64,\n    shuffle_train=True,\n    scale_features=True,\n    random_state=123\n)\n\nprint(f\"Training batches: {len(train_loader)}\")\nprint(f\"Test batches: {len(test_loader)}\")\nprint(f\"Scaler mean: {scaler.mean_[:3]}\")  # First 3 features\n\nTraining batches: 5\nTest batches: 3\nScaler mean: [-0.00056537  0.00132258  0.00027836]",
      +    "text": "31.3 Function Reference\n\n31.3.1 get_diabetes_dataloaders()\nLoads the sklearn diabetes dataset and returns configured PyTorch DataLoaders.\nSignature:\n\nget_diabetes_dataloaders(\n    test_size=0.2,\n    batch_size=32,\n    shuffle_train=True,\n    shuffle_test=False,\n    random_state=42,\n    scale_features=True,\n    num_workers=0,\n    pin_memory=False\n)\n\n(<torch.utils.data.dataloader.DataLoader at 0x12434fce0>,\n <torch.utils.data.dataloader.DataLoader at 0x1243d3ad0>,\n StandardScaler())\n\n\nParameters:\n\n\n\n\n\n\n\n\n\nParameter\nType\nDefault\nDescription\n\n\n\n\ntest_size\nfloat\n0.2\nProportion of dataset for testing (0.0 to 1.0)\n\n\nbatch_size\nint\n32\nNumber of samples per batch\n\n\nshuffle_train\nbool\nTrue\nWhether to shuffle training data\n\n\nshuffle_test\nbool\nFalse\nWhether to shuffle test data\n\n\nrandom_state\nint\n42\nRandom seed for train/test split\n\n\nscale_features\nbool\nTrue\nWhether to standardize features\n\n\nnum_workers\nint\n0\nNumber of subprocesses for data loading\n\n\npin_memory\nbool\nFalse\nWhether to pin memory (useful for GPU)\n\n\n\nReturns:\n\ntrain_loader (DataLoader): Training data loader\ntest_loader (DataLoader): Test data loader\nscaler (StandardScaler or None): Fitted scaler if scale_features=True, else None\n\nExample:\n\nfrom spotoptim.data import get_diabetes_dataloaders\n\n# Custom configuration\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    test_size=0.3,\n    batch_size=64,\n    shuffle_train=True,\n    scale_features=True,\n    random_state=123\n)\n\nprint(f\"Training batches: {len(train_loader)}\")\nprint(f\"Test batches: {len(test_loader)}\")\nprint(f\"Scaler mean: {scaler.mean_[:3]}\")  # First 3 features\n\nTraining batches: 5\nTest batches: 3\nScaler mean: [-0.00056537  0.00132258  0.00027836]",
           "crumbs": [
             "Data Sets",
             "31  Diabetes Dataset Utilities"
      @@ -3193,7 +3193,7 @@
           "href": "diabetes_dataset.html#diabetesdataset-class",
           "title": "31  Diabetes Dataset Utilities",
           "section": "31.4 DiabetesDataset Class",
      -    "text": "31.4 DiabetesDataset Class\nPyTorch Dataset implementation for the diabetes dataset.\nSignature:\n\nDiabetesDataset(X, y, transform=None, target_transform=None)\n\n<spotoptim.data.diabetes.DiabetesDataset at 0x125028350>\n\n\nParameters:\n\nX (np.ndarray): Feature matrix of shape (n_samples, n_features)\ny (np.ndarray): Target values of shape (n_samples,) or (n_samples, 1)\ntransform (callable, optional): Transform to apply to features\ntarget_transform (callable, optional): Transform to apply to targets\n\nAttributes:\n\nX (torch.Tensor): Feature tensor (n_samples, n_features)\ny (torch.Tensor): Target tensor (n_samples, 1)\nn_features (int): Number of features (10 for diabetes)\nn_samples (int): Number of samples\n\nMethods:\n\n__len__(): Returns number of samples\n__getitem__(idx): Returns tuple (features, target) for given index\n\n\n31.4.1 Manual Dataset Creation\n\nfrom spotoptim.data import DiabetesDataset\nfrom sklearn.datasets import load_diabetes\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom torch.utils.data import DataLoader\n\n# Load raw data\ndiabetes = load_diabetes()\nX, y = diabetes.data, diabetes.target\n\n# Split data\nX_train, X_test, y_train, y_test = train_test_split(\n    X, y, test_size=0.2, random_state=42\n)\n\n# Scale features\nscaler = StandardScaler()\nX_train = scaler.fit_transform(X_train)\nX_test = scaler.transform(X_test)\n\n# Create datasets\ntrain_dataset = DiabetesDataset(X_train, y_train)\ntest_dataset = DiabetesDataset(X_test, y_test)\n\n# Create dataloaders\ntrain_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)\ntest_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)\n\n# Inspect dataset\nprint(f\"Dataset size: {len(train_dataset)}\")\nprint(f\"Features shape: {train_dataset.X.shape}\")\nprint(f\"Targets shape: {train_dataset.y.shape}\")\n\n# Get a sample\nfeatures, target = train_dataset[0]\nprint(f\"Sample 
features: {features.shape}\")  # (10,)\nprint(f\"Sample target: {target.shape}\")      # (1,)\n\nDataset size: 353\nFeatures shape: torch.Size([353, 10])\nTargets shape: torch.Size([353, 1])\nSample features: torch.Size([10])\nSample target: torch.Size([1])",
      +    "text": "31.4 DiabetesDataset Class\nPyTorch Dataset implementation for the diabetes dataset.\nSignature:\n\nDiabetesDataset(X, y, transform=None, target_transform=None)\n\n<spotoptim.data.diabetes.DiabetesDataset at 0x124508950>\n\n\nParameters:\n\nX (np.ndarray): Feature matrix of shape (n_samples, n_features)\ny (np.ndarray): Target values of shape (n_samples,) or (n_samples, 1)\ntransform (callable, optional): Transform to apply to features\ntarget_transform (callable, optional): Transform to apply to targets\n\nAttributes:\n\nX (torch.Tensor): Feature tensor (n_samples, n_features)\ny (torch.Tensor): Target tensor (n_samples, 1)\nn_features (int): Number of features (10 for diabetes)\nn_samples (int): Number of samples\n\nMethods:\n\n__len__(): Returns number of samples\n__getitem__(idx): Returns tuple (features, target) for given index\n\n\n31.4.1 Manual Dataset Creation\n\nfrom spotoptim.data import DiabetesDataset\nfrom sklearn.datasets import load_diabetes\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom torch.utils.data import DataLoader\n\n# Load raw data\ndiabetes = load_diabetes()\nX, y = diabetes.data, diabetes.target\n\n# Split data\nX_train, X_test, y_train, y_test = train_test_split(\n    X, y, test_size=0.2, random_state=42\n)\n\n# Scale features\nscaler = StandardScaler()\nX_train = scaler.fit_transform(X_train)\nX_test = scaler.transform(X_test)\n\n# Create datasets\ntrain_dataset = DiabetesDataset(X_train, y_train)\ntest_dataset = DiabetesDataset(X_test, y_test)\n\n# Create dataloaders\ntrain_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)\ntest_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)\n\n# Inspect dataset\nprint(f\"Dataset size: {len(train_dataset)}\")\nprint(f\"Features shape: {train_dataset.X.shape}\")\nprint(f\"Targets shape: {train_dataset.y.shape}\")\n\n# Get a sample\nfeatures, target = train_dataset[0]\nprint(f\"Sample 
features: {features.shape}\")  # (10,)\nprint(f\"Sample target: {target.shape}\")      # (1,)\n\nDataset size: 353\nFeatures shape: torch.Size([353, 10])\nTargets shape: torch.Size([353, 1])\nSample features: torch.Size([10])\nSample target: torch.Size([1])",
           "crumbs": [
             "Data Sets",
             "31  Diabetes Dataset Utilities"
      @@ -3204,7 +3204,7 @@
           "href": "diabetes_dataset.html#advanced-usage",
           "title": "31  Diabetes Dataset Utilities",
           "section": "31.5 Advanced Usage",
      -    "text": "31.5 Advanced Usage\n\n31.5.1 Custom Transforms\n\nfrom spotoptim.data import DiabetesDataset\nfrom sklearn.datasets import load_diabetes\nimport torch\n\n# Define custom transforms\ndef add_noise(x):\n    \"\"\"Add Gaussian noise to features.\"\"\"\n    return x + torch.randn_like(x) * 0.01\n\ndef log_transform(y):\n    \"\"\"Apply log transform to target.\"\"\"\n    return torch.log1p(y)\n\n# Load data\ndiabetes = load_diabetes()\nX, y = diabetes.data, diabetes.target\n\n# Create dataset with transforms\ndataset = DiabetesDataset(\n    X, y,\n    transform=add_noise,\n    target_transform=log_transform\n)\n\n# Transforms are applied when accessing items\nfeatures, target = dataset[0]\n\n\n\n31.5.2 Different Train/Test Splits\n\nfrom spotoptim.data import get_diabetes_dataloaders\n\n# 70/30 split\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    test_size=0.3,\n    random_state=42\n)\nprint(f\"Training samples: {len(train_loader.dataset)}\")  # ~310\nprint(f\"Test samples: {len(test_loader.dataset)}\")       # ~132\n\n# 90/10 split\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    test_size=0.1,\n    random_state=42\n)\nprint(f\"Training samples: {len(train_loader.dataset)}\")  # ~398\nprint(f\"Test samples: {len(test_loader.dataset)}\")       # ~44\n\nTraining samples: 309\nTest samples: 133\nTraining samples: 397\nTest samples: 45\n\n\n\n\n31.5.3 Without Feature Scaling\n\nfrom spotoptim.data import get_diabetes_dataloaders\n\n# Load without scaling (useful for tree-based models)\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    scale_features=False\n)\n\nprint(f\"Scaler: {scaler}\")  # None\n\n# Data is in original scale\nfor batch_X, batch_y in train_loader:\n    print(f\"Mean: {batch_X.mean(dim=0)[:3]}\")  # Non-zero values\n    break\n\nScaler: None\nMean: tensor([ 8.3345e-03,  4.0437e-05, -9.1794e-04])\n\n\n\n\n31.5.4 Larger Batch Sizes\n\nfrom spotoptim.data import 
get_diabetes_dataloaders\n\n# Larger batches for faster training (if memory allows)\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    batch_size=128\n)\nprint(f\"Batches per epoch: {len(train_loader)}\")  # Fewer batches\n\n# Smaller batches for more gradient updates\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    batch_size=8\n)\nprint(f\"Batches per epoch: {len(train_loader)}\")  # More batches\n\nBatches per epoch: 3\nBatches per epoch: 45\n\n\n\n\n31.5.5 GPU Training with Pin Memory\n\nimport torch\nfrom spotoptim.data import get_diabetes_dataloaders\n\n# Enable pin_memory for faster GPU transfer\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    batch_size=32,\n    pin_memory=True  # Set to True when using GPU\n)\n\n# Move model to GPU\ndevice = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\nmodel = model.to(device)\n\n# Training loop with GPU\nfor batch_X, batch_y in train_loader:\n    # Data is already pinned, faster transfer to GPU\n    batch_X = batch_X.to(device, non_blocking=True)\n    batch_y = batch_y.to(device, non_blocking=True)\n    \n    # ... training code ...\n\n/Users/bartz/workspace/spotoptim-cookbook/.venv/lib/python3.14/site-packages/torch/utils/data/dataloader.py:775: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n  super().__init__(loader)",
      +    "text": "31.5 Advanced Usage\n\n31.5.1 Custom Transforms\n\nfrom spotoptim.data import DiabetesDataset\nfrom sklearn.datasets import load_diabetes\nimport torch\n\n# Define custom transforms\ndef add_noise(x):\n    \"\"\"Add Gaussian noise to features.\"\"\"\n    return x + torch.randn_like(x) * 0.01\n\ndef log_transform(y):\n    \"\"\"Apply log transform to target.\"\"\"\n    return torch.log1p(y)\n\n# Load data\ndiabetes = load_diabetes()\nX, y = diabetes.data, diabetes.target\n\n# Create dataset with transforms\ndataset = DiabetesDataset(\n    X, y,\n    transform=add_noise,\n    target_transform=log_transform\n)\n\n# Transforms are applied when accessing items\nfeatures, target = dataset[0]\n\n\n\n31.5.2 Different Train/Test Splits\n\nfrom spotoptim.data import get_diabetes_dataloaders\n\n# 70/30 split\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    test_size=0.3,\n    random_state=42\n)\nprint(f\"Training samples: {len(train_loader.dataset)}\")  # ~310\nprint(f\"Test samples: {len(test_loader.dataset)}\")       # ~132\n\n# 90/10 split\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    test_size=0.1,\n    random_state=42\n)\nprint(f\"Training samples: {len(train_loader.dataset)}\")  # ~398\nprint(f\"Test samples: {len(test_loader.dataset)}\")       # ~44\n\nTraining samples: 309\nTest samples: 133\nTraining samples: 397\nTest samples: 45\n\n\n\n\n31.5.3 Without Feature Scaling\n\nfrom spotoptim.data import get_diabetes_dataloaders\n\n# Load without scaling (useful for tree-based models)\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    scale_features=False\n)\n\nprint(f\"Scaler: {scaler}\")  # None\n\n# Data is in original scale\nfor batch_X, batch_y in train_loader:\n    print(f\"Mean: {batch_X.mean(dim=0)[:3]}\")  # Non-zero values\n    break\n\nScaler: None\nMean: tensor([ 0.0032,  0.0060, -0.0015])\n\n\n\n\n31.5.4 Larger Batch Sizes\n\nfrom spotoptim.data import get_diabetes_dataloaders\n\n# 
Larger batches for faster training (if memory allows)\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    batch_size=128\n)\nprint(f\"Batches per epoch: {len(train_loader)}\")  # Fewer batches\n\n# Smaller batches for more gradient updates\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    batch_size=8\n)\nprint(f\"Batches per epoch: {len(train_loader)}\")  # More batches\n\nBatches per epoch: 3\nBatches per epoch: 45\n\n\n\n\n31.5.5 GPU Training with Pin Memory\n\nimport torch\nfrom spotoptim.data import get_diabetes_dataloaders\n\n# Enable pin_memory for faster GPU transfer\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    batch_size=32,\n    pin_memory=True  # Set to True when using GPU\n)\n\n# Move model to GPU\ndevice = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\nmodel = model.to(device)\n\n# Training loop with GPU\nfor batch_X, batch_y in train_loader:\n    # Data is already pinned, faster transfer to GPU\n    batch_X = batch_X.to(device, non_blocking=True)\n    batch_y = batch_y.to(device, non_blocking=True)\n    \n    # ... training code ...\n\n/Users/bartz/workspace/spotoptim-cookbook/.venv/lib/python3.14/site-packages/torch/utils/data/dataloader.py:775: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n  super().__init__(loader)",
           "crumbs": [
             "Data Sets",
             "31  Diabetes Dataset Utilities"
      @@ -3215,7 +3215,7 @@
           "href": "diabetes_dataset.html#complete-training-example",
           "title": "31  Diabetes Dataset Utilities",
           "section": "31.6 Complete Training Example",
      -    "text": "31.6 Complete Training Example\nHere’s a complete example showing data loading, model training, and evaluation:\n\nimport torch\nimport torch.nn as nn\nfrom spotoptim.data import get_diabetes_dataloaders\nfrom spotoptim.nn.linear_regressor import LinearRegressor\n\ndef train_diabetes_model():\n    \"\"\"Train a neural network on the diabetes dataset.\"\"\"\n    \n    # Load data\n    train_loader, test_loader, scaler = get_diabetes_dataloaders(\n        test_size=0.2,\n        batch_size=32,\n        scale_features=True,\n        random_state=42\n    )\n    \n    # Create model\n    model = LinearRegressor(\n        input_dim=10,\n        output_dim=1,\n        l1=128,\n        num_hidden_layers=3,\n        activation=\"ReLU\"\n    )\n    \n    # Setup training\n    criterion = nn.MSELoss()\n    optimizer = model.get_optimizer(\"Adam\", lr=0.001, weight_decay=1e-5)\n    \n    # Training configuration\n    num_epochs = 200\n    best_test_loss = float('inf')\n    \n    print(\"Starting training...\")\n    print(f\"Training samples: {len(train_loader.dataset)}\")\n    print(f\"Test samples: {len(test_loader.dataset)}\")\n    print(f\"Batches per epoch: {len(train_loader)}\")\n    print(\"-\" * 60)\n    \n    for epoch in range(num_epochs):\n        # Training phase\n        model.train()\n        train_loss = 0.0\n        \n        for batch_X, batch_y in train_loader:\n            # Forward pass\n            predictions = model(batch_X)\n            loss = criterion(predictions, batch_y)\n            \n            # Backward pass\n            optimizer.zero_grad()\n            loss.backward()\n            optimizer.step()\n            \n            train_loss += loss.item()\n        \n        avg_train_loss = train_loss / len(train_loader)\n        \n        # Evaluation phase\n        model.eval()\n        test_loss = 0.0\n        \n        with torch.no_grad():\n            for batch_X, batch_y in test_loader:\n                predictions = 
model(batch_X)\n                loss = criterion(predictions, batch_y)\n                test_loss += loss.item()\n        \n        avg_test_loss = test_loss / len(test_loader)\n        \n        # Track best model\n        if avg_test_loss < best_test_loss:\n            best_test_loss = avg_test_loss\n            # Could save model here: torch.save(model.state_dict(), 'best_model.pt')\n        \n        # Print progress\n        if (epoch + 1) % 20 == 0:\n            print(f\"Epoch {epoch+1:3d}/{num_epochs}: \"\n                  f\"Train Loss = {avg_train_loss:.4f}, \"\n                  f\"Test Loss = {avg_test_loss:.4f}\")\n    \n    print(\"-\" * 60)\n    print(f\"Training complete!\")\n    print(f\"Best test loss: {best_test_loss:.4f}\")\n    \n    return model, best_test_loss\n\n# Run training\nif __name__ == \"__main__\":\n    model, best_loss = train_diabetes_model()\n\nStarting training...\nTraining samples: 353\nTest samples: 89\nBatches per epoch: 12\n------------------------------------------------------------\nEpoch  20/200: Train Loss = 28824.2376, Test Loss = 26605.7448\nEpoch  40/200: Train Loss = 27737.7015, Test Loss = 26601.3932\nEpoch  60/200: Train Loss = 28560.2100, Test Loss = 26597.0215\nEpoch  80/200: Train Loss = 32157.9419, Test Loss = 26592.6699\nEpoch 100/200: Train Loss = 28030.2559, Test Loss = 26588.2292\nEpoch 120/200: Train Loss = 30427.3665, Test Loss = 26583.7174\nEpoch 140/200: Train Loss = 27419.0477, Test Loss = 26579.0039\nEpoch 160/200: Train Loss = 27598.1221, Test Loss = 26574.1823\nEpoch 180/200: Train Loss = 29836.2547, Test Loss = 26569.1055\nEpoch 200/200: Train Loss = 34793.4126, Test Loss = 26563.8522\n------------------------------------------------------------\nTraining complete!\nBest test loss: 26563.8522",
      +    "text": "31.6 Complete Training Example\nHere’s a complete example showing data loading, model training, and evaluation:\n\nimport torch\nimport torch.nn as nn\nfrom spotoptim.data import get_diabetes_dataloaders\nfrom spotoptim.nn.linear_regressor import LinearRegressor\n\ndef train_diabetes_model():\n    \"\"\"Train a neural network on the diabetes dataset.\"\"\"\n    \n    # Load data\n    train_loader, test_loader, scaler = get_diabetes_dataloaders(\n        test_size=0.2,\n        batch_size=32,\n        scale_features=True,\n        random_state=42\n    )\n    \n    # Create model\n    model = LinearRegressor(\n        input_dim=10,\n        output_dim=1,\n        l1=128,\n        num_hidden_layers=3,\n        activation=\"ReLU\"\n    )\n    \n    # Setup training\n    criterion = nn.MSELoss()\n    optimizer = model.get_optimizer(\"Adam\", lr=0.001, weight_decay=1e-5)\n    \n    # Training configuration\n    num_epochs = 200\n    best_test_loss = float('inf')\n    \n    print(\"Starting training...\")\n    print(f\"Training samples: {len(train_loader.dataset)}\")\n    print(f\"Test samples: {len(test_loader.dataset)}\")\n    print(f\"Batches per epoch: {len(train_loader)}\")\n    print(\"-\" * 60)\n    \n    for epoch in range(num_epochs):\n        # Training phase\n        model.train()\n        train_loss = 0.0\n        \n        for batch_X, batch_y in train_loader:\n            # Forward pass\n            predictions = model(batch_X)\n            loss = criterion(predictions, batch_y)\n            \n            # Backward pass\n            optimizer.zero_grad()\n            loss.backward()\n            optimizer.step()\n            \n            train_loss += loss.item()\n        \n        avg_train_loss = train_loss / len(train_loader)\n        \n        # Evaluation phase\n        model.eval()\n        test_loss = 0.0\n        \n        with torch.no_grad():\n            for batch_X, batch_y in test_loader:\n                predictions = 
model(batch_X)\n                loss = criterion(predictions, batch_y)\n                test_loss += loss.item()\n        \n        avg_test_loss = test_loss / len(test_loader)\n        \n        # Track best model\n        if avg_test_loss < best_test_loss:\n            best_test_loss = avg_test_loss\n            # Could save model here: torch.save(model.state_dict(), 'best_model.pt')\n        \n        # Print progress\n        if (epoch + 1) % 20 == 0:\n            print(f\"Epoch {epoch+1:3d}/{num_epochs}: \"\n                  f\"Train Loss = {avg_train_loss:.4f}, \"\n                  f\"Test Loss = {avg_test_loss:.4f}\")\n    \n    print(\"-\" * 60)\n    print(f\"Training complete!\")\n    print(f\"Best test loss: {best_test_loss:.4f}\")\n    \n    return model, best_test_loss\n\n# Run training\nif __name__ == \"__main__\":\n    model, best_loss = train_diabetes_model()\n\nStarting training...\nTraining samples: 353\nTest samples: 89\nBatches per epoch: 12\n------------------------------------------------------------\nEpoch  20/200: Train Loss = 28813.6372, Test Loss = 26617.7513\nEpoch  40/200: Train Loss = 29518.8169, Test Loss = 26613.5111\nEpoch  60/200: Train Loss = 27642.3442, Test Loss = 26609.1048\nEpoch  80/200: Train Loss = 28447.9255, Test Loss = 26604.7337\nEpoch 100/200: Train Loss = 27935.0824, Test Loss = 26600.2474\nEpoch 120/200: Train Loss = 28834.5745, Test Loss = 26595.6836\nEpoch 140/200: Train Loss = 29086.6240, Test Loss = 26590.9368\nEpoch 160/200: Train Loss = 28568.4010, Test Loss = 26586.0553\nEpoch 180/200: Train Loss = 30818.7074, Test Loss = 26580.9805\nEpoch 200/200: Train Loss = 29725.2568, Test Loss = 26575.6862\n------------------------------------------------------------\nTraining complete!\nBest test loss: 26575.6862",
           "crumbs": [
             "Data Sets",
             "31  Diabetes Dataset Utilities"
      @@ -3237,7 +3237,7 @@
           "href": "diabetes_dataset.html#best-practices",
           "title": "31  Diabetes Dataset Utilities",
           "section": "31.8 Best Practices",
      -    "text": "31.8 Best Practices\n\n31.8.1 1. Always Use Feature Scaling\n\n# Good: Features are standardized\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    scale_features=True\n)\n\nNeural networks typically perform better with normalized inputs.\n\n\n31.8.2 2. Set Random Seeds for Reproducibility\n\n# Reproducible train/test splits\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    random_state=42\n)\n\n# Also set PyTorch seed\nimport torch\ntorch.manual_seed(42)\n\n<torch._C.Generator at 0x115400830>\n\n\n\n\n31.8.3 3. Don’t Shuffle Test Data\n\n# Good: Test data in consistent order\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    shuffle_train=True,   # Shuffle training data\n    shuffle_test=False    # Don't shuffle test data\n)\n\nThis ensures consistent evaluation metrics across runs.\n\n\n31.8.4 4. Choose Appropriate Batch Size\n\n# Small dataset (442 samples) - moderate batch size works well\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    batch_size=32  # Good balance for this dataset\n)\n\nToo large: Fewer gradient updates per epoch\nToo small: Noisy gradients, slower training\n\n\n31.8.5 5. 
Save the Scaler for Production\n\nimport pickle\nimport numpy as np\nfrom spotoptim.data import get_diabetes_dataloaders\n\n# Train with scaling\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    scale_features=True\n)\n\n# Save scaler for production use\nwith open('scaler.pkl', 'wb') as f:\n    pickle.dump(scaler, f)\n\n# Later: Load and use on new data\nwith open('scaler.pkl', 'rb') as f:\n    loaded_scaler = pickle.load(f)\n\n# Create some example new data (same shape as diabetes features)\nnew_data = np.random.randn(5, 10)  # 5 samples, 10 features\nnew_data_scaled = loaded_scaler.transform(new_data)\n\nprint(f\"Original data shape: {new_data.shape}\")\nprint(f\"Scaled data shape: {new_data_scaled.shape}\")\nprint(f\"Scaled data mean: {new_data_scaled.mean(axis=0)[:3]}\")  # Should be close to 0\n\nOriginal data shape: (5, 10)\nScaled data shape: (5, 10)\nScaled data mean: [7.03934676 5.01490242 3.47123618]",
      +    "text": "31.8 Best Practices\n\n31.8.1 1. Always Use Feature Scaling\n\n# Good: Features are standardized\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    scale_features=True\n)\n\nNeural networks typically perform better with normalized inputs.\n\n\n31.8.2 2. Set Random Seeds for Reproducibility\n\n# Reproducible train/test splits\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    random_state=42\n)\n\n# Also set PyTorch seed\nimport torch\ntorch.manual_seed(42)\n\n<torch._C.Generator at 0x114eb3970>\n\n\n\n\n31.8.3 3. Don’t Shuffle Test Data\n\n# Good: Test data in consistent order\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    shuffle_train=True,   # Shuffle training data\n    shuffle_test=False    # Don't shuffle test data\n)\n\nThis ensures consistent evaluation metrics across runs.\n\n\n31.8.4 4. Choose Appropriate Batch Size\n\n# Small dataset (442 samples) - moderate batch size works well\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    batch_size=32  # Good balance for this dataset\n)\n\nToo large: Fewer gradient updates per epoch\nToo small: Noisy gradients, slower training\n\n\n31.8.5 5. 
Save the Scaler for Production\n\nimport pickle\nimport numpy as np\nfrom spotoptim.data import get_diabetes_dataloaders\n\n# Train with scaling\ntrain_loader, test_loader, scaler = get_diabetes_dataloaders(\n    scale_features=True\n)\n\n# Save scaler for production use\nwith open('scaler.pkl', 'wb') as f:\n    pickle.dump(scaler, f)\n\n# Later: Load and use on new data\nwith open('scaler.pkl', 'rb') as f:\n    loaded_scaler = pickle.load(f)\n\n# Create some example new data (same shape as diabetes features)\nnew_data = np.random.randn(5, 10)  # 5 samples, 10 features\nnew_data_scaled = loaded_scaler.transform(new_data)\n\nprint(f\"Original data shape: {new_data.shape}\")\nprint(f\"Scaled data shape: {new_data_scaled.shape}\")\nprint(f\"Scaled data mean: {new_data_scaled.mean(axis=0)[:3]}\")  # Should be close to 0\n\nOriginal data shape: (5, 10)\nScaled data shape: (5, 10)\nScaled data mean: [7.03934676 5.01490242 3.47123618]",
           "crumbs": [
             "Data Sets",
             "31  Diabetes Dataset Utilities"
      @@ -4007,7 +4007,7 @@
           "href": "001_sampling.html#ideas-and-concepts",
           "title": "36  Sampling Plans",
           "section": "",
      -    "text": "Definition 36.1 (Sampling Plan) In the context of computer experiments, the term sampling plan refers to the set of input values, say \\(X\\),at which the computer code is evaluated.\n\n\n\n\n\n\nSampling discrete observations:\nUsing these samples to construct an approximation \\(\\hat{f}\\).\nEnsuring the surrogate model is well-posed, meaning it is mathematically valid and can generalize predictions effectively.\n\n\n\n\n\n\nExtreme Sampling: Measuring performance only at the extreme values of parameters may miss important behaviors in the center of the design space, leading to incomplete understanding.\nUneven Sampling: Concentrating samples in certain regions while neglecting others forces the model to extrapolate over unsampled areas, potentially resulting in inaccurate or misleading predictions. Additionally, in some cases, the data may come from external sources or be limited in scope, leaving little control over the sampling plan. This can further restrict the model’s ability to generalize effectively.\n\n\n36.1.1 The ‘Curse of Dimensionality’ and How to Avoid It\nThe “curse of dimensionality” refers to the exponential increase in computational complexity and data requirements as the number of dimensions (variables) in a problem grows. For a one-dimensional space, sampling \\(n\\) locations may suffice for accurate predictions. In high-dimensional spaces, the amount of data needed to maintain the same level of accuracy or coverage increases dramatically. For example, if a one-dimensional space requires \\(n\\) samples for a certain accuracy, a \\(k\\)-dimensional space would require \\(n^k\\) samples. This makes tasks like optimization, sampling, and modeling computationally expensive and often impractical in high-dimensional settings.\n\nExample 36.1 (Example: Curse of Dimensionality) Consider a simple example where we want to model the cost of a car tire based on its wheel diameter. 
If we have one variable (wheel diameter), we might need 10 simulations to get a good estimate of the cost. Now, if we add 8 more variables (e.g., tread pattern, rubber type, etc.), the number of simulations required increases to \\(10^8\\) (10 million). This is because the number of combinations of design variables grows exponentially with the number of dimensions. This means that the computational budget required to evaluate all combinations of design variables becomes infeasible. In this case, it would take 11,416 years to complete the simulations, making it impractical to explore the design space fully.\n\n\n\n36.1.2 Physical versus Computational Experiments\nPhysical experiments are prone to experimental errors from three main sources:\n\nHuman error: Mistakes made by the experimenter.\nRandom error: Measurement inaccuracies that vary unpredictably.\nSystematic error: Consistent bias due to flaws in the experimental setup.\n\nThe key distinction is repeatability: systematic errors remain constant across repetitions, while random errors vary.\nComputational experiments, on the other hand, are deterministic and free from random errors. However, they are still affected by:\n\nHuman error: Bugs in code or incorrect boundary conditions.\nSystematic error: Biases from model simplifications (e.g., inviscid flow approximations) or finite resolution (e.g., insufficient mesh resolution).\n\nThe term “noise” is used differently in physical and computational contexts. In physical experiments, it refers to random errors, while in computational experiments, it often refers to systematic errors.\nUnderstanding these differences is crucial for designing experiments and applying techniques like Gaussian process-based approximations. 
For physical experiments, replication mitigates random errors, but this is unnecessary for deterministic computational experiments.\n\n\n36.1.3 Designing Preliminary Experiments (Screening)\nMinimizing the number of design variables \\(x_1, x_2, \\dots, x_k\\) is crucial before modeling the objective function \\(f\\). This process, called screening, aims to reduce dimensionality without compromising the analysis. If \\(f\\) is at least once differentiable over the design domain \\(D\\), the partial derivative \\(\\frac{\\partial f}{\\partial x_i}\\) can be used to classify variables:\n\nNegligible Variables: If \\(\\frac{\\partial f}{\\partial x_i} = 0, \\, \\forall x \\in D\\), the variable \\(x_i\\) can be safely neglected.\nLinear Additive Variables: If \\(\\frac{\\partial f}{\\partial x_i} = \\text{constant} \\neq 0, \\, \\forall x \\in D\\), the effect of \\(x_i\\) is linear and additive.\nNonlinear Variables: If \\(\\frac{\\partial f}{\\partial x_i} = g(x_i), \\, \\forall x \\in D\\), where \\(g(x_i)\\) is a non-constant function, \\(f\\) is nonlinear in \\(x_i\\).\nInteractive Nonlinear Variables: If \\(\\frac{\\partial f}{\\partial x_i} = g(x_i, x_j, \\dots), /, \\forall x \\in D\\), where \\(g(x_i, x_j, \\dots)\\) is a function involving interactions with other variables, \\(f\\) is nonlinear in \\(x_i\\) and interacts with \\(x_j\\).\n\nMeasuring \\(\\frac{\\partial f}{\\partial x_i}\\) across the entire design space is often infeasible due to limited budgets. The percentage of time allocated to screening depends on the problem: If many variables are expected to be inactive, thorough screening can significantly improve model accuracy by reducing dimensionality. If most variables are believed to impact the objective, focus should shift to modeling instead. 
Screening is a trade-off between computational cost and model accuracy, and its effectiveness depends on the specific problem context.\n\n36.1.3.1 Estimating the Distribution of Elementary Effects\nIn order to simplify the presentation of what follows, we make, without loss of generality, the assumption that the design space \\(D = [0, 1]^k\\); that is, we normalize all variables into the unit cube. We shall adhere to this convention for the rest of the book and strongly urge the reader to do likewise when implementing any algorithms described here, as this step not only yields clearer mathematics in some cases but also safeguards against scaling issues.\nBefore proceeding with the description of the Morris algorithm, we need to define an important statistical concept. Let us restrict our design space \\(D\\) to a \\(k\\)-dimensional, \\(p\\)-level full factorial grid, that is,\n\\[\nx_i \\in \\{0, \\frac{1}{p-1}, \\frac{2}{p-1}, \\dots, 1\\}, \\quad \\text{ for } i = 1, \\dots, k.\n\\]\n\nDefinition 36.2 (Elementary Effect) For a given baseline value \\(x \\in D\\), let \\(d_i(x)\\) denote the elementary effect of \\(x_i\\), where:\n\\[\nd_i(x) = \\frac{f(x_1, \\dots, x_i + \\Delta, \\dots, x_k) - f(x_1, \\dots, x_i - \\Delta, \\dots, x_k)}{2\\Delta}, \\quad i = 1, \\dots, k,\n\\tag{36.1}\\] where \\(\\Delta\\) is the step size, which is defined as the distance between two adjacent levels in the grid. In other words, we have:\nwith \\[\\Delta = \\frac{\\xi}{p-1}, \\quad \\xi \\in \\mathbb{N}^*, \\quad \\text{and} \\quad x \\in D , \\text{ such that its components } x_i \\leq 1 - \\Delta.\n\\]\n\\(\\Delta\\) is the step size. The elementary effect \\(d_i(x)\\) measures the sensitivity of the function \\(f\\) to changes in the variable \\(x_i\\) at the point \\(x\\).\n\nMorris’s method aims to estimate the parameters of the distribution of elementary effects associated with each variable. 
A large measure of central tendency indicates that a variable has a significant influence on the objective function across the design space, while a large measure of spread suggests that the variable is involved in interactions or contributes to the nonlinearity of \\(f\\). In practice, the sample mean and standard deviation of a set of \\(d_i(x)\\) values, calculated in different parts of the design space, are used for this estimation.\nTo ensure efficiency, the preliminary sampling plan \\(X\\) should be designed so that each evaluation of the objective function \\(f\\) contributes to the calculation of two elementary effects, rather than just one (as would occur with a naive random spread of baseline \\(x\\) values and adding \\(\\Delta\\) to one variable). Additionally, the sampling plan should provide a specified number (e.g., \\(r\\)) of elementary effects for each variable, independently drawn with replacement. For a detailed discussion on constructing such a sampling plan, readers are encouraged to consult Morris’s original paper (Morris, 1991). 
Here, we focus on describing the process itself.\nThe random orientation of the sampling plan \\(B\\) can be constructed as follows:\n\nLet \\(B\\) be a \\((k+1) \\times k\\) matrix of 0s and 1s, where for each column \\(i\\), two rows differ only in their \\(i\\)-th entries.\nCompute a random orientation of \\(B\\), denoted \\(B^*\\):\n\n\\[\nB^* =\n\\left(\n1_{k+1,k} x^* + (\\Delta/2)\n\\left[\n(2B-1_{k+1,k})\nD^* +\n1_{k+1,k}\n\\right]\n\\right)\nP^*,\n\\]\nwhere:\n\n\\(D^*\\) is a \\(k\\)-dimensional diagonal matrix with diagonal elements \\(\\pm 1\\) (equal probability),\n\\(\\mathbf{1}\\) is a matrix of 1s,\n\\(x^*\\) is a randomly chosen point in the \\(p\\)-level design space (limited by \\(\\Delta\\)),\n\\(P^*\\) is a \\(k \\times k\\) random permutation matrix with one 1 per column and row.\n\nspotpython provides a Python implementation to compute \\(B^*\\), see https://github.com/sequential-parameter-optimization/spotPython/blob/main/src/spotpython/utils/effects.py.\nHere is the corresponding code:\n\ndef randorient(k, p, xi, seed=None):\n    # Initialize random number generator with the provided seed\n    if seed is not None:\n        rng = np.random.default_rng(seed)\n    else:\n        rng = np.random.default_rng()\n\n    # Step length\n    Delta = xi / (p - 1)\n\n    m = k + 1\n\n    # A truncated p-level grid in one dimension\n    xs = np.arange(0, 1 - Delta, 1 / (p - 1))\n    xsl = len(xs)\n    if xsl < 1:\n        print(f\"xi = {xi}.\")\n        print(f\"p = {p}.\")\n        print(f\"Delta = {Delta}.\")\n        print(f\"p - 1 = {p - 1}.\")\n        raise ValueError(f\"The number of levels xsl is {xsl}, but it must be greater than 0.\")\n\n    # Basic sampling matrix\n    B = np.vstack((np.zeros((1, k)), np.tril(np.ones((k, k)))))\n\n    # Randomization\n\n    # Matrix with +1s and -1s on the diagonal with equal probability\n    Dstar = np.diag(2 * rng.integers(0, 2, size=k) - 1)\n\n    # Random base value\n    xstar = xs[rng.integers(0, xsl, 
size=k)]\n\n    # Permutation matrix\n    Pstar = np.zeros((k, k))\n    rp = rng.permutation(k)\n    for i in range(k):\n        Pstar[i, rp[i]] = 1\n\n    # A random orientation of the sampling matrix\n    Bstar = (np.ones((m, 1)) @ xstar.reshape(1, -1) +\n        (Delta / 2) * ((2 * B - np.ones((m, k))) @ Dstar +\n        np.ones((m, k)))) @ Pstar\n\n    return Bstar\n\nThe code following snippet generates a random orientation of a sampling matrix Bstar using the randorient() function. The input parameters are:\n\nk = 3: The number of design variables (dimensions).\np = 3: The number of levels in the grid for each variable.\nxi = 1: A parameter used to calculate the step size Delta.\n\nStep-size calculation is performed as follows: Delta = xi / (p - 1) = 1 / (3 - 1) = 0.5, which determines the spacing between levels in the grid.\nNext, random sampling matrix construction is computed:\n\nA truncated grid is created with levels [0, 0.5] (based on Delta).\nA basic sampling matrix B is constructed, which is a lower triangular matrix with 0s and 1s.\n\nThen, randomization is applied:\n\nDstar: A diagonal matrix with random entries of +1 or -1.\nxstar: A random starting point from the grid.\nPstar: A random permutation matrix.\n\nRandom orientation is applied to the basic sampling matrix B to create Bstar. This involves scaling, shifting, and permuting the rows and columns of B.\nThe final output is the matrix Bstar, which represents a random orientation of the sampling plan. Each row corresponds to a sampled point in the design space, and each column corresponds to a design variable.\n\nExample 36.2 (Random Orientation of the Sampling Matrix in 2-D)  \n\nk = 2\np = 3\nxi = 1\nBstar = randorient(k, p, xi, seed=123)\nprint(f\"Random orientation of the sampling matrix:\\n{Bstar}\")\n\nRandom orientation of the sampling matrix:\n[[0.5 0. ]\n [0.  0. ]\n [0.  
0.5]]\n\n\nWe can visualize the random orientation of the sampling matrix in 2-D as shown in Figure 36.1.\n\nplt.figure(figsize=(6, 6))\nplt.scatter(Bstar[:, 0], Bstar[:, 1], color='blue', s=50, label='Hypercube Points')\nfor i in range(Bstar.shape[0]):\n    plt.text(Bstar[i, 0] + 0.01, Bstar[i, 1] + 0.01, str(i), fontsize=9)\nplt.xlim(-0.1, 1.1)\nplt.ylim(-0.1, 1.1)\nplt.xlabel('x1')\nplt.ylabel('x2')\nplt.grid()\n\n\n\n\n\n\n\nFigure 36.1: Random orientation of the sampling matrix in 2-D. The labels indicate the row index of the points.\n\n\n\n\n\n\n\nExample 36.3 (Random Orientation of the Sampling Matrix)  \n\nk = 3\np = 3\nxi = 1\nBstar = randorient(k, p, xi)\nprint(f\"Random orientation of the sampling matrix:\\n{Bstar}\")\n\nRandom orientation of the sampling matrix:\n[[0.  0.  0.5]\n [0.  0.5 0.5]\n [0.5 0.5 0.5]\n [0.5 0.5 0. ]]\n\n\n\nTo obtain \\(r\\) elementary effects for each variable, the screening plan is built from \\(r\\) random orientations:\n\\[\nX =\n\\begin{pmatrix}\nB^*_1 \\\\\nB^*_2 \\\\\n\\vdots \\\\\nB^*_r\n\\end{pmatrix}\n\\]\nThe function screeningplan() generates a screening plan by calling the randorient() function r times. It creates a list of random orientations and then concatenates them into a single array, which represents the screening plan. The screening plan implementation in Python is as follows (see https://github.com/sequential-parameter-optimization/spotPython/blob/main/src/spotpython/utils/effects.py):\n\ndef screeningplan(k, p, xi, r):\n    # Empty list to accumulate screening plan rows\n    X = []\n    for i in range(r):\n        X.append(randorient(k, p, xi))\n    # Concatenate list of arrays into a single array\n    X = np.vstack(X)\n    return X\n\nIt works like follows:\n\nThe value of the objective function \\(f\\) is computed for each row of the screening plan matrix \\(X\\). 
These values are stored in a column vector \\(t\\) of size \\((r * (k + 1)) \\times 1\\), where:\n\nr is the number of random orientations.\nk is the number of design variables.\n\n\nThe elementary effects are calculated using the following formula:\n\nFor each random orientation, adjacent rows of the screening plan matrix X and their corresponding function values from t are used.\nThese values are inserted into Equation 36.1 to compute elementary effects for each variable. An elementary effect measures the sensitivity of the objective function to changes in a specific variable.\n\nResults can be used for a statistical analysis. After collecting a sample of \\(r\\) elementary effects for each variable:\n\nThe sample mean (central tendency) is computed to indicate the overall influence of the variable.\nThe sample standard deviation (spread) is computed to capture variability, which may indicate interactions or nonlinearity.\n\nThe results (sample means and standard deviations) are plotted on a chart for comparison. This helps identify which variables have the most significant impact on the objective function and whether their effects are linear or involve interactions. 
This is implemented in the function screening_plot() in Python, which uses the helper function _screening() to calculate the elementary effects and their statistics.\n\ndef _screening(X, fun, xi, p, labels, bounds=None) -> tuple:\n    \"\"\"Helper function to calculate elementary effects for a screening design.\n\n    Args:\n        X (np.ndarray): The screening plan matrix, typically structured\n            within a [0,1]^k box.\n        fun (object): The objective function to evaluate at each\n            design point in the screening plan.\n        xi (float): The elementary effect step length factor.\n        p (int): Number of discrete levels along each dimension.\n        labels (list of str): A list of variable names corresponding to\n            the design variables.\n        bounds (np.ndarray): A 2xk matrix where the first row contains\n            lower bounds and the second row contains upper bounds for\n            each variable.\n\n    Returns:\n        tuple: A tuple containing two arrays:\n            - sm: The mean of the elementary effects for each variable.\n            - ssd: The standard deviation of the elementary effects for\n            each variable.\n    \"\"\"\n    k = X.shape[1]\n    r = X.shape[0] // (k + 1)\n\n    # Scale each design point\n    t = np.zeros(X.shape[0])\n    for i in range(X.shape[0]):\n        if bounds is not None:\n            X[i, :] = bounds[0, :] + X[i, :] * (bounds[1, :] - bounds[0, :])\n        t[i] = np.asarray(fun(X[i, :])).item()\n\n    # Elementary effects\n    F = np.zeros((k, r))\n    for i in range(r):\n        for j in range(i * (k + 1), i * (k + 1) + k):\n            idx = np.where(X[j, :] - X[j + 1, :] != 0)[0][0]\n            F[idx, i] = (t[j + 1] - t[j]) / (xi / (p - 1))\n\n    # Statistical measures (divide by n)\n    ssd = np.std(F, axis=1, ddof=0)\n    sm = np.mean(F, axis=1)\n    return sm, ssd\n\n\ndef screening_plot(X, fun, xi, p, labels, bounds=None, show=True) -> None:\n    \"\"\"Generates a 
plot with elementary effect screening metrics.\n\n    This function calculates the mean and standard deviation of the\n    elementary effects for a given set of design variables and plots\n    the results.\n\n    Args:\n        X (np.ndarray):\n            The screening plan matrix, typically structured within a [0,1]^k box.\n        fun (object):\n            The objective function to evaluate at each design point in the screening plan.\n        xi (float):\n            The elementary effect step length factor.\n        p (int):\n            Number of discrete levels along each dimension.\n        labels (list of str):\n            A list of variable names corresponding to the design variables.\n        bounds (np.ndarray):\n            A 2xk matrix where the first row contains lower bounds and\n            the second row contains upper bounds for each variable.\n        show (bool):\n            If True, the plot is displayed. Defaults to True.\n\n    Returns:\n        None: The function generates a plot of the results.\n    \"\"\"\n    k = X.shape[1]\n    sm, ssd = _screening(X=X, fun=fun, xi=xi, p=p, labels=labels, bounds=bounds)\n    plt.figure()\n    for i in range(k):\n        plt.text(sm[i], ssd[i], labels[i], fontsize=10)\n    plt.axis([min(sm), 1.1 * max(sm), min(ssd), 1.1 * max(ssd)])\n    plt.xlabel(\"Sample means\")\n    plt.ylabel(\"Sample standard deviations\")\n    plt.gca().tick_params(labelsize=10)\n    plt.grid(True)\n    if show:\n        plt.show()\n\n\n\n\n36.1.4 Special Considerations When Deploying Screening Algorithms\nWhen implementing the screening algorithm described above, two specific scenarios require special attention:\n\nDuplicate Design Points: If the dimensionality \\(k\\) of the space is relatively low and you can afford a large number of elementary effects \\(r\\), we should be be aware of the increased probability of duplicate design points appearing in the sampling plan \\(X\\). 
*Since the responses at sample points are deterministic, there’s no value in evaluating the same point multiple times. Fortunately, this issue is relatively uncommon in practice, as screening high-dimensional spaces typically requires large numbers of elementary effects, which naturally reduces the likelihood of duplicates.\nFailed Simulations: Numerical simulation codes occasionally fail to return valid results due to meshing errors, non-convergence of partial differential equation solvers, numerical instabilities, or parameter combinations outside the stable operating range.\n\nFrom a screening perspective, this is particularly problematic because an entire random orientation \\(B^*\\) becomes compromised if even a single point within it fails to evaluate properly. Implementing error handling strategies or fallback methods to manage such cases should be considered.\nFor robust screening studies, monitoring simulation success rates and having contingency plans for failed evaluations are important aspects of the experimental design process.",
      +    "text": "Definition 36.1 (Sampling Plan) In the context of computer experiments, the term sampling plan refers to the set of input values, say \\(X\\),at which the computer code is evaluated.\n\n\n\n\n\n\nSampling discrete observations:\nUsing these samples to construct an approximation \\(\\hat{f}\\).\nEnsuring the surrogate model is well-posed, meaning it is mathematically valid and can generalize predictions effectively.\n\n\n\n\n\n\nExtreme Sampling: Measuring performance only at the extreme values of parameters may miss important behaviors in the center of the design space, leading to incomplete understanding.\nUneven Sampling: Concentrating samples in certain regions while neglecting others forces the model to extrapolate over unsampled areas, potentially resulting in inaccurate or misleading predictions. Additionally, in some cases, the data may come from external sources or be limited in scope, leaving little control over the sampling plan. This can further restrict the model’s ability to generalize effectively.\n\n\n36.1.1 The ‘Curse of Dimensionality’ and How to Avoid It\nThe “curse of dimensionality” refers to the exponential increase in computational complexity and data requirements as the number of dimensions (variables) in a problem grows. For a one-dimensional space, sampling \\(n\\) locations may suffice for accurate predictions. In high-dimensional spaces, the amount of data needed to maintain the same level of accuracy or coverage increases dramatically. For example, if a one-dimensional space requires \\(n\\) samples for a certain accuracy, a \\(k\\)-dimensional space would require \\(n^k\\) samples. This makes tasks like optimization, sampling, and modeling computationally expensive and often impractical in high-dimensional settings.\n\nExample 36.1 (Example: Curse of Dimensionality) Consider a simple example where we want to model the cost of a car tire based on its wheel diameter. 
If we have one variable (wheel diameter), we might need 10 simulations to get a good estimate of the cost. Now, if we add 8 more variables (e.g., tread pattern, rubber type, etc.), the number of simulations required increases to \\(10^8\\) (100 million). This is because the number of combinations of design variables grows exponentially with the number of dimensions. This means that the computational budget required to evaluate all combinations of design variables becomes infeasible. In this case, assuming each simulation takes one hour, it would take about 11,416 years to complete the simulations, making it impractical to explore the design space fully.\n\n\n\n36.1.2 Physical versus Computational Experiments\nPhysical experiments are prone to experimental errors from three main sources:\n\nHuman error: Mistakes made by the experimenter.\nRandom error: Measurement inaccuracies that vary unpredictably.\nSystematic error: Consistent bias due to flaws in the experimental setup.\n\nThe key distinction is repeatability: systematic errors remain constant across repetitions, while random errors vary.\nComputational experiments, on the other hand, are deterministic and free from random errors. However, they are still affected by:\n\nHuman error: Bugs in code or incorrect boundary conditions.\nSystematic error: Biases from model simplifications (e.g., inviscid flow approximations) or finite resolution (e.g., insufficient mesh resolution).\n\nThe term “noise” is used differently in physical and computational contexts. In physical experiments, it refers to random errors, while in computational experiments, it often refers to systematic errors.\nUnderstanding these differences is crucial for designing experiments and applying techniques like Gaussian process-based approximations.
For physical experiments, replication mitigates random errors, but this is unnecessary for deterministic computational experiments.\n\n\n36.1.3 Designing Preliminary Experiments (Screening)\nMinimizing the number of design variables \\(x_1, x_2, \\dots, x_k\\) is crucial before modeling the objective function \\(f\\). This process, called screening, aims to reduce dimensionality without compromising the analysis. If \\(f\\) is at least once differentiable over the design domain \\(D\\), the partial derivative \\(\\frac{\\partial f}{\\partial x_i}\\) can be used to classify variables:\n\nNegligible Variables: If \\(\\frac{\\partial f}{\\partial x_i} = 0, \\, \\forall x \\in D\\), the variable \\(x_i\\) can be safely neglected.\nLinear Additive Variables: If \\(\\frac{\\partial f}{\\partial x_i} = \\text{constant} \\neq 0, \\, \\forall x \\in D\\), the effect of \\(x_i\\) is linear and additive.\nNonlinear Variables: If \\(\\frac{\\partial f}{\\partial x_i} = g(x_i), \\, \\forall x \\in D\\), where \\(g(x_i)\\) is a non-constant function, \\(f\\) is nonlinear in \\(x_i\\).\nInteractive Nonlinear Variables: If \\(\\frac{\\partial f}{\\partial x_i} = g(x_i, x_j, \\dots), \\, \\forall x \\in D\\), where \\(g(x_i, x_j, \\dots)\\) is a function involving interactions with other variables, \\(f\\) is nonlinear in \\(x_i\\) and interacts with \\(x_j\\).\n\nMeasuring \\(\\frac{\\partial f}{\\partial x_i}\\) across the entire design space is often infeasible due to limited budgets. The percentage of time allocated to screening depends on the problem: If many variables are expected to be inactive, thorough screening can significantly improve model accuracy by reducing dimensionality. If most variables are believed to impact the objective, focus should shift to modeling instead.
Screening is a trade-off between computational cost and model accuracy, and its effectiveness depends on the specific problem context.\n\n36.1.3.1 Estimating the Distribution of Elementary Effects\nIn order to simplify the presentation of what follows, we make, without loss of generality, the assumption that the design space \\(D = [0, 1]^k\\); that is, we normalize all variables into the unit cube. We shall adhere to this convention for the rest of the book and strongly urge the reader to do likewise when implementing any algorithms described here, as this step not only yields clearer mathematics in some cases but also safeguards against scaling issues.\nBefore proceeding with the description of the Morris algorithm, we need to define an important statistical concept. Let us restrict our design space \\(D\\) to a \\(k\\)-dimensional, \\(p\\)-level full factorial grid, that is,\n\\[\nx_i \\in \\{0, \\frac{1}{p-1}, \\frac{2}{p-1}, \\dots, 1\\}, \\quad \\text{ for } i = 1, \\dots, k.\n\\]\n\nDefinition 36.2 (Elementary Effect) For a given baseline value \\(x \\in D\\), let \\(d_i(x)\\) denote the elementary effect of \\(x_i\\), where:\n\\[\nd_i(x) = \\frac{f(x_1, \\dots, x_i + \\Delta, \\dots, x_k) - f(x_1, \\dots, x_i - \\Delta, \\dots, x_k)}{2\\Delta}, \\quad i = 1, \\dots, k,\n\\tag{36.1}\\] where \\(\\Delta\\) is the step size, which is defined as the distance between two adjacent levels in the grid. In other words, we have:\nwith \\[\\Delta = \\frac{\\xi}{p-1}, \\quad \\xi \\in \\mathbb{N}^*, \\quad \\text{and} \\quad x \\in D , \\text{ such that its components } x_i \\leq 1 - \\Delta.\n\\]\n\\(\\Delta\\) is the step size. The elementary effect \\(d_i(x)\\) measures the sensitivity of the function \\(f\\) to changes in the variable \\(x_i\\) at the point \\(x\\).\n\nMorris’s method aims to estimate the parameters of the distribution of elementary effects associated with each variable. 
A large measure of central tendency indicates that a variable has a significant influence on the objective function across the design space, while a large measure of spread suggests that the variable is involved in interactions or contributes to the nonlinearity of \\(f\\). In practice, the sample mean and standard deviation of a set of \\(d_i(x)\\) values, calculated in different parts of the design space, are used for this estimation.\nTo ensure efficiency, the preliminary sampling plan \\(X\\) should be designed so that each evaluation of the objective function \\(f\\) contributes to the calculation of two elementary effects, rather than just one (as would occur with a naive random spread of baseline \\(x\\) values and adding \\(\\Delta\\) to one variable). Additionally, the sampling plan should provide a specified number (e.g., \\(r\\)) of elementary effects for each variable, independently drawn with replacement. For a detailed discussion on constructing such a sampling plan, readers are encouraged to consult Morris’s original paper (Morris, 1991). 
Here, we focus on describing the process itself.\nThe random orientation of the sampling plan \\(B\\) can be constructed as follows:\n\nLet \\(B\\) be a \\((k+1) \\times k\\) matrix of 0s and 1s, where for each column \\(i\\), two rows differ only in their \\(i\\)-th entries.\nCompute a random orientation of \\(B\\), denoted \\(B^*\\):\n\n\\[\nB^* =\n\\left(\n1_{k+1,k} x^* + (\\Delta/2)\n\\left[\n(2B-1_{k+1,k})\nD^* +\n1_{k+1,k}\n\\right]\n\\right)\nP^*,\n\\]\nwhere:\n\n\\(D^*\\) is a \\(k\\)-dimensional diagonal matrix with diagonal elements \\(\\pm 1\\) (equal probability),\n\\(\\mathbf{1}\\) is a matrix of 1s,\n\\(x^*\\) is a randomly chosen point in the \\(p\\)-level design space (limited by \\(\\Delta\\)),\n\\(P^*\\) is a \\(k \\times k\\) random permutation matrix with one 1 per column and row.\n\nspotpython provides a Python implementation to compute \\(B^*\\), see https://github.com/sequential-parameter-optimization/spotPython/blob/main/src/spotpython/utils/effects.py.\nHere is the corresponding code:\n\ndef randorient(k, p, xi, seed=None):\n    # Initialize random number generator with the provided seed\n    if seed is not None:\n        rng = np.random.default_rng(seed)\n    else:\n        rng = np.random.default_rng()\n\n    # Step length\n    Delta = xi / (p - 1)\n\n    m = k + 1\n\n    # A truncated p-level grid in one dimension\n    xs = np.arange(0, 1 - Delta, 1 / (p - 1))\n    xsl = len(xs)\n    if xsl < 1:\n        print(f\"xi = {xi}.\")\n        print(f\"p = {p}.\")\n        print(f\"Delta = {Delta}.\")\n        print(f\"p - 1 = {p - 1}.\")\n        raise ValueError(f\"The number of levels xsl is {xsl}, but it must be greater than 0.\")\n\n    # Basic sampling matrix\n    B = np.vstack((np.zeros((1, k)), np.tril(np.ones((k, k)))))\n\n    # Randomization\n\n    # Matrix with +1s and -1s on the diagonal with equal probability\n    Dstar = np.diag(2 * rng.integers(0, 2, size=k) - 1)\n\n    # Random base value\n    xstar = xs[rng.integers(0, xsl, 
size=k)]\n\n    # Permutation matrix\n    Pstar = np.zeros((k, k))\n    rp = rng.permutation(k)\n    for i in range(k):\n        Pstar[i, rp[i]] = 1\n\n    # A random orientation of the sampling matrix\n    Bstar = (np.ones((m, 1)) @ xstar.reshape(1, -1) +\n        (Delta / 2) * ((2 * B - np.ones((m, k))) @ Dstar +\n        np.ones((m, k)))) @ Pstar\n\n    return Bstar\n\nThe code following snippet generates a random orientation of a sampling matrix Bstar using the randorient() function. The input parameters are:\n\nk = 3: The number of design variables (dimensions).\np = 3: The number of levels in the grid for each variable.\nxi = 1: A parameter used to calculate the step size Delta.\n\nStep-size calculation is performed as follows: Delta = xi / (p - 1) = 1 / (3 - 1) = 0.5, which determines the spacing between levels in the grid.\nNext, random sampling matrix construction is computed:\n\nA truncated grid is created with levels [0, 0.5] (based on Delta).\nA basic sampling matrix B is constructed, which is a lower triangular matrix with 0s and 1s.\n\nThen, randomization is applied:\n\nDstar: A diagonal matrix with random entries of +1 or -1.\nxstar: A random starting point from the grid.\nPstar: A random permutation matrix.\n\nRandom orientation is applied to the basic sampling matrix B to create Bstar. This involves scaling, shifting, and permuting the rows and columns of B.\nThe final output is the matrix Bstar, which represents a random orientation of the sampling plan. Each row corresponds to a sampled point in the design space, and each column corresponds to a design variable.\n\nExample 36.2 (Random Orientation of the Sampling Matrix in 2-D)  \n\nk = 2\np = 3\nxi = 1\nBstar = randorient(k, p, xi, seed=123)\nprint(f\"Random orientation of the sampling matrix:\\n{Bstar}\")\n\nRandom orientation of the sampling matrix:\n[[0.5 0. ]\n [0.  0. ]\n [0.  
0.5]]\n\n\nWe can visualize the random orientation of the sampling matrix in 2-D as shown in Figure 36.1.\n\nplt.figure(figsize=(6, 6))\nplt.scatter(Bstar[:, 0], Bstar[:, 1], color='blue', s=50, label='Hypercube Points')\nfor i in range(Bstar.shape[0]):\n    plt.text(Bstar[i, 0] + 0.01, Bstar[i, 1] + 0.01, str(i), fontsize=9)\nplt.xlim(-0.1, 1.1)\nplt.ylim(-0.1, 1.1)\nplt.xlabel('x1')\nplt.ylabel('x2')\nplt.grid()\n\n\n\n\n\n\n\nFigure 36.1: Random orientation of the sampling matrix in 2-D. The labels indicate the row index of the points.\n\n\n\n\n\n\n\nExample 36.3 (Random Orientation of the Sampling Matrix)  \n\nk = 3\np = 3\nxi = 1\nBstar = randorient(k, p, xi)\nprint(f\"Random orientation of the sampling matrix:\\n{Bstar}\")\n\nRandom orientation of the sampling matrix:\n[[0.5 0.  0.5]\n [0.5 0.5 0.5]\n [0.  0.5 0.5]\n [0.  0.5 0. ]]\n\n\n\nTo obtain \\(r\\) elementary effects for each variable, the screening plan is built from \\(r\\) random orientations:\n\\[\nX =\n\\begin{pmatrix}\nB^*_1 \\\\\nB^*_2 \\\\\n\\vdots \\\\\nB^*_r\n\\end{pmatrix}\n\\]\nThe function screeningplan() generates a screening plan by calling the randorient() function r times. It creates a list of random orientations and then concatenates them into a single array, which represents the screening plan. The screening plan implementation in Python is as follows (see https://github.com/sequential-parameter-optimization/spotPython/blob/main/src/spotpython/utils/effects.py):\n\ndef screeningplan(k, p, xi, r):\n    # Empty list to accumulate screening plan rows\n    X = []\n    for i in range(r):\n        X.append(randorient(k, p, xi))\n    # Concatenate list of arrays into a single array\n    X = np.vstack(X)\n    return X\n\nIt works as follows:\n\nThe value of the objective function \\(f\\) is computed for each row of the screening plan matrix \\(X\\).
These values are stored in a column vector \\(t\\) of size \\((r * (k + 1)) \\times 1\\), where:\n\nr is the number of random orientations.\nk is the number of design variables.\n\n\nThe elementary effects are calculated using the following formula:\n\nFor each random orientation, adjacent rows of the screening plan matrix X and their corresponding function values from t are used.\nThese values are inserted into Equation 36.1 to compute elementary effects for each variable. An elementary effect measures the sensitivity of the objective function to changes in a specific variable.\n\nResults can be used for a statistical analysis. After collecting a sample of \\(r\\) elementary effects for each variable:\n\nThe sample mean (central tendency) is computed to indicate the overall influence of the variable.\nThe sample standard deviation (spread) is computed to capture variability, which may indicate interactions or nonlinearity.\n\nThe results (sample means and standard deviations) are plotted on a chart for comparison. This helps identify which variables have the most significant impact on the objective function and whether their effects are linear or involve interactions. 
This is implemented in the function screening_plot() in Python, which uses the helper function _screening() to calculate the elementary effects and their statistics.\n\ndef _screening(X, fun, xi, p, labels, bounds=None) -> tuple:\n    \"\"\"Helper function to calculate elementary effects for a screening design.\n\n    Args:\n        X (np.ndarray): The screening plan matrix, typically structured\n            within a [0,1]^k box.\n        fun (object): The objective function to evaluate at each\n            design point in the screening plan.\n        xi (float): The elementary effect step length factor.\n        p (int): Number of discrete levels along each dimension.\n        labels (list of str): A list of variable names corresponding to\n            the design variables.\n        bounds (np.ndarray): A 2xk matrix where the first row contains\n            lower bounds and the second row contains upper bounds for\n            each variable.\n\n    Returns:\n        tuple: A tuple containing two arrays:\n            - sm: The mean of the elementary effects for each variable.\n            - ssd: The standard deviation of the elementary effects for\n            each variable.\n    \"\"\"\n    k = X.shape[1]\n    r = X.shape[0] // (k + 1)\n\n    # Scale each design point\n    t = np.zeros(X.shape[0])\n    for i in range(X.shape[0]):\n        if bounds is not None:\n            X[i, :] = bounds[0, :] + X[i, :] * (bounds[1, :] - bounds[0, :])\n        t[i] = np.asarray(fun(X[i, :])).item()\n\n    # Elementary effects\n    F = np.zeros((k, r))\n    for i in range(r):\n        for j in range(i * (k + 1), i * (k + 1) + k):\n            idx = np.where(X[j, :] - X[j + 1, :] != 0)[0][0]\n            F[idx, i] = (t[j + 1] - t[j]) / (xi / (p - 1))\n\n    # Statistical measures (divide by n)\n    ssd = np.std(F, axis=1, ddof=0)\n    sm = np.mean(F, axis=1)\n    return sm, ssd\n\n\ndef screening_plot(X, fun, xi, p, labels, bounds=None, show=True) -> None:\n    \"\"\"Generates a 
plot with elementary effect screening metrics.\n\n    This function calculates the mean and standard deviation of the\n    elementary effects for a given set of design variables and plots\n    the results.\n\n    Args:\n        X (np.ndarray):\n            The screening plan matrix, typically structured within a [0,1]^k box.\n        fun (object):\n            The objective function to evaluate at each design point in the screening plan.\n        xi (float):\n            The elementary effect step length factor.\n        p (int):\n            Number of discrete levels along each dimension.\n        labels (list of str):\n            A list of variable names corresponding to the design variables.\n        bounds (np.ndarray):\n            A 2xk matrix where the first row contains lower bounds and\n            the second row contains upper bounds for each variable.\n        show (bool):\n            If True, the plot is displayed. Defaults to True.\n\n    Returns:\n        None: The function generates a plot of the results.\n    \"\"\"\n    k = X.shape[1]\n    sm, ssd = _screening(X=X, fun=fun, xi=xi, p=p, labels=labels, bounds=bounds)\n    plt.figure()\n    for i in range(k):\n        plt.text(sm[i], ssd[i], labels[i], fontsize=10)\n    plt.axis([min(sm), 1.1 * max(sm), min(ssd), 1.1 * max(ssd)])\n    plt.xlabel(\"Sample means\")\n    plt.ylabel(\"Sample standard deviations\")\n    plt.gca().tick_params(labelsize=10)\n    plt.grid(True)\n    if show:\n        plt.show()\n\n\n\n\n36.1.4 Special Considerations When Deploying Screening Algorithms\nWhen implementing the screening algorithm described above, two specific scenarios require special attention:\n\nDuplicate Design Points: If the dimensionality \\(k\\) of the space is relatively low and you can afford a large number of elementary effects \\(r\\), you should be aware of the increased probability of duplicate design points appearing in the sampling plan \\(X\\).
Since the responses at sample points are deterministic, there’s no value in evaluating the same point multiple times. Fortunately, this issue is relatively uncommon in practice, as screening high-dimensional spaces typically requires large numbers of elementary effects, which naturally reduces the likelihood of duplicates.\nFailed Simulations: Numerical simulation codes occasionally fail to return valid results due to meshing errors, non-convergence of partial differential equation solvers, numerical instabilities, or parameter combinations outside the stable operating range.\n\nFrom a screening perspective, this is particularly problematic because an entire random orientation \\(B^*\\) becomes compromised if even a single point within it fails to evaluate properly. Implementing error handling strategies or fallback methods to manage such cases should be considered.\nFor robust screening studies, monitoring simulation success rates and having contingency plans for failed evaluations are important aspects of the experimental design process.",
           "crumbs": [
             "Numerical Methods",
             "36  Sampling Plans"
      @@ -4029,7 +4029,7 @@
           "href": "001_sampling.html#designing-a-sampling-plan",
           "title": "36  Sampling Plans",
           "section": "36.3 Designing a Sampling Plan",
      -    "text": "36.3 Designing a Sampling Plan\n\n36.3.1 Stratification\nA feature shared by all of the approximation models discussed in Forrester et al. (2008) is that they are more accurate in the vicinity of the points where we have evaluated the objective function. In later chapters we will delve into the laws that quantify our decaying trust in the model as we move away from a known, sampled point, but for the purposes of the present discussion we shall merely draw the intuitive conclusion that a uniform level of model accuracy throughout the design space requires a uniform spread of points. A sampling plan possessing this feature is said to be space-filling.\nThe most straightforward way of sampling a design space in a uniform fashion is by means of a rectangular grid of points. This is the full factorial sampling technique.\nHere is the simplified version of a Python function that will sample the unit hypercube at all levels in all dimensions, with the \\(k\\)-vector \\(q\\) containing the number of points required along each dimension, see https://github.com/sequential-parameter-optimization/spotPython/blob/main/src/spotpython/utils/sampling.py.\nThe variable Edges specifies whether we want the points to be equally spaced from edge to edge (Edges=1) or we want them to be in the centres of \\(n = q_1 \\times q_2 \\times \\ldots \\times q_k\\) bins filling the unit hypercube (for any other value of Edges).\n\ndef fullfactorial(q_param, Edges=1) -> np.ndarray:\n    \"\"\"Generates a full factorial sampling plan in the unit cube.\n\n    Args:\n        q (list or np.ndarray):\n            A list or array containing the number of points along each dimension (k-vector).\n        Edges (int, optional):\n            Determines spacing of points. 
If `Edges=1`, points are equally spaced from edge to edge (default).\n            Otherwise, points will be in the centers of n = q[0]*q[1]*...*q[k-1] bins filling the unit cube.\n\n    Returns:\n        (np.ndarray): Full factorial sampling plan as an array of shape (n, k), where n is the total number of points and k is the number of dimensions.\n\n    Raises:\n        ValueError: If any dimension in `q` is less than 2.\n    \"\"\"\n    q_levels = np.array(q_param) # Use a distinct variable for original levels\n    if np.min(q_levels) < 2:\n        raise ValueError(\"You must have at least two points per dimension.\")\n    \n    n = np.prod(q_levels)\n    k = len(q_levels)\n    X = np.zeros((n, k))\n    \n    # q_for_prod_calc is used for calculating repetitions, includes the phantom element.\n    # This matches the logic of the user-provided snippet where 'q' was modified.\n    q_for_prod_calc = np.append(q_levels, 1)\n\n    for j in range(k): # k is the original number of dimensions\n        # current_dim_levels is the number of levels for the current dimension j\n        # In the user's snippet, q[j] correctly refers to the original level count\n        # as j ranges from 0 to k-1, and q_for_prod_calc[j] = q_levels[j] for this range.\n        current_dim_levels = q_for_prod_calc[j] \n        \n        if Edges == 1:\n            one_d_slice = np.linspace(0, 1, int(current_dim_levels))\n        else:\n            # Corrected calculation for bin centers\n            if current_dim_levels == 1: # Should not be hit if np.min(q_levels) >= 2\n                one_d_slice = np.array([0.5])\n            else:\n                one_d_slice = np.linspace(1 / (2 * current_dim_levels), \n                                          1 - 1 / (2 * current_dim_levels), \n                                          int(current_dim_levels))\n        \n        column = np.array([])\n        # The product q_for_prod_calc[j + 1 : k] correctly calculates \n        # the product of 
remaining original dimensions' levels.\n        num_consecutive_repeats = np.prod(q_for_prod_calc[j + 1 : k])\n        \n        # This loop structure replicates the logic from the user's snippet\n        while len(column) < n:\n            for ll_idx in range(int(current_dim_levels)): # Iterate through levels of current dimension\n                val_to_repeat = one_d_slice[ll_idx]\n                column = np.append(column, np.ones(int(num_consecutive_repeats)) * val_to_repeat)\n        X[:, j] = column\n    return X\n\n\nq = [3, 2]\nX = fullfactorial(q, Edges=0)\nprint(X)\n\n[[0.16666667 0.25      ]\n [0.16666667 0.75      ]\n [0.5        0.25      ]\n [0.5        0.75      ]\n [0.83333333 0.25      ]\n [0.83333333 0.75      ]]\n\n\nFigure 36.3 shows the points in the unit hypercube for the case of 3x2 points.\n\n\n\n\n\n\n\n\nFigure 36.3: 2D Full Factorial Sampling (3x2 Points). Edges = 0\n\n\n\n\n\n\nX = fullfactorial(q, Edges=1)\nprint(X)\n\n[[0.  0. ]\n [0.  1. ]\n [0.5 0. ]\n [0.5 1. ]\n [1.  0. ]\n [1.  1. ]]\n\n\nFigure 36.4 shows the points in the unit hypercube for the case of 3x2 points with edges.\n\n\n\n\n\n\n\n\nFigure 36.4: 2D Full Factorial Sampling (3x2 Points). Edges = 1\n\n\n\n\n\nThe full factorial sampling plan method generates a uniform sampling design by creating a grid of points across all dimensions. For example, calling fullfactorial([3, 4, 5], 1) produces a three-dimensional sampling plan with 3, 4, and 5 levels along each dimension, respectively. While this approach satisfies the uniformity criterion, it has two significant limitations:\n\nRestricted Design Sizes: The method only works for designs where the total number of points \\(n\\) can be expressed as the product of the number of levels in each dimension, i.e., \\(n = q_1 \\times q_2 \\times \\cdots \\times q_k\\).\nOverlapping Projections: When the sampling points are projected onto individual axes, sets of points may overlap, reducing the effectiveness of the sampling plan. 
This can lead to non-uniform coverage in the projections, which may not fully represent the design space.\n\n\n\n36.3.2 Latin Squares and Random Latin Hypercubes\nTo improve the uniformity of projections for any individual variable, the range of that variable can be divided into a large number of equal-sized bins, and random subsamples of equal size can be generated within these bins. This method is called stratified random sampling. Extending this idea to all dimensions results in a stratified sampling plan, commonly implemented using Latin hypercube sampling.\n\nDefinition 36.3 (Latin Squares and Hypercubes) In the context of statistical sampling, a square grid containing sample positions is a Latin square if (and only if) there is only one sample in each row and each column. A Latin hypercube is the generalisation of this concept to an arbitrary number of dimensions, whereby each sample is the only one in each axis-aligned hyperplane containing it\n\nFor two-dimensional discrete variables, a Latin square ensures uniform projections. An \\((n \\times n)\\) Latin square is constructed by filling each row and column with a permutation of \\(\\{1, 2, \\dots, n\\}\\), ensuring each number appears only once per row and column.\n\nExample 36.4 (Latin Square) For \\(n = 4\\), a Latin square might look like this:\n2   1   3   4\n3   2   4   1\n1   4   2   3\n4   3   1   2\n\nLatin Hypercubes are the multidimensional extension of Latin squares. The design space is divided into equal-sized hypercubes (bins), and one point is placed in each bin. The placement ensures that moving along any axis from an occupied bin does not encounter another occupied bin. This guarantees uniform projections across all dimensions. 
To construct a Latin hypercube, the following steps are taken:\n\nRepresent the sampling plan as an \\(n \\times k\\) matrix \\(X\\), where \\(n\\) is the number of points and \\(k\\) is the number of dimensions.\nFill each column of \\(X\\) with random permutations of \\(\\{1, 2, \\dots, n\\}\\).\nNormalize the plan into the unit hypercube \\([0, 1]^k\\).\n\nThis approach ensures multidimensional stratification and uniformity in projections. Here is the code:\n\ndef rlh(n: int, k: int, edges: int = 0) -> np.ndarray:\n    # Initialize array\n    X = np.zeros((n, k), dtype=float)\n\n    # Fill with random permutations\n    for i in range(k):\n        X[:, i] = np.random.permutation(n)\n\n    # Adjust normalization based on the edges flag\n    if edges == 1:\n        # [X=0..n-1] -> [0..1]\n        X = X / (n - 1)\n    else:\n        # Points at true midpoints\n        # [X=0..n-1] -> [0.5/n..(n-0.5)/n]\n        X = (X + 0.5) / n\n\n    return X\n\n\nExample 36.5 (Random Latin Hypercube) The following code can be used to generate a 2D Latin hypercube with 5 points and edges=0:\n\nX = rlh(n=5, k=2, edges=0)\nprint(X)\n\n[[0.3 0.3]\n [0.1 0.5]\n [0.5 0.1]\n [0.9 0.9]\n [0.7 0.7]]\n\n\nFigure 36.5 shows the points in the unit hypercube for the case of 5 points with edges=0.\n\n\n\n\n\n\n\n\nFigure 36.5: 2D Latin Hypercube Sampling (5 Points, Edges=0)\n\n\n\n\n\n\n\nExample 36.6 (Random Latin Hypercube with Edges) The following code can be used to generate a 2D Latin hypercube with 5 points and edges=1:\n\nX = rlh(n=5, k=2, edges=1)\nprint(X)\n\n[[0.75 0.5 ]\n [0.5  0.25]\n [0.   0.  ]\n [1.   1.  
]\n [0.25 0.75]]\n\n\nFigure 36.6 shows the points in the unit hypercube for the case of 5 points with edges=1.\n\n\n\n\n\n\n\n\nFigure 36.6: 2D Latin Hypercube Sampling (5 Points, Edges=1)\n\n\n\n\n\n\n\n\n36.3.3 Space-filling Designs: Maximin Plans\nA widely adopted measure for assessing the uniformity, or ‘space-fillingness’, of a sampling plan is the maximin metric, initially proposed by Johnson et al. (1990). This criterion can be formally defined as follows.\nConsider a sampling plan \\(X\\). Let \\(d_1, d_2, \\ldots, d_m\\) represent the unique distances between all possible pairs of points within \\(X\\), arranged in ascending order. Furthermore, let \\(J_1, J_2, \\ldots, J_m\\) be defined such that \\(J_j\\) denotes the count of point pairs in \\(X\\) separated by the distance \\(d_j\\).\n\nDefinition 36.4 (Maximin plan) A sampling plan \\(X\\) is considered a maximin plan if, among all candidate plans, it maximizes the smallest inter-point distance \\(d_1\\). Among plans that satisfy this condition, it further minimizes \\(J_1\\), the number of pairs separated by this minimum distance.\n\nWhile this definition is broadly applicable to any collection of sampling plans, our focus is narrowed to Latin hypercube designs to preserve their desirable stratification properties. However, even within this restricted class, Definition 36.4 may identify multiple equivalent maximin designs. To address this, a more comprehensive ‘tie-breaker’ definition, as proposed by Morris and Mitchell (1995), is employed:\n\nDefinition 36.5 (Maximin plan with tie-breaker) A sampling plan \\(X\\) is designated as the maximin plan if it sequentially optimizes the following conditions: it maximizes \\(d_1\\); among those, it minimizes \\(J_1\\); among those, it maximizes \\(d_2\\); among those, it minimizes \\(J_2\\); and so forth, concluding with minimizing \\(J_m\\).\n\nJohnson et al. 
(1990) established that the maximin criterion (Definition 36.4) is equivalent to the D-optimality criterion used in linear regression. However, the extended maximin criterion incorporating a tie-breaker (Definition 36.5) is often preferred due to its intuitive nature and practical utility. Given that the sampling plans under consideration make no assumptions about model structure, the latter criterion (Definition 36.5) will be employed.\nTo proceed, a precise definition of ‘distance’ within these contexts is necessary. The p-norm is the most widely adopted metric for this purpose:\n\nDefinition 36.6 (p-norm) The p-norm of a vector \\(\\vec{x} = (x_1, x_2, \\ldots, x_k)\\) is defined as:\n\\[\nd_p(\\vec{x}^{(i_1)}, \\vec{x}^{(i_2)}) = \\left( \\sum_{j=1}^k |x_j^{(i_1)} - x_j^{(i_2)}|^p \\right)^{1/p}.\n\\tag{36.2}\\]\n\nWhen \\(p = 1\\), Equation 36.2 defines the rectangular distance, occasionally referred to as the Manhattan norm (an allusion to a grid-like city layout). Setting \\(p = 2\\) yields the Euclidean norm. The existing literature offers limited evidence to suggest the superiority of one norm over the other for evaluating sampling plans when no model structure assumptions are made. It is important to note, however, that the rectangular distance is considerably less computationally demanding. This advantage can be quite significant, particularly when evaluating large sampling plans.\nFor the computational implementation of Definition 36.5, the initial step involves constructing the vectors \\(d_1, d_2, \\ldots, d_m\\) and \\(J_1, J_2, \\ldots, J_m\\). The jd function facilitates this task.\n\n36.3.3.1 The Function jd\nThe function jd computes the distinct p-norm distances between all pairs of points in a given set and counts their occurrences. 
It returns two arrays: one for the distinct distances and another for their multiplicities.\n\ndef jd(X: np.ndarray, p: float = 1.0) -> Tuple[np.ndarray, np.ndarray]:\n    \"\"\"\n    Args:\n        X (np.ndarray):\n            A 2D array of shape (n, d) representing n points\n            in d-dimensional space.\n        p (float, optional):\n            The distance norm to use.\n            p=1 uses the Manhattan (L1) norm, while p=2 uses the\n            Euclidean (L2) norm. Defaults to 1.0 (Manhattan norm).\n\n    Returns:\n        (np.ndarray, np.ndarray):\n            A tuple (J, distinct_d), where:\n            - distinct_d is a 1D float array of unique,\n            sorted distances between points.\n            - J is a 1D integer array that provides\n            the multiplicity (occurrence count)\n            of each distance in distinct_d.\n    \"\"\"\n    n = X.shape[0]\n\n    # Allocate enough space for all pairwise distances\n    # (n*(n-1))/2 pairs for an n-point set\n    pair_count = n * (n - 1) // 2\n    d = np.zeros(pair_count, dtype=float)\n\n    # Fill the distance array\n    idx = 0\n    for i in range(n - 1):\n        for j in range(i + 1, n):\n            # Compute the p-norm distance\n            d[idx] = np.linalg.norm(X[i] - X[j], ord=p)\n            idx += 1\n\n    # Find unique distances and their multiplicities\n    distinct_d = np.unique(d)\n    J = np.zeros_like(distinct_d, dtype=int)\n    for i, val in enumerate(distinct_d):\n        J[i] = np.sum(d == val)\n    return J, distinct_d\n\n\nExample 36.7 (The Function jd) Consider a small 3-point set in 2D space, with points located at (0,0), (1,1), and (2,2) as shown in Figure 36.7. 
The distinct distances and their occurrences can be computed using the jd function, as shown in the following code:\n\n\n\n\n\n\n\n\nFigure 36.7: 3-Point Set in 2D Space\n\n\n\n\n\n\nJ, distinct_d = jd(X, p=2.0)\nprint(\"Distinct distances (d_i):\", distinct_d)\nprint(\"Occurrences (J_i):\", J)\n\nDistinct distances (d_i): [1.41421356 2.82842712]\nOccurrences (J_i): [2 1]\n\n\n\n\n\n\n36.3.4 Memory Management\nA computationally intensive part of the calculation performed with the jd-function is the creation of the vector \\(\\vec{d}\\) containing all pairwise distances. This is particularly true for large sampling plans; for instance, a 1000-point plan requires nearly half a million distance calculations.\n\nDefinition 36.7 (Pre-allocation of Memory) Pre-allocation of memory is a programming technique where a fixed amount of memory is reserved for a data structure (like an array or vector) before it is actually filled with data. This is done to avoid the computational overhead associated with dynamic memory allocation, which involves repeatedly requesting and resizing memory as new elements are added.\n\nConsequently, pre-allocating memory for the distance vector \\(\\vec{d}\\) is essential. This necessitates a slightly less direct method for computing the indices of \\(\\vec{d}\\), rather than appending each new element, which would involve costly dynamic memory allocation.\nThe implementation of Definition 36.5 is now required. Finding the most space-filling design involves pairwise comparisons. This problem can be approached using a ‘divide and conquer’ strategy, simplifying it to the task of selecting the better of two sampling plans. The function mm(X1,X2,p) is designed for this purpose. 
It returns an index indicating which of the two designs is more space-filling, or 0 if they are equally space-filling, based on the \\(p\\)-norm for distance computation.\n\n36.3.4.1 The Function mm\nThe function mm compares two sampling plans based on the Morris-Mitchell criterion. It uses the jd function to compute the distances and multiplicities, constructs vectors for comparison, and determines which plan is more space-filling.\n\ndef mm(X1: np.ndarray, X2: np.ndarray, p: Optional[float] = 1.0) -> int:\n    \"\"\"\n    Args:\n        X1 (np.ndarray): A 2D array representing the first sampling plan.\n        X2 (np.ndarray): A 2D array representing the second sampling plan.\n        p (float, optional): The distance metric. p=1 uses Manhattan (L1) distance,\n            while p=2 uses Euclidean (L2). Defaults to 1.0.\n\n    Returns:\n        int:\n            - 0 if both plans are identical or equally space-filling\n            - 1 if X1 is more space-filling\n            - 2 if X2 is more space-filling\n    \"\"\"\n    X1_sorted = X1[np.lexsort(np.rot90(X1))]\n    X2_sorted = X2[np.lexsort(np.rot90(X2))]\n    if np.array_equal(X1_sorted, X2_sorted):\n        return 0  # Identical sampling plans\n\n    # Compute distance multiplicities for each plan\n    J1, d1 = jd(X1, p)\n    J2, d2 = jd(X2, p)\n    m1, m2 = len(d1), len(d2)\n\n    # Construct V1 and V2: alternate distance and negative multiplicity\n    V1 = np.zeros(2 * m1)\n    V1[0::2] = d1\n    V1[1::2] = -J1\n\n    V2 = np.zeros(2 * m2)\n    V2[0::2] = d2\n    V2[1::2] = -J2\n\n    # Trim the longer vector to match the size of the shorter\n    m = min(m1, m2)\n    V1 = V1[:m]\n    V2 = V2[:m]\n\n    # Compare element-by-element:\n    # c[i] = 1 if V1[i] > V2[i], 2 if V1[i] < V2[i], 0 otherwise.\n    c = (V1 > V2).astype(int) + 2 * (V1 < V2).astype(int)\n\n    if np.sum(c) == 0:\n        # Equally space-filling\n        return 0\n    else:\n        # The first non-zero entry indicates which plan is 
better\n        idx = np.argmax(c != 0)\n        return c[idx]\n\n\nExample 36.8 (The Function mm) We can use the mm function to compare two sampling plans. The following code creates two 3-point sampling plans in 2D (shown in Figure 36.8) and compares them using the Morris-Mitchell criterion:\n\nX1 = np.array([[0.0, 0.0],[0.5, 0.5],[0.0, 1.0], [1.0, 1.0]])\nX2 = np.array([[0.1, 0.1],[0.4, 0.6],[0.1, 0.9], [0.9, 0.9]])\n\n\n\n\n\n\n\n\n\nFigure 36.8: Comparison of Two Sampling Plans\n\n\n\n\n\nWe can compare which plan has better space-filling (Morris-Mitchell). The output is either 0, 1, or 2 depending on which plan is more space-filling.\n\nbetter = mm(X1, X2, p=2.0)\nprint(f\"Plan {better} is more space-filling.\")\n\nPlan 1 is more space-filling.\n\n\n\n\n\n36.3.4.2 The Function mmphi\nSearching across a space of potential sampling plans can be accomplished by pairwise comparisons. An optimization algorithm could, in theory, be written with mm as the comparative objective. However, experimental evidence (Morris and Mitchell 1995) suggests that the resulting optimization landscape can be quite deceptive, making it difficult to search reliably. This difficulty arises because the comparison process terminates upon finding the first non-zero element in the comparison array c. Consequently, the remaining values in the distance (\\(d_1, d_2, ..., d_m\\)) and multiplicity (\\(J_1, J_2, ..., J_m\\)) arrays are disregarded. These disregarded values, however, might contain potentially useful ‘slope’ information about the global landscape for the optimization process.\nTo address this, Morris and Mitchell (1995) defined the following scalar-valued criterion function, which is used to rank competing sampling plans. 
This function, while based on the logic of Definition 36.5, incorporates the complete vectors \\(d_1, d_2, ..., d_m\\) and \\(J_1, J_2, ..., J_m\\).\n\nDefinition 36.8 (Morris-Mitchell Criterion) The Morris-Mitchell criterion is defined as:\n\\[\n\\Phi_q (X) = \\left(\\sum_{j=1}^m J_j d_j^{-q}\\right)^{1/q},\n\\tag{36.3}\\]\nwhere \\(X\\) is the sampling plan, \\(d_j\\) is the distance between points, \\(J_j\\) is the multiplicity of that distance, and \\(q\\) is a user-defined exponent. The parameter \\(q\\) can be adjusted to control the influence of smaller distances on the overall metric.\n\nThe smaller the value of \\(\\Phi_q\\), the better the space-filling properties of \\(X\\) will be.\nThe function mmphi computes the Morris-Mitchell sampling plan quality criterion for a given sampling plan. It takes a 2D array of points and calculates the space-fillingness metric based on the distances between points. This can be implemented in Python as follows:\n\ndef mmphi(X: np.ndarray,\n          q: Optional[float] = 2.0,\n          p: Optional[float] = 1.0) -> float:\n    \"\"\"\n    Args:\n        X (np.ndarray):\n            A 2D array representing the sampling plan,\n            where each row is a point in\n            d-dimensional space (shape: (n, d)).\n        q (float, optional):\n            Exponent used in the computation of the metric.\n            Defaults to 2.0.\n        p (float, optional):\n            The distance norm to use.\n            For example, p=1 is Manhattan (L1),\n            p=2 is Euclidean (L2). Defaults to 1.0.\n\n    Returns:\n        float:\n            The space-fillingness metric Phiq. 
Larger values typically indicate a more\n            space-filling plan according to the Morris-Mitchell criterion.\n    \"\"\"\n    # Compute the distance multiplicities: J, and unique distances: d\n    J, d = jd(X, p)\n    # Summation of J[i] * d[i]^(-q), then raised to 1/q\n    # This follows the Morris-Mitchell definition.\n    Phiq = np.sum(J * (d ** (-q))) ** (1.0 / q)\n    return Phiq\n\n\nExample 36.9 (The Function mmphi) We can use the mmphi function to evaluate the space-filling quality of the two sampling plans from Example 36.8. The following code uses these two 3-point sampling plans in 2D and computes their quality using the Morris-Mitchell criterion:\n\n# Two simple sampling plans from above\nquality1 = mmphi(X1, q=2, p=2)\nquality2 = mmphi(X2, q=2, p=2)\nprint(f\"Quality of sampling plan X1:  {quality1}\")\nprint(f\"Quality of sampling plan X2:  {quality2}\")\n\nQuality of sampling plan X1:  2.91547594742265\nQuality of sampling plan X2:  3.917162046269215\n\n\n\nThis equation provides a more compact representation of the maximin criterion, but the selection of the \\(q\\) value is an important consideration. Larger values of \\(q\\) ensure that terms in the sum corresponding to smaller inter-point distances (the \\(d_j\\) values, which are sorted in ascending order) have a dominant influence. As a result, \\(\\Phi_q\\) will rank sampling plans in a way that closely emulates the original maximin definition (Definition 36.5). This implies that the optimization landscape might retain the challenging characteristics that the \\(\\Phi_q\\) metric, especially with smaller \\(q\\) values, is intended to alleviate. 
Conversely, smaller \\(q\\) values tend to produce a \\(\\Phi_q\\) landscape that, while not perfectly aligning with the original definition, is generally more conducive to optimization.\nTo illustrate the relationship between Equation 36.3 and the maximin criterion of Definition 36.5, sets of 50 random Latin hypercubes of varying sizes and dimensionalities were considered by Forrester et al. (2008). The correlation plots from this analysis suggest that as the sampling plan size increases, a smaller \\(q\\) value is needed for the \\(\\Phi_q\\)-based ranking to closely match the ranking derived from Definition 36.5.\nRankings based on both the direct maximin comparison (mm) and the \\(\\Phi_q\\) metric (mmphi), determined using a simple bubble sort algorithm, are implemented in the Python function mmsort.\n\n\n36.3.4.3 The Function mmsort\nThe function mmsort is designed to rank multiple sampling plans based on their space-filling properties using the Morris-Mitchell criterion. It takes a 3D array of sampling plans and returns the indices of the plans sorted in ascending order of their space-filling quality.\n\ndef mmsort(X3D: np.ndarray, p: Optional[float] = 1.0) -> np.ndarray:\n    \"\"\"\n    Args:\n        X3D (np.ndarray):\n            A 3D NumPy array of shape (n, d, m), where m is the number of\n            sampling plans, and each plan is an (n, d) matrix of points.\n        p (float, optional):\n            The distance metric to use. p=1 for Manhattan (L1), p=2 for\n            Euclidean (L2). Defaults to 1.0.\n\n    Returns:\n        np.ndarray:\n            A 1D integer array of length m that holds the plan indices in\n            ascending order of space-filling quality. 
The first index in the\n            returned array corresponds to the most space-filling plan.\n    \"\"\"\n    # Number of plans (m)\n    m = X3D.shape[2]\n\n    # Create index array (1-based to match original MATLAB convention)\n    Index = np.arange(1, m + 1)\n\n    swap_flag = True\n    while swap_flag:\n        swap_flag = False\n        i = 0\n        while i < m - 1:\n            # Compare plan at Index[i] vs. Index[i+1] using mm()\n            # Note: subtract 1 from each index to convert to 0-based array indexing\n            if mm(X3D[:, :, Index[i] - 1], X3D[:, :, Index[i + 1] - 1], p) == 2:\n                # Swap indices if the second plan is more space-filling\n                Index[i], Index[i + 1] = Index[i + 1], Index[i]\n                swap_flag = True\n            i += 1\n\n    return Index\n\n\nExample 36.10 (The Function mmsort) The mmsort function can be used to rank multiple sampling plans based on their space-filling properties. The following code demonstrates how to use mmsort to compare two 3-point sampling plans in 3D space:\nSuppose we have two 3-point sampling plans X1 and X1 from above. They are sorted using the Morris-Mitchell criterion with \\(p=2.0\\). For example, the output [1, 2] indicates that X1 is more space-filling than X2:\n\nX3D = np.stack([X1, X2], axis=2)\nranking = mmsort(X3D, p=2.0)\nprint(ranking)\n\n[1 2]\n\n\n\nTo determine the optimal Latin hypercube for a specific application, a recommended approach by Morris and Mitchell (1995) involves minimizing \\(\\Phi_q\\) for a set of \\(q\\) values (1, 2, 5, 10, 20, 50, and 100). Subsequently, the best plan from these results is selected based on the actual maximin definition. The mmsort function can be utilized for this purpose: a 3D matrix, X3D, can be constructed where each 2D slice represents the best sampling plan found for each \\(\\Phi_q\\). Applying mmsort(X3D,1) then ranks these plans according to Definition 36.5, using the rectangular distance metric. 
The subsequent discussion will address the methods for finding these optimized \\(\\Phi_q\\) designs.\n\n\n36.3.4.4 The Function phisort\nphisort only differs from mmsort in having \\(q\\) as an additional argument, as well as the comparison line being:\nif mmphi(X3D[:, :, Index[i] - 1], q=q, p=p) >\n    mmphi(X3D[:, :, Index[i + 1] - 1], q=q, p=p):\n\ndef phisort(X3D: np.ndarray,\n            q: Optional[float] = 2.0,\n            p: Optional[float] = 1.0) -> np.ndarray:\n    \"\"\"\n    Args:\n        X3D (np.ndarray):\n            A 3D array of shape (n, d, m),\n            where m is the number of sampling plans.\n        q (float, optional):\n            Exponent for the mmphi metric. Defaults to 2.0.\n        p (float, optional):\n            Distance norm for mmphi.\n            p=1 is Manhattan; p=2 is Euclidean.\n            Defaults to 1.0.\n\n    Returns:\n        np.ndarray:\n            A 1D integer array of length m, giving the plan indices in ascending\n            order of mmphi. The first index in the returned array corresponds\n            to the numerically lowest mmphi value.\n    \"\"\"\n    # Number of 2D sampling plans\n    m = X3D.shape[2]\n    # Create a 1-based index array\n    Index = np.arange(1, m + 1)\n    # Bubble-sort: plan with lower mmphi() climbs toward the front\n    swap_flag = True\n    while swap_flag:\n        swap_flag = False\n        for i in range(m - 1):\n            # Retrieve mmphi values for consecutive plans\n            val_i = mmphi(X3D[:, :, Index[i] - 1], q=q, p=p)\n            val_j = mmphi(X3D[:, :, Index[i + 1] - 1], q=q, p=p)\n\n            # Swap if the left plan's mmphi is larger (i.e. 'worse')\n            if val_i > val_j:\n                Index[i], Index[i + 1] = Index[i + 1], Index[i]\n                swap_flag = True\n    return Index\n\n\nExample 36.11 (The Function phisort) The phisort function can be used to rank multiple sampling plans based on the Morris-Mitchell criterion. 
The following code demonstrates how to use phisort to compare two 3-point sampling plans in 3D space:\n\nX1 = bestlh(n=5, k=2, population=5, iterations=10)\nX2 = bestlh(n=5, k=2, population=15, iterations=20)\nX3 = bestlh(n=5, k=2, population=25, iterations=30)\n# Map X1 and X2 so that X3D has the two sampling plans\n# in X3D[:, :, 0] and X3D[:, :, 1]\nX3D = np.array([X1, X2])\nprint(phisort(X3D))\nX3D = np.array([X3, X2])\nprint(phisort(X3D))\n\n[2 1]\n[2 1]\n\n\n\n\n\n\n36.3.5 Optimizing the Morris-Mitchell Criterion \\(\\Phi_q\\)\nOnce a criterion for assessing the quality of a Latin hypercube sampling plan has been established, a systematic method for optimizing this metric across the space of Latin hypercubes is required. This task is non-trivial; as the reader may recall from the earlier discussion on Latin squares, this search space is vast. In fact, its vastness means that for many practical applications, locating the globally optimal solution is often infeasible. Therefore, the objective becomes finding the best possible sampling plan achievable within a specific computational time budget.\nThis budget is influenced by the computational cost associated with obtaining each objective function value. Determining the optimal allocation of total computational effort—between generating the sampling plan and actually evaluating the objective function at the selected points—remains an open research question. However, it is typical for no more than approximately 5% of the total available time to be allocated to the task of generating the sampling plan itself.\nForrester et al. (2008) draw an analogy to the process of devising a revision timetable before an exam. 
While a well-structured timetable enhances the effectiveness of revision, an excessive amount of the revision time itself should not be consumed by the planning phase.\nA significant challenge in devising a sampling plan optimizer is ensuring that the search process remains confined to the space of valid Latin hypercubes. As previously discussed, the defining characteristic of a Latin hypercube \\(X\\) is that each of its columns represents a permutation of the possible levels for the corresponding variable. Consequently, the smallest modification that can be applied to a Latin hypercube—without compromising its crucial multidimensional stratification property—involves swapping two elements within any single column of \\(X\\). A Python implementation for ‘mutating’ a Latin hypercube through such an operation, generalized to accommodate random changes applied to multiple sites, is provided below:\n\n36.3.5.1 The Function perturb()\nThe function perturb randomly swaps elements in a Latin hypercube sampling plan. It takes a 2D array representing the sampling plan and performs a specified number of random element swaps, ensuring that the result remains a valid Latin hypercube.\n\ndef perturb(X: np.ndarray,\n            PertNum: Optional[int] = 1) -> np.ndarray:\n    \"\"\"\n    Args:\n        X (np.ndarray):\n            A 2D array (sampling plan) of shape (n, k),\n            where each row is a point\n            and each column is a dimension.\n        PertNum (int, optional):\n            The number of element swaps (perturbations)\n            to perform. 
Defaults to 1.\n\n    Returns:\n        np.ndarray:\n            The perturbed sampling plan,\n            identical in shape to the input, with\n            one or more random column swaps executed.\n    \"\"\"\n    # Get dimensions of the plan\n    n, k = X.shape\n    if n < 2 or k < 2:\n        raise ValueError(\"Latin hypercubes require at least 2 points and 2 dimensions\")\n    for _ in range(PertNum):\n        # Pick a random column\n        col = int(np.floor(np.random.rand() * k))\n        # Pick two distinct row indices\n        el1, el2 = 0, 0\n        while el1 == el2:\n            el1 = int(np.floor(np.random.rand() * n))\n            el2 = int(np.floor(np.random.rand() * n))\n        # Swap the two selected elements in the chosen column\n        X[el1, col], X[el2, col] = X[el2, col], X[el1, col]\n    return X\n\n\nExample 36.12 (The Function perturb()) The perturb function can be used to randomly swap elements in a Latin hypercube sampling plan. The following code demonstrates how to use perturb to create a perturbed version of a 4x2 sampling plan:\n\nX_original = np.array([[1, 3],[2, 4],[3, 1],[4, 2]])\nprint(\"Original Sampling Plan:\")\nprint(X_original)\nprint(\"Perturbed Sampling Plan:\")\nX_perturbed = perturb(X_original, PertNum=1)\nprint(X_perturbed)\n\nOriginal Sampling Plan:\n[[1 3]\n [2 4]\n [3 1]\n [4 2]]\nPerturbed Sampling Plan:\n[[2 3]\n [1 4]\n [3 1]\n [4 2]]\n\n\n\nForrester et al. (2008) uses the term ‘mutation’, because this problem lends itself to nature-inspired computation. Morris and Mitchell (1995) use a simulated annealing algorithm, the detailed pseudocode of which can be found in their paper. As an alternative, a method based on evolutionary operation (EVOP) is offered by Forrester et al. (2008).\n\n\n\n36.3.6 Evolutionary Operation\nAs introduced by Box (1957), evolutionary operation was designed to optimize chemical processes. 
The current parameters of the reaction would be recorded in a box at the centre of a board, with a series of ‘offspring’ boxes along the edges containing values of the parameters slightly altered with respect to the central, ‘parent’ values. Once the reaction was completed for all of these sets of variable values and the corresponding yields recorded, the contents of the central box would be replaced with that of the setup with the highest yield and this would then become the parent of a new set of peripheral boxes.\nThis is generally viewed as a local search procedure, though this depends on the mutation step sizes, that is on the differences between the parent box and its offspring. The longer these steps, the more global is the scope of the search.\nFor the purposes of the Latin hypercube search, a variable scope strategy is applied. The process starts with a long step length (that is a relatively large number of swaps within the columns) and, as the search progresses, the current best basin of attraction is gradually approached by reducing the step length to a single change.\nIn each generation the parent is mutated (randomly, using the perturb function) a pertnum number of times. 
The sampling plan that yields the smallest \\(\\Phi_q\\) value (as per the Morris-Mitchell criterion, calculated usingmmphi) among all offspring and the parent is then selected; in evolutionary computation parlance this selection philosophy is referred to as elitism.\nThe EVOP based search for space-filling Latin hypercubes is thus a truly evolutionary process: the optimized sampling plan results from the nonrandom survival of random variations.\n\n\n36.3.7 Putting it all Together\nAll the pieces of the optimum Latin hypercube sampling process puzzle are now in place: the random hypercube generator as a starting point for the optimization process, the ‘spacefillingness’ metric that needs to be optimized, the optimization engine that performs this task and the comparison function that selects the best of the optima found for the various \\(q\\)’s. These pieces just need to be put into a sequence. Here is the Python embodiment of the completed puzzle. It results in a function bestlh that uses the function mmlhs to find the best Latin hypercube sampling plan for a given set of parameters.\n\n36.3.7.1 The Function mmlhs\nPerforms an evolutionary search (using perturbations) to find a Morris-Mitchell optimal Latin hypercube, starting from an initial plan X_start.\nThis function does the following:\n\nInitializes a “best” Latin hypercube (X_best) from the provided X_start.\nIteratively perturbs X_best to create offspring.\nEvaluates the space-fillingness of each offspring via the Morris-Mitchell metric (using mmphi).\nUpdates the best plan whenever a better offspring is found.\n\n\ndef mmlhs(X_start: np.ndarray,\n          population: int,\n          iterations: int,\n          q: Optional[float] = 2.0,\n          plot=False) -> np.ndarray:\n    \"\"\"\n    Args:\n        X_start (np.ndarray):\n            A 2D array of shape (n, k) providing the initial Latin hypercube\n            (n points in k dimensions).\n        population (int):\n            Number of offspring 
to create in each generation.\n        iterations (int):\n            Total number of generations to run the evolutionary search.\n        q (float, optional):\n            The exponent used by the Morris-Mitchell space-filling criterion.\n            Defaults to 2.0.\n        plot (bool, optional):\n            If True, a simple scatter plot of the first two dimensions will be\n            displayed at each iteration. Only if k >= 2. Defaults to False.\n\n    Returns:\n        np.ndarray:\n            A 2D array representing the most space-filling Latin hypercube found\n            after all iterations, of the same shape as X_start.\n    \"\"\"\n    n = X_start.shape[0]\n    if n < 2:\n        raise ValueError(\"Latin hypercubes require at least 2 points\")\n    k = X_start.shape[1]\n    if k < 2:\n        raise ValueError(\"Latin hypercubes are not defined for dim k < 2\")\n    # Initialize best plan and its metric\n    X_best = X_start.copy()\n    Phi_best = mmphi(X_best, q=q)\n    # After 85% of iterations, reduce the mutation rate to 1\n    leveloff = int(np.floor(0.85 * iterations))\n    for it in range(1, iterations + 1):\n        # Decrease number of mutations over time\n        if it < leveloff:\n            mutations = int(round(1 + (0.5 * n - 1) * (leveloff - it) / (leveloff - 1)))\n        else:\n            mutations = 1\n        X_improved = X_best.copy()\n        Phi_improved = Phi_best\n        # Create offspring, evaluate, and keep the best\n        for _ in range(population):\n            X_try = perturb(X_best.copy(), mutations)\n            Phi_try = mmphi(X_try, q=q)\n\n            if Phi_try < Phi_improved:\n                X_improved = X_try\n                Phi_improved = Phi_try\n        # Update the global best if we found a better plan\n        if Phi_improved < Phi_best:\n            X_best = X_improved\n            Phi_best = Phi_improved\n        # Simple visualization of the first two dimensions\n        if plot and (X_best.shape[1] 
>= 2):\n            plt.clf()\n            plt.scatter(X_best[:, 0], X_best[:, 1], marker=\"o\")\n            plt.grid(True)\n            plt.title(f\"Iteration {it} - Current Best Plan\")\n            plt.pause(0.01)\n    return X_best\n\n\nExample 36.13 (The Function mmlhs) The mmlhs function can be used to optimize a Latin hypercube sampling plan. The following code demonstrates how to use mmlhs to optimize a 4x2 Latin hypercube starting from an initial plan:\n\n# Suppose we have an initial 4x2 plan\nX_start = np.array([[0.1, 0.3],[.1, .4],[.2, .9],[.9, .2]])\nprint(\"Initial plan:\")\nprint(X_start)\n# Search for a more space-filling plan\nX_opt = mmlhs(X_start, population=10, iterations=100, q=2)\nprint(\"Optimized plan:\")\nprint(X_opt)\n\nInitial plan:\n[[0.1 0.3]\n [0.1 0.4]\n [0.2 0.9]\n [0.9 0.2]]\nOptimized plan:\n[[0.2 0.4]\n [0.9 0.3]\n [0.1 0.2]\n [0.1 0.9]]\n\n\nFigure 36.9 shows the initial and optimized plans in 2D. The blue points represent the initial plan, while the red points represent the optimized plan.\n\n\n\n\n\n\n\n\nFigure 36.9: Comparison of the initial and optimized plans in 2D.\n\n\n\n\n\n\n\n\n36.3.7.2 The Function bestlh\nGenerates an optimized Latin hypercube by evolving the Morris-Mitchell criterion across multiple exponents (q values) and selecting the best plan.\n\ndef bestlh(n: int,\n           k: int,\n           population: int,\n           iterations: int,\n           p=1,\n           plot=False,\n           verbosity=0,\n           edges=0,\n           q_list=[1, 2, 5, 10, 20, 50, 100]) -> np.ndarray:\n    \"\"\"\n    Args:\n        n (int):\n            Number of points required in the Latin hypercube.\n        k (int):\n            Number of design variables (dimensions).\n        population (int):\n            Number of offspring in each generation of the evolutionary search.\n        iterations (int):\n            Number of generations for the evolutionary search.\n        p (int, optional):\n            The distance 
norm to use. p=1 for Manhattan (L1), p=2 for Euclidean (L2).\n            Defaults to 1 (faster than 2).\n        plot (bool, optional):\n            If True, a scatter plot of the optimized plan in the first two dimensions\n            will be displayed. Only if k>=2.  Defaults to False.\n        verbosity (int, optional):\n            Verbosity level. 0 is silent, 1 prints the best q value found. Defaults to 0.\n        edges (int, optional):\n            If 1, places centers of the extreme bins at the domain edges ([0,1]).\n            Otherwise, bins are fully contained within the domain, i.e. midpoints.\n            Defaults to 0.\n        q_list (list, optional):\n            A list of q values to optimize. Defaults to [1, 2, 5, 10, 20, 50, 100].\n            These values are used to evaluate the space-fillingness of the Latin\n            hypercube. The best plan is selected based on the lowest mmphi value.\n\n    Returns:\n        np.ndarray:\n            A 2D array of shape (n, k) representing an optimized Latin hypercube.\n    \"\"\"\n    if n < 2:\n        raise ValueError(\"Latin hypercubes require at least 2 points\")\n    if k < 2:\n        raise ValueError(\"Latin hypercubes are not defined for dim k < 2\")\n\n    # A list of exponents (q) to optimize\n\n    # Start with a random Latin hypercube\n    X_start = rlh(n, k, edges=edges)\n\n    # Allocate a 3D array to store the results for each q\n    # (shape: (n, k, number_of_q_values))\n    X3D = np.zeros((n, k, len(q_list)))\n\n    # Evolve the plan for each q in q_list\n    for i, q_val in enumerate(q_list):\n        if verbosity > 0:\n            print(f\"Now optimizing for q={q_val}...\")\n        X3D[:, :, i] = mmlhs(X_start, population, iterations, q_val)\n\n    # Sort the set of evolved plans according to the Morris-Mitchell criterion\n    index_order = mmsort(X3D, p=p)\n\n    # index_order is a 1-based array of plan indices; the first element is the best\n    best_idx = index_order[0] - 1\n    
if verbosity > 0:\n        print(f\"Best lh found using q={q_list[best_idx]}...\")\n\n    # The best plan in 3D array order\n    X = X3D[:, :, best_idx]\n\n    # Plot the first two dimensions\n    if plot and (k >= 2):\n        plt.scatter(X[:, 0], X[:, 1], c=\"r\", marker=\"o\")\n        plt.title(f\"Morris-Mitchell optimum plan found using q={q_list[best_idx]}\")\n        plt.xlabel(\"x_1\")\n        plt.ylabel(\"x_2\")\n        plt.grid(True)\n        plt.show()\n\n    return X\n\n\nExample 36.14 (The Function bestlh) The bestlh function can be used to generate an optimized Latin hypercube sampling plan. The following code demonstrates how to use bestlh to create a 5x2 Latin hypercube with a population of 5 and 10 iterations:\n\nXbestlh= bestlh(n=5, k=2, population=5, iterations=10)\n\nFigure 36.10 shows the best Latin hypercube sampling in 2D. The red points represent the optimized plan.\n\n\n\n\n\n\n\n\nFigure 36.10: Best Latin Hypercube Sampling\n\n\n\n\n\n\nSorting all candidate plans in ascending order is not strictly necessary - after all, only the best one is truly of interest. Nonetheless, the added computational complexity is minimal (the vector will only ever contain as many elements as there are candidate \\(q\\) values, and only an index array is sorted, not the actual repository of plans). This sorting gives the reader the opportunity to compare, if desired, how different choices of \\(q\\) influence the resulting plans.",
      +    "text": "36.3 Designing a Sampling Plan\n\n36.3.1 Stratification\nA feature shared by all of the approximation models discussed in Forrester et al. (2008) is that they are more accurate in the vicinity of the points where we have evaluated the objective function. In later chapters we will delve into the laws that quantify our decaying trust in the model as we move away from a known, sampled point, but for the purposes of the present discussion we shall merely draw the intuitive conclusion that a uniform level of model accuracy throughout the design space requires a uniform spread of points. A sampling plan possessing this feature is said to be space-filling.\nThe most straightforward way of sampling a design space in a uniform fashion is by means of a rectangular grid of points. This is the full factorial sampling technique.\nHere is the simplified version of a Python function that will sample the unit hypercube at all levels in all dimensions, with the \\(k\\)-vector \\(q\\) containing the number of points required along each dimension, see https://github.com/sequential-parameter-optimization/spotPython/blob/main/src/spotpython/utils/sampling.py.\nThe variable Edges specifies whether we want the points to be equally spaced from edge to edge (Edges=1) or we want them to be in the centres of \\(n = q_1 \\times q_2 \\times \\ldots \\times q_k\\) bins filling the unit hypercube (for any other value of Edges).\n\ndef fullfactorial(q_param, Edges=1) -> np.ndarray:\n    \"\"\"Generates a full factorial sampling plan in the unit cube.\n\n    Args:\n        q (list or np.ndarray):\n            A list or array containing the number of points along each dimension (k-vector).\n        Edges (int, optional):\n            Determines spacing of points. 
If `Edges=1`, points are equally spaced from edge to edge (default).\n            Otherwise, points will be in the centers of n = q[0]*q[1]*...*q[k-1] bins filling the unit cube.\n\n    Returns:\n        (np.ndarray): Full factorial sampling plan as an array of shape (n, k), where n is the total number of points and k is the number of dimensions.\n\n    Raises:\n        ValueError: If any dimension in `q` is less than 2.\n    \"\"\"\n    q_levels = np.array(q_param) # Use a distinct variable for original levels\n    if np.min(q_levels) < 2:\n        raise ValueError(\"You must have at least two points per dimension.\")\n    \n    n = np.prod(q_levels)\n    k = len(q_levels)\n    X = np.zeros((n, k))\n    \n    # q_for_prod_calc is used for calculating repetitions, includes the phantom element.\n    # This matches the logic of the user-provided snippet where 'q' was modified.\n    q_for_prod_calc = np.append(q_levels, 1)\n\n    for j in range(k): # k is the original number of dimensions\n        # current_dim_levels is the number of levels for the current dimension j\n        # In the user's snippet, q[j] correctly refers to the original level count\n        # as j ranges from 0 to k-1, and q_for_prod_calc[j] = q_levels[j] for this range.\n        current_dim_levels = q_for_prod_calc[j] \n        \n        if Edges == 1:\n            one_d_slice = np.linspace(0, 1, int(current_dim_levels))\n        else:\n            # Corrected calculation for bin centers\n            if current_dim_levels == 1: # Should not be hit if np.min(q_levels) >= 2\n                one_d_slice = np.array([0.5])\n            else:\n                one_d_slice = np.linspace(1 / (2 * current_dim_levels), \n                                          1 - 1 / (2 * current_dim_levels), \n                                          int(current_dim_levels))\n        \n        column = np.array([])\n        # The product q_for_prod_calc[j + 1 : k] correctly calculates \n        # the product of 
remaining original dimensions' levels.\n        num_consecutive_repeats = np.prod(q_for_prod_calc[j + 1 : k])\n        \n        # This loop structure replicates the logic from the user's snippet\n        while len(column) < n:\n            for ll_idx in range(int(current_dim_levels)): # Iterate through levels of current dimension\n                val_to_repeat = one_d_slice[ll_idx]\n                column = np.append(column, np.ones(int(num_consecutive_repeats)) * val_to_repeat)\n        X[:, j] = column\n    return X\n\n\nq = [3, 2]\nX = fullfactorial(q, Edges=0)\nprint(X)\n\n[[0.16666667 0.25      ]\n [0.16666667 0.75      ]\n [0.5        0.25      ]\n [0.5        0.75      ]\n [0.83333333 0.25      ]\n [0.83333333 0.75      ]]\n\n\nFigure 36.3 shows the points in the unit hypercube for the case of 3x2 points.\n\n\n\n\n\n\n\n\nFigure 36.3: 2D Full Factorial Sampling (3x2 Points). Edges = 0\n\n\n\n\n\n\nX = fullfactorial(q, Edges=1)\nprint(X)\n\n[[0.  0. ]\n [0.  1. ]\n [0.5 0. ]\n [0.5 1. ]\n [1.  0. ]\n [1.  1. ]]\n\n\nFigure 36.4 shows the points in the unit hypercube for the case of 3x2 points with edges.\n\n\n\n\n\n\n\n\nFigure 36.4: 2D Full Factorial Sampling (3x2 Points). Edges = 1\n\n\n\n\n\nThe full factorial sampling plan method generates a uniform sampling design by creating a grid of points across all dimensions. For example, calling fullfactorial([3, 4, 5], 1) produces a three-dimensional sampling plan with 3, 4, and 5 levels along each dimension, respectively. While this approach satisfies the uniformity criterion, it has two significant limitations:\n\nRestricted Design Sizes: The method only works for designs where the total number of points \\(n\\) can be expressed as the product of the number of levels in each dimension, i.e., \\(n = q_1 \\times q_2 \\times \\cdots \\times q_k\\).\nOverlapping Projections: When the sampling points are projected onto individual axes, sets of points may overlap, reducing the effectiveness of the sampling plan. 
This can lead to non-uniform coverage in the projections, which may not fully represent the design space.\n\n\n\n36.3.2 Latin Squares and Random Latin Hypercubes\nTo improve the uniformity of projections for any individual variable, the range of that variable can be divided into a large number of equal-sized bins, and random subsamples of equal size can be generated within these bins. This method is called stratified random sampling. Extending this idea to all dimensions results in a stratified sampling plan, commonly implemented using Latin hypercube sampling.\n\nDefinition 36.3 (Latin Squares and Hypercubes) In the context of statistical sampling, a square grid containing sample positions is a Latin square if (and only if) there is only one sample in each row and each column. A Latin hypercube is the generalisation of this concept to an arbitrary number of dimensions, whereby each sample is the only one in each axis-aligned hyperplane containing it\n\nFor two-dimensional discrete variables, a Latin square ensures uniform projections. An \\((n \\times n)\\) Latin square is constructed by filling each row and column with a permutation of \\(\\{1, 2, \\dots, n\\}\\), ensuring each number appears only once per row and column.\n\nExample 36.4 (Latin Square) For \\(n = 4\\), a Latin square might look like this:\n2   1   3   4\n3   2   4   1\n1   4   2   3\n4   3   1   2\n\nLatin Hypercubes are the multidimensional extension of Latin squares. The design space is divided into equal-sized hypercubes (bins), and one point is placed in each bin. The placement ensures that moving along any axis from an occupied bin does not encounter another occupied bin. This guarantees uniform projections across all dimensions. 
To construct a Latin hypercube, the following steps are taken:\n\nRepresent the sampling plan as an \\(n \\times k\\) matrix \\(X\\), where \\(n\\) is the number of points and \\(k\\) is the number of dimensions.\nFill each column of \\(X\\) with random permutations of \\(\\{1, 2, \\dots, n\\}\\).\nNormalize the plan into the unit hypercube \\([0, 1]^k\\).\n\nThis approach ensures multidimensional stratification and uniformity in projections. Here is the code:\n\ndef rlh(n: int, k: int, edges: int = 0) -> np.ndarray:\n    # Initialize array\n    X = np.zeros((n, k), dtype=float)\n\n    # Fill with random permutations\n    for i in range(k):\n        X[:, i] = np.random.permutation(n)\n\n    # Adjust normalization based on the edges flag\n    if edges == 1:\n        # [X=0..n-1] -> [0..1]\n        X = X / (n - 1)\n    else:\n        # Points at true midpoints\n        # [X=0..n-1] -> [0.5/n..(n-0.5)/n]\n        X = (X + 0.5) / n\n\n    return X\n\n\nExample 36.5 (Random Latin Hypercube) The following code can be used to generate a 2D Latin hypercube with 5 points and edges=0:\n\nX = rlh(n=5, k=2, edges=0)\nprint(X)\n\n[[0.3 0.5]\n [0.5 0.9]\n [0.7 0.3]\n [0.9 0.7]\n [0.1 0.1]]\n\n\nFigure 36.5 shows the points in the unit hypercube for the case of 5 points with edges=0.\n\n\n\n\n\n\n\n\nFigure 36.5: 2D Latin Hypercube Sampling (5 Points, Edges=0)\n\n\n\n\n\n\n\nExample 36.6 (Random Latin Hypercube with Edges) The following code can be used to generate a 2D Latin hypercube with 5 points and edges=1:\n\nX = rlh(n=5, k=2, edges=1)\nprint(X)\n\n[[0.25 0.  ]\n [0.   0.75]\n [1.   1.  
]\n [0.5  0.5 ]\n [0.75 0.25]]\n\n\nFigure 36.6 shows the points in the unit hypercube for the case of 5 points with edges=1.\n\n\n\n\n\n\n\n\nFigure 36.6: 2D Latin Hypercube Sampling (5 Points, Edges=1)\n\n\n\n\n\n\n\n\n36.3.3 Space-filling Designs: Maximin Plans\nA widely adopted measure for assessing the uniformity, or ‘space-fillingness’, of a sampling plan is the maximin metric, initially proposed by Johnson et al. (1990). This criterion can be formally defined as follows.\nConsider a sampling plan \\(X\\). Let \\(d_1, d_2, \\ldots, d_m\\) represent the unique distances between all possible pairs of points within \\(X\\), arranged in ascending order. Furthermore, let \\(J_1, J_2, \\ldots, J_m\\) be defined such that \\(J_j\\) denotes the count of point pairs in \\(X\\) separated by the distance \\(d_j\\).\n\nDefinition 36.4 (Maximin plan) A sampling plan \\(X\\) is considered a maximin plan if, among all candidate plans, it maximizes the smallest inter-point distance \\(d_1\\). Among plans that satisfy this condition, it further minimizes \\(J_1\\), the number of pairs separated by this minimum distance.\n\nWhile this definition is broadly applicable to any collection of sampling plans, our focus is narrowed to Latin hypercube designs to preserve their desirable stratification properties. However, even within this restricted class, Definition 36.4 may identify multiple equivalent maximin designs. To address this, a more comprehensive ‘tie-breaker’ definition, as proposed by Morris and Mitchell (1995), is employed:\n\nDefinition 36.5 (Maximin plan with tie-breaker) A sampling plan \\(X\\) is designated as the maximin plan if it sequentially optimizes the following conditions: it maximizes \\(d_1\\); among those, it minimizes \\(J_1\\); among those, it maximizes \\(d_2\\); among those, it minimizes \\(J_2\\); and so forth, concluding with minimizing \\(J_m\\).\n\nJohnson et al. 
(1990) established that the maximin criterion (Definition 36.4) is equivalent to the D-optimality criterion used in linear regression. However, the extended maximin criterion incorporating a tie-breaker (Definition 36.5) is often preferred due to its intuitive nature and practical utility. Given that the sampling plans under consideration make no assumptions about model structure, the latter criterion (Definition 36.5) will be employed.\nTo proceed, a precise definition of ‘distance’ within these contexts is necessary. The p-norm is the most widely adopted metric for this purpose:\n\nDefinition 36.6 (p-norm) The p-norm distance between two points \\(\\vec{x}^{(i_1)}\\) and \\(\\vec{x}^{(i_2)}\\) of a sampling plan, each with \\(k\\) components, is defined as:\n\\[\nd_p(\\vec{x}^{(i_1)}, \\vec{x}^{(i_2)}) = \\left( \\sum_{j=1}^k |x_j^{(i_1)} - x_j^{(i_2)}|^p \\right)^{1/p}.\n\\tag{36.2}\\]\n\nWhen \\(p = 1\\), Equation 36.2 defines the rectangular distance, occasionally referred to as the Manhattan norm (an allusion to a grid-like city layout). Setting \\(p = 2\\) yields the Euclidean norm. The existing literature offers limited evidence to suggest the superiority of one norm over the other for evaluating sampling plans when no model structure assumptions are made. It is important to note, however, that the rectangular distance is considerably less computationally demanding. This advantage can be quite significant, particularly when evaluating large sampling plans.\nFor the computational implementation of Definition 36.5, the initial step involves constructing the vectors \\(d_1, d_2, \\ldots, d_m\\) and \\(J_1, J_2, \\ldots, J_m\\). The jd function facilitates this task.\n\n36.3.3.1 The Function jd\nThe function jd computes the distinct p-norm distances between all pairs of points in a given set and counts their occurrences. 
It returns two arrays: one for the distinct distances and another for their multiplicities.\n\ndef jd(X: np.ndarray, p: float = 1.0) -> Tuple[np.ndarray, np.ndarray]:\n    \"\"\"\n    Args:\n        X (np.ndarray):\n            A 2D array of shape (n, d) representing n points\n            in d-dimensional space.\n        p (float, optional):\n            The distance norm to use.\n            p=1 uses the Manhattan (L1) norm, while p=2 uses the\n            Euclidean (L2) norm. Defaults to 1.0 (Manhattan norm).\n\n    Returns:\n        (np.ndarray, np.ndarray):\n            A tuple (J, distinct_d), where:\n            - distinct_d is a 1D float array of unique,\n            sorted distances between points.\n            - J is a 1D integer array that provides\n            the multiplicity (occurrence count)\n            of each distance in distinct_d.\n    \"\"\"\n    n = X.shape[0]\n\n    # Allocate enough space for all pairwise distances\n    # (n*(n-1))/2 pairs for an n-point set\n    pair_count = n * (n - 1) // 2\n    d = np.zeros(pair_count, dtype=float)\n\n    # Fill the distance array\n    idx = 0\n    for i in range(n - 1):\n        for j in range(i + 1, n):\n            # Compute the p-norm distance\n            d[idx] = np.linalg.norm(X[i] - X[j], ord=p)\n            idx += 1\n\n    # Find unique distances and their multiplicities\n    distinct_d = np.unique(d)\n    J = np.zeros_like(distinct_d, dtype=int)\n    for i, val in enumerate(distinct_d):\n        J[i] = np.sum(d == val)\n    return J, distinct_d\n\n\nExample 36.7 (The Function jd) Consider a small 3-point set in 2D space, with points located at (0,0), (1,1), and (2,2) as shown in Figure 36.7. 
The distinct distances and their occurrences can be computed using the jd function, as shown in the following code:\n\n\n\n\n\n\n\n\nFigure 36.7: 3-Point Set in 2D Space\n\n\n\n\n\n\nJ, distinct_d = jd(X, p=2.0)\nprint(\"Distinct distances (d_i):\", distinct_d)\nprint(\"Occurrences (J_i):\", J)\n\nDistinct distances (d_i): [1.41421356 2.82842712]\nOccurrences (J_i): [2 1]\n\n\n\n\n\n\n36.3.4 Memory Management\nA computationally intensive part of the calculation performed with the jd-function is the creation of the vector \\(\\vec{d}\\) containing all pairwise distances. This is particularly true for large sampling plans; for instance, a 1000-point plan requires nearly half a million distance calculations.\n\nDefinition 36.7 (Pre-allocation of Memory) Pre-allocation of memory is a programming technique where a fixed amount of memory is reserved for a data structure (like an array or vector) before it is actually filled with data. This is done to avoid the computational overhead associated with dynamic memory allocation, which involves repeatedly requesting and resizing memory as new elements are added.\n\nConsequently, pre-allocating memory for the distance vector \\(\\vec{d}\\) is essential. This necessitates a slightly less direct method for computing the indices of \\(\\vec{d}\\), rather than appending each new element, which would involve costly dynamic memory allocation.\nThe implementation of Definition 36.5 is now required. Finding the most space-filling design involves pairwise comparisons. This problem can be approached using a ‘divide and conquer’ strategy, simplifying it to the task of selecting the better of two sampling plans. The function mm(X1,X2,p) is designed for this purpose. 
It returns an index indicating which of the two designs is more space-filling, or 0 if they are equally space-filling, based on the \\(p\\)-norm for distance computation.\n\n36.3.4.1 The Function mm\nThe function mm compares two sampling plans based on the Morris-Mitchell criterion. It uses the jd function to compute the distances and multiplicities, constructs vectors for comparison, and determines which plan is more space-filling.\n\ndef mm(X1: np.ndarray, X2: np.ndarray, p: Optional[float] = 1.0) -> int:\n    \"\"\"\n    Args:\n        X1 (np.ndarray): A 2D array representing the first sampling plan.\n        X2 (np.ndarray): A 2D array representing the second sampling plan.\n        p (float, optional): The distance metric. p=1 uses Manhattan (L1) distance,\n            while p=2 uses Euclidean (L2). Defaults to 1.0.\n\n    Returns:\n        int:\n            - 0 if both plans are identical or equally space-filling\n            - 1 if X1 is more space-filling\n            - 2 if X2 is more space-filling\n    \"\"\"\n    X1_sorted = X1[np.lexsort(np.rot90(X1))]\n    X2_sorted = X2[np.lexsort(np.rot90(X2))]\n    if np.array_equal(X1_sorted, X2_sorted):\n        return 0  # Identical sampling plans\n\n    # Compute distance multiplicities for each plan\n    J1, d1 = jd(X1, p)\n    J2, d2 = jd(X2, p)\n    m1, m2 = len(d1), len(d2)\n\n    # Construct V1 and V2: alternate distance and negative multiplicity\n    V1 = np.zeros(2 * m1)\n    V1[0::2] = d1\n    V1[1::2] = -J1\n\n    V2 = np.zeros(2 * m2)\n    V2[0::2] = d2\n    V2[1::2] = -J2\n\n    # Trim the longer vector to match the size of the shorter\n    m = min(m1, m2)\n    V1 = V1[:m]\n    V2 = V2[:m]\n\n    # Compare element-by-element:\n    # c[i] = 1 if V1[i] > V2[i], 2 if V1[i] < V2[i], 0 otherwise.\n    c = (V1 > V2).astype(int) + 2 * (V1 < V2).astype(int)\n\n    if np.sum(c) == 0:\n        # Equally space-filling\n        return 0\n    else:\n        # The first non-zero entry indicates which plan is 
better\n        idx = np.argmax(c != 0)\n        return c[idx]\n\n\nExample 36.8 (The Function mm) We can use the mm function to compare two sampling plans. The following code creates two 3-point sampling plans in 2D (shown in Figure 36.8) and compares them using the Morris-Mitchell criterion:\n\nX1 = np.array([[0.0, 0.0],[0.5, 0.5],[0.0, 1.0], [1.0, 1.0]])\nX2 = np.array([[0.1, 0.1],[0.4, 0.6],[0.1, 0.9], [0.9, 0.9]])\n\n\n\n\n\n\n\n\n\nFigure 36.8: Comparison of Two Sampling Plans\n\n\n\n\n\nWe can compare which plan has better space-filling (Morris-Mitchell). The output is either 0, 1, or 2 depending on which plan is more space-filling.\n\nbetter = mm(X1, X2, p=2.0)\nprint(f\"Plan {better} is more space-filling.\")\n\nPlan 1 is more space-filling.\n\n\n\n\n\n36.3.4.2 The Function mmphi\nSearching across a space of potential sampling plans can be accomplished by pairwise comparisons. An optimization algorithm could, in theory, be written with mm as the comparative objective. However, experimental evidence (Morris and Mitchell 1995) suggests that the resulting optimization landscape can be quite deceptive, making it difficult to search reliably. This difficulty arises because the comparison process terminates upon finding the first non-zero element in the comparison array c. Consequently, the remaining values in the distance (\\(d_1, d_2, ..., d_m\\)) and multiplicity (\\(J_1, J_2, ..., J_m\\)) arrays are disregarded. These disregarded values, however, might contain potentially useful ‘slope’ information about the global landscape for the optimization process.\nTo address this, Morris and Mitchell (1995) defined the following scalar-valued criterion function, which is used to rank competing sampling plans. 
This function, while based on the logic of Definition 36.5, incorporates the complete vectors \\(d_1, d_2, ..., d_m\\) and \\(J_1, J_2, ..., J_m\\).\n\nDefinition 36.8 (Morris-Mitchell Criterion) The Morris-Mitchell criterion is defined as:\n\\[\n\\Phi_q (X) = \\left(\\sum_{j=1}^m J_j d_j^{-q}\\right)^{1/q},\n\\tag{36.3}\\]\nwhere \\(X\\) is the sampling plan, \\(d_j\\) is the distance between points, \\(J_j\\) is the multiplicity of that distance, and \\(q\\) is a user-defined exponent. The parameter \\(q\\) can be adjusted to control the influence of smaller distances on the overall metric.\n\nThe smaller the value of \\(\\Phi_q\\), the better the space-filling properties of \\(X\\) will be.\nThe function mmphi computes the Morris-Mitchell sampling plan quality criterion for a given sampling plan. It takes a 2D array of points and calculates the space-fillingness metric based on the distances between points. This can be implemented in Python as follows:\n\ndef mmphi(X: np.ndarray,\n          q: Optional[float] = 2.0,\n          p: Optional[float] = 1.0) -> float:\n    \"\"\"\n    Args:\n        X (np.ndarray):\n            A 2D array representing the sampling plan,\n            where each row is a point in\n            d-dimensional space (shape: (n, d)).\n        q (float, optional):\n            Exponent used in the computation of the metric.\n            Defaults to 2.0.\n        p (float, optional):\n            The distance norm to use.\n            For example, p=1 is Manhattan (L1),\n            p=2 is Euclidean (L2). Defaults to 1.0.\n\n    Returns:\n        float:\n            The space-fillingness metric Phiq. 
Smaller values indicate a more\n            space-filling plan according to the Morris-Mitchell criterion.\n    \"\"\"\n    # Compute the distance multiplicities: J, and unique distances: d\n    J, d = jd(X, p)\n    # Summation of J[i] * d[i]^(-q), then raised to 1/q\n    # This follows the Morris-Mitchell definition.\n    Phiq = np.sum(J * (d ** (-q))) ** (1.0 / q)\n    return Phiq\n\n\nExample 36.9 (The Function mmphi) We can use the mmphi function to evaluate the space-filling quality of the two sampling plans from Example 36.8. The following code uses these two 4-point sampling plans in 2D and computes their quality using the Morris-Mitchell criterion:\n\n# Two simple sampling plans from above\nquality1 = mmphi(X1, q=2, p=2)\nquality2 = mmphi(X2, q=2, p=2)\nprint(f\"Quality of sampling plan X1:  {quality1}\")\nprint(f\"Quality of sampling plan X2:  {quality2}\")\n\nQuality of sampling plan X1:  2.91547594742265\nQuality of sampling plan X2:  3.917162046269215\n\n\n\nThis equation provides a more compact representation of the maximin criterion, but the selection of the \\(q\\) value is an important consideration. Larger values of \\(q\\) ensure that terms in the sum corresponding to smaller inter-point distances (the \\(d_j\\) values, which are sorted in ascending order) have a dominant influence. As a result, \\(\\Phi_q\\) will rank sampling plans in a way that closely emulates the original maximin definition (Definition 36.5). This implies that the optimization landscape might retain the challenging characteristics that the \\(\\Phi_q\\) metric, especially with smaller \\(q\\) values, is intended to alleviate. 
Conversely, smaller \\(q\\) values tend to produce a \\(\\Phi_q\\) landscape that, while not perfectly aligning with the original definition, is generally more conducive to optimization.\nTo illustrate the relationship between Equation 36.3 and the maximin criterion of Definition 36.5, sets of 50 random Latin hypercubes of varying sizes and dimensionalities were considered by Forrester et al. (2008). The correlation plots from this analysis suggest that as the sampling plan size increases, a smaller \\(q\\) value is needed for the \\(\\Phi_q\\)-based ranking to closely match the ranking derived from Definition 36.5.\nRankings based on both the direct maximin comparison (mm) and the \\(\\Phi_q\\) metric (mmphi), determined using a simple bubble sort algorithm, are implemented in the Python function mmsort.\n\n\n36.3.4.3 The Function mmsort\nThe function mmsort is designed to rank multiple sampling plans based on their space-filling properties using the Morris-Mitchell criterion. It takes a 3D array of sampling plans and returns the indices of the plans sorted in ascending order of their space-filling quality.\n\ndef mmsort(X3D: np.ndarray, p: Optional[float] = 1.0) -> np.ndarray:\n    \"\"\"\n    Args:\n        X3D (np.ndarray):\n            A 3D NumPy array of shape (n, d, m), where m is the number of\n            sampling plans, and each plan is an (n, d) matrix of points.\n        p (float, optional):\n            The distance metric to use. p=1 for Manhattan (L1), p=2 for\n            Euclidean (L2). Defaults to 1.0.\n\n    Returns:\n        np.ndarray:\n            A 1D integer array of length m that holds the plan indices in\n            ascending order of space-filling quality. 
The first index in the\n            returned array corresponds to the most space-filling plan.\n    \"\"\"\n    # Number of plans (m)\n    m = X3D.shape[2]\n\n    # Create index array (1-based to match original MATLAB convention)\n    Index = np.arange(1, m + 1)\n\n    swap_flag = True\n    while swap_flag:\n        swap_flag = False\n        i = 0\n        while i < m - 1:\n            # Compare plan at Index[i] vs. Index[i+1] using mm()\n            # Note: subtract 1 from each index to convert to 0-based array indexing\n            if mm(X3D[:, :, Index[i] - 1], X3D[:, :, Index[i + 1] - 1], p) == 2:\n                # Swap indices if the second plan is more space-filling\n                Index[i], Index[i + 1] = Index[i + 1], Index[i]\n                swap_flag = True\n            i += 1\n\n    return Index\n\n\nExample 36.10 (The Function mmsort) The mmsort function can be used to rank multiple sampling plans based on their space-filling properties. The following code demonstrates how to use mmsort to compare two 4-point sampling plans in 2D space:\nSuppose we have the two 4-point sampling plans X1 and X2 from above. They are sorted using the Morris-Mitchell criterion with \\(p=2.0\\). For example, the output [1, 2] indicates that X1 is more space-filling than X2:\n\nX3D = np.stack([X1, X2], axis=2)\nranking = mmsort(X3D, p=2.0)\nprint(ranking)\n\n[1 2]\n\n\n\nTo determine the optimal Latin hypercube for a specific application, a recommended approach by Morris and Mitchell (1995) involves minimizing \\(\\Phi_q\\) for a set of \\(q\\) values (1, 2, 5, 10, 20, 50, and 100). Subsequently, the best plan from these results is selected based on the actual maximin definition. The mmsort function can be utilized for this purpose: a 3D matrix, X3D, can be constructed where each 2D slice represents the best sampling plan found for each \\(\\Phi_q\\). Applying mmsort(X3D,1) then ranks these plans according to Definition 36.5, using the rectangular distance metric. 
The subsequent discussion will address the methods for finding these optimized \\(\\Phi_q\\) designs.\n\n\n36.3.4.4 The Function phisort\nphisort only differs from mmsort in having \\(q\\) as an additional argument, as well as the comparison line being:\nif mmphi(X3D[:, :, Index[i] - 1], q=q, p=p) >\n    mmphi(X3D[:, :, Index[i + 1] - 1], q=q, p=p):\n\ndef phisort(X3D: np.ndarray,\n            q: Optional[float] = 2.0,\n            p: Optional[float] = 1.0) -> np.ndarray:\n    \"\"\"\n    Args:\n        X3D (np.ndarray):\n            A 3D array of shape (n, d, m),\n            where m is the number of sampling plans.\n        q (float, optional):\n            Exponent for the mmphi metric. Defaults to 2.0.\n        p (float, optional):\n            Distance norm for mmphi.\n            p=1 is Manhattan; p=2 is Euclidean.\n            Defaults to 1.0.\n\n    Returns:\n        np.ndarray:\n            A 1D integer array of length m, giving the plan indices in ascending\n            order of mmphi. The first index in the returned array corresponds\n            to the numerically lowest mmphi value.\n    \"\"\"\n    # Number of 2D sampling plans\n    m = X3D.shape[2]\n    # Create a 1-based index array\n    Index = np.arange(1, m + 1)\n    # Bubble-sort: plan with lower mmphi() climbs toward the front\n    swap_flag = True\n    while swap_flag:\n        swap_flag = False\n        for i in range(m - 1):\n            # Retrieve mmphi values for consecutive plans\n            val_i = mmphi(X3D[:, :, Index[i] - 1], q=q, p=p)\n            val_j = mmphi(X3D[:, :, Index[i + 1] - 1], q=q, p=p)\n\n            # Swap if the left plan's mmphi is larger (i.e. 'worse')\n            if val_i > val_j:\n                Index[i], Index[i + 1] = Index[i + 1], Index[i]\n                swap_flag = True\n    return Index\n\n\nExample 36.11 (The Function phisort) The phisort function can be used to rank multiple sampling plans based on the Morris-Mitchell criterion. 
The following code demonstrates how to use phisort to compare pairs of 5-point sampling plans in 2D space:\n\nX1 = bestlh(n=5, k=2, population=5, iterations=10)\nX2 = bestlh(n=5, k=2, population=15, iterations=20)\nX3 = bestlh(n=5, k=2, population=25, iterations=30)\n# Map X1 and X2 so that X3D has the two sampling plans\n# in X3D[:, :, 0] and X3D[:, :, 1]\nX3D = np.array([X1, X2])\nprint(phisort(X3D))\nX3D = np.array([X3, X2])\nprint(phisort(X3D))\n\n[2 1]\n[2 1]\n\n\n\n\n\n\n36.3.5 Optimizing the Morris-Mitchell Criterion \\(\\Phi_q\\)\nOnce a criterion for assessing the quality of a Latin hypercube sampling plan has been established, a systematic method for optimizing this metric across the space of Latin hypercubes is required. This task is non-trivial; as the reader may recall from the earlier discussion on Latin squares, this search space is vast. In fact, its vastness means that for many practical applications, locating the globally optimal solution is often infeasible. Therefore, the objective becomes finding the best possible sampling plan achievable within a specific computational time budget.\nThis budget is influenced by the computational cost associated with obtaining each objective function value. Determining the optimal allocation of total computational effort—between generating the sampling plan and actually evaluating the objective function at the selected points—remains an open research question. However, it is typical for no more than approximately 5% of the total available time to be allocated to the task of generating the sampling plan itself.\nForrester et al. (2008) draw an analogy to the process of devising a revision timetable before an exam. 
While a well-structured timetable enhances the effectiveness of revision, an excessive amount of the revision time itself should not be consumed by the planning phase.\nA significant challenge in devising a sampling plan optimizer is ensuring that the search process remains confined to the space of valid Latin hypercubes. As previously discussed, the defining characteristic of a Latin hypercube \\(X\\) is that each of its columns represents a permutation of the possible levels for the corresponding variable. Consequently, the smallest modification that can be applied to a Latin hypercube—without compromising its crucial multidimensional stratification property—involves swapping two elements within any single column of \\(X\\). A Python implementation for ‘mutating’ a Latin hypercube through such an operation, generalized to accommodate random changes applied to multiple sites, is provided below:\n\n36.3.5.1 The Function perturb()\nThe function perturb randomly swaps elements in a Latin hypercube sampling plan. It takes a 2D array representing the sampling plan and performs a specified number of random element swaps, ensuring that the result remains a valid Latin hypercube.\n\ndef perturb(X: np.ndarray,\n            PertNum: Optional[int] = 1) -> np.ndarray:\n    \"\"\"\n    Args:\n        X (np.ndarray):\n            A 2D array (sampling plan) of shape (n, k),\n            where each row is a point\n            and each column is a dimension.\n        PertNum (int, optional):\n            The number of element swaps (perturbations)\n            to perform. 
Defaults to 1.\n\n    Returns:\n        np.ndarray:\n            The perturbed sampling plan,\n            identical in shape to the input, with\n            one or more random column swaps executed.\n    \"\"\"\n    # Get dimensions of the plan\n    n, k = X.shape\n    if n < 2 or k < 2:\n        raise ValueError(\"Latin hypercubes require at least 2 points and 2 dimensions\")\n    for _ in range(PertNum):\n        # Pick a random column\n        col = int(np.floor(np.random.rand() * k))\n        # Pick two distinct row indices\n        el1, el2 = 0, 0\n        while el1 == el2:\n            el1 = int(np.floor(np.random.rand() * n))\n            el2 = int(np.floor(np.random.rand() * n))\n        # Swap the two selected elements in the chosen column\n        X[el1, col], X[el2, col] = X[el2, col], X[el1, col]\n    return X\n\n\nExample 36.12 (The Function perturb()) The perturb function can be used to randomly swap elements in a Latin hypercube sampling plan. The following code demonstrates how to use perturb to create a perturbed version of a 4x2 sampling plan:\n\nX_original = np.array([[1, 3],[2, 4],[3, 1],[4, 2]])\nprint(\"Original Sampling Plan:\")\nprint(X_original)\nprint(\"Perturbed Sampling Plan:\")\nX_perturbed = perturb(X_original, PertNum=1)\nprint(X_perturbed)\n\nOriginal Sampling Plan:\n[[1 3]\n [2 4]\n [3 1]\n [4 2]]\nPerturbed Sampling Plan:\n[[1 3]\n [2 2]\n [3 1]\n [4 4]]\n\n\n\nForrester et al. (2008) uses the term ‘mutation’, because this problem lends itself to nature-inspired computation. Morris and Mitchell (1995) use a simulated annealing algorithm, the detailed pseudocode of which can be found in their paper. As an alternative, a method based on evolutionary operation (EVOP) is offered by Forrester et al. (2008).\n\n\n\n36.3.6 Evolutionary Operation\nAs introduced by Box (1957), evolutionary operation was designed to optimize chemical processes. 
The current parameters of the reaction would be recorded in a box at the centre of a board, with a series of ‘offspring’ boxes along the edges containing values of the parameters slightly altered with respect to the central, ‘parent’ values. Once the reaction was completed for all of these sets of variable values and the corresponding yields recorded, the contents of the central box would be replaced with that of the setup with the highest yield and this would then become the parent of a new set of peripheral boxes.\nThis is generally viewed as a local search procedure, though this depends on the mutation step sizes, that is on the differences between the parent box and its offspring. The longer these steps, the more global is the scope of the search.\nFor the purposes of the Latin hypercube search, a variable scope strategy is applied. The process starts with a long step length (that is a relatively large number of swaps within the columns) and, as the search progresses, the current best basin of attraction is gradually approached by reducing the step length to a single change.\nIn each generation the parent is mutated (randomly, using the perturb function) a pertnum number of times. 
The sampling plan that yields the smallest \\(\\Phi_q\\) value (as per the Morris-Mitchell criterion, calculated usingmmphi) among all offspring and the parent is then selected; in evolutionary computation parlance this selection philosophy is referred to as elitism.\nThe EVOP based search for space-filling Latin hypercubes is thus a truly evolutionary process: the optimized sampling plan results from the nonrandom survival of random variations.\n\n\n36.3.7 Putting it all Together\nAll the pieces of the optimum Latin hypercube sampling process puzzle are now in place: the random hypercube generator as a starting point for the optimization process, the ‘spacefillingness’ metric that needs to be optimized, the optimization engine that performs this task and the comparison function that selects the best of the optima found for the various \\(q\\)’s. These pieces just need to be put into a sequence. Here is the Python embodiment of the completed puzzle. It results in a function bestlh that uses the function mmlhs to find the best Latin hypercube sampling plan for a given set of parameters.\n\n36.3.7.1 The Function mmlhs\nPerforms an evolutionary search (using perturbations) to find a Morris-Mitchell optimal Latin hypercube, starting from an initial plan X_start.\nThis function does the following:\n\nInitializes a “best” Latin hypercube (X_best) from the provided X_start.\nIteratively perturbs X_best to create offspring.\nEvaluates the space-fillingness of each offspring via the Morris-Mitchell metric (using mmphi).\nUpdates the best plan whenever a better offspring is found.\n\n\ndef mmlhs(X_start: np.ndarray,\n          population: int,\n          iterations: int,\n          q: Optional[float] = 2.0,\n          plot=False) -> np.ndarray:\n    \"\"\"\n    Args:\n        X_start (np.ndarray):\n            A 2D array of shape (n, k) providing the initial Latin hypercube\n            (n points in k dimensions).\n        population (int):\n            Number of offspring 
to create in each generation.\n        iterations (int):\n            Total number of generations to run the evolutionary search.\n        q (float, optional):\n            The exponent used by the Morris-Mitchell space-filling criterion.\n            Defaults to 2.0.\n        plot (bool, optional):\n            If True, a simple scatter plot of the first two dimensions will be\n            displayed at each iteration. Only if k >= 2. Defaults to False.\n\n    Returns:\n        np.ndarray:\n            A 2D array representing the most space-filling Latin hypercube found\n            after all iterations, of the same shape as X_start.\n    \"\"\"\n    n = X_start.shape[0]\n    if n < 2:\n        raise ValueError(\"Latin hypercubes require at least 2 points\")\n    k = X_start.shape[1]\n    if k < 2:\n        raise ValueError(\"Latin hypercubes are not defined for dim k < 2\")\n    # Initialize best plan and its metric\n    X_best = X_start.copy()\n    Phi_best = mmphi(X_best, q=q)\n    # After 85% of iterations, reduce the mutation rate to 1\n    leveloff = int(np.floor(0.85 * iterations))\n    for it in range(1, iterations + 1):\n        # Decrease number of mutations over time\n        if it < leveloff:\n            mutations = int(round(1 + (0.5 * n - 1) * (leveloff - it) / (leveloff - 1)))\n        else:\n            mutations = 1\n        X_improved = X_best.copy()\n        Phi_improved = Phi_best\n        # Create offspring, evaluate, and keep the best\n        for _ in range(population):\n            X_try = perturb(X_best.copy(), mutations)\n            Phi_try = mmphi(X_try, q=q)\n\n            if Phi_try < Phi_improved:\n                X_improved = X_try\n                Phi_improved = Phi_try\n        # Update the global best if we found a better plan\n        if Phi_improved < Phi_best:\n            X_best = X_improved\n            Phi_best = Phi_improved\n        # Simple visualization of the first two dimensions\n        if plot and (X_best.shape[1] 
>= 2):\n            plt.clf()\n            plt.scatter(X_best[:, 0], X_best[:, 1], marker=\"o\")\n            plt.grid(True)\n            plt.title(f\"Iteration {it} - Current Best Plan\")\n            plt.pause(0.01)\n    return X_best\n\n\nExample 36.13 (The Function mmlhs) The mmlhs function can be used to optimize a Latin hypercube sampling plan. The following code demonstrates how to use mmlhs to optimize a 4x2 Latin hypercube starting from an initial plan:\n\n# Suppose we have an initial 4x2 plan\nX_start = np.array([[0.1, 0.3],[.1, .4],[.2, .9],[.9, .2]])\nprint(\"Initial plan:\")\nprint(X_start)\n# Search for a more space-filling plan\nX_opt = mmlhs(X_start, population=10, iterations=100, q=2)\nprint(\"Optimized plan:\")\nprint(X_opt)\n\nInitial plan:\n[[0.1 0.3]\n [0.1 0.4]\n [0.2 0.9]\n [0.9 0.2]]\nOptimized plan:\n[[0.9 0.3]\n [0.2 0.4]\n [0.1 0.2]\n [0.1 0.9]]\n\n\nFigure 36.9 shows the initial and optimized plans in 2D. The blue points represent the initial plan, while the red points represent the optimized plan.\n\n\n\n\n\n\n\n\nFigure 36.9: Comparison of the initial and optimized plans in 2D.\n\n\n\n\n\n\n\n\n36.3.7.2 The Function bestlh\nGenerates an optimized Latin hypercube by evolving the Morris-Mitchell criterion across multiple exponents (q values) and selecting the best plan.\n\ndef bestlh(n: int,\n           k: int,\n           population: int,\n           iterations: int,\n           p=1,\n           plot=False,\n           verbosity=0,\n           edges=0,\n           q_list=[1, 2, 5, 10, 20, 50, 100]) -> np.ndarray:\n    \"\"\"\n    Args:\n        n (int):\n            Number of points required in the Latin hypercube.\n        k (int):\n            Number of design variables (dimensions).\n        population (int):\n            Number of offspring in each generation of the evolutionary search.\n        iterations (int):\n            Number of generations for the evolutionary search.\n        p (int, optional):\n            The distance 
norm to use. p=1 for Manhattan (L1), p=2 for Euclidean (L2).\n            Defaults to 1 (faster than 2).\n        plot (bool, optional):\n            If True, a scatter plot of the optimized plan in the first two dimensions\n            will be displayed. Only if k>=2.  Defaults to False.\n        verbosity (int, optional):\n            Verbosity level. 0 is silent, 1 prints the best q value found. Defaults to 0.\n        edges (int, optional):\n            If 1, places centers of the extreme bins at the domain edges ([0,1]).\n            Otherwise, bins are fully contained within the domain, i.e. midpoints.\n            Defaults to 0.\n        q_list (list, optional):\n            A list of q values to optimize. Defaults to [1, 2, 5, 10, 20, 50, 100].\n            These values are used to evaluate the space-fillingness of the Latin\n            hypercube. The best plan is selected based on the lowest mmphi value.\n\n    Returns:\n        np.ndarray:\n            A 2D array of shape (n, k) representing an optimized Latin hypercube.\n    \"\"\"\n    if n < 2:\n        raise ValueError(\"Latin hypercubes require at least 2 points\")\n    if k < 2:\n        raise ValueError(\"Latin hypercubes are not defined for dim k < 2\")\n\n    # A list of exponents (q) to optimize\n\n    # Start with a random Latin hypercube\n    X_start = rlh(n, k, edges=edges)\n\n    # Allocate a 3D array to store the results for each q\n    # (shape: (n, k, number_of_q_values))\n    X3D = np.zeros((n, k, len(q_list)))\n\n    # Evolve the plan for each q in q_list\n    for i, q_val in enumerate(q_list):\n        if verbosity > 0:\n            print(f\"Now optimizing for q={q_val}...\")\n        X3D[:, :, i] = mmlhs(X_start, population, iterations, q_val)\n\n    # Sort the set of evolved plans according to the Morris-Mitchell criterion\n    index_order = mmsort(X3D, p=p)\n\n    # index_order is a 1-based array of plan indices; the first element is the best\n    best_idx = index_order[0] - 1\n    
if verbosity > 0:\n        print(f\"Best lh found using q={q_list[best_idx]}...\")\n\n    # The best plan in 3D array order\n    X = X3D[:, :, best_idx]\n\n    # Plot the first two dimensions\n    if plot and (k >= 2):\n        plt.scatter(X[:, 0], X[:, 1], c=\"r\", marker=\"o\")\n        plt.title(f\"Morris-Mitchell optimum plan found using q={q_list[best_idx]}\")\n        plt.xlabel(\"x_1\")\n        plt.ylabel(\"x_2\")\n        plt.grid(True)\n        plt.show()\n\n    return X\n\n\nExample 36.14 (The Function bestlh) The bestlh function can be used to generate an optimized Latin hypercube sampling plan. The following code demonstrates how to use bestlh to create a 5x2 Latin hypercube with a population of 5 and 10 iterations:\n\nXbestlh= bestlh(n=5, k=2, population=5, iterations=10)\n\nFigure 36.10 shows the best Latin hypercube sampling in 2D. The red points represent the optimized plan.\n\n\n\n\n\n\n\n\nFigure 36.10: Best Latin Hypercube Sampling\n\n\n\n\n\n\nSorting all candidate plans in ascending order is not strictly necessary - after all, only the best one is truly of interest. Nonetheless, the added computational complexity is minimal (the vector will only ever contain as many elements as there are candidate \\(q\\) values, and only an index array is sorted, not the actual repository of plans). This sorting gives the reader the opportunity to compare, if desired, how different choices of \\(q\\) influence the resulting plans.",
           "crumbs": [
             "Numerical Methods",
             "36  Sampling Plans"
      @@ -4810,7 +4810,7 @@
           "href": "100_ddmo_eda.html#exploratory-data-analysis",
           "title": "45  Basic Statistics and Data Analysis",
           "section": "",
      -    "text": "45.1.1 Histograms\nCreating a histogram and calculating the probabilities from a dataset can be approached with scientific precision\n\nData Collection: Obtain the dataset you wish to analyze. This dataset could represent any quantitative measure, such to examine its distribution.\nDecide on the Number of Bins: The number of bins influences the histogram’s granularity. There are several statistical rules to determine an optimal number of bins:\n\nSquare-root rule: suggests using the square root of the number of data points as the number of bins.\nSturges’ formula: \\(k = 1 + 3.322 \\log_{10}(n)\\), where \\(n\\) is the number of data points and \\(k\\) is the suggested number of bins.\nFreedman-Diaconis rule: uses the interquartile range (IQR) and the cube root of the number of data points \\(n\\) to calculate bin width as \\(2 \\dfrac{IQR}{n^{1/3}}\\).\n\nDetermine Range and Bin Width: Calculate the range of data by subtracting the minimum data point value from the maximum. 
Divide this range by the number of bins to determine the width of each bin.\nAllocate Data Points to Bins: Iterate through the data, sorting each data point into the appropriate bin based on its value.\nDraw the Histogram: Use a histogram to visualize the frequency or relative frequency (probability) of data points within each bin.\nCalculate Probabilities: The relative frequency of data within each bin represents the probability of a randomly selected data point falling within that bin’s range.\n\nBelow is a Python script that demonstrates how to generate a histogram and compute probabilities using the matplotlib library for visualization and numpy for data manipulation.\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\n# Sample data: Randomly generated for demonstration\ndata = np.random.normal(0, 1, 1000)  # 1000 data points with a normal distribution\n\n# Step 2: Decide on the number of bins\nnum_bins = int(np.ceil(1 + 3.322 * np.log10(len(data))))  # Sturges' formula\n\n# Step 3: Determine range and bin width -- handled internally by matplotlib\n\n# Steps 4 & 5: Sort data into bins and draw the histogram\nfig, ax = plt.subplots()\nn, bins, patches = ax.hist(data, bins=num_bins, density=True, alpha=0.75, edgecolor='black')\n\n# Calculate probabilities (relative frequencies) manually, if needed\nbin_width = np.diff(bins)  # np.diff finds the difference between adjacent bin boundaries\nprobabilities = n * bin_width  # n is already normalized to form a probability density if `density=True`\n\n# Adding labels and title for clarity\nax.set_xlabel('Data Value')\nax.set_ylabel('Probability Density')\nax.set_title('Histogram with Probability Density')\n\n\n\n\n\n\nText(0.5, 1.0, 'Histogram with Probability Density')\n\n\n(a) Histogram with Probability Density\n\n\n\n\n\n\n\n\n\n\n(b)\n\n\n\n\n\n\nFigure 45.1\n\n\n\n\n\nfor i, prob in enumerate(probabilities):\n    print(f\"Bin {i+1} Probability: {prob:.4f}\")\n\n# Ensure probabilities sum to 1 (or very close, 
due to floating-point arithmetic)\nprint(f\"Sum of probabilities: {np.sum(probabilities)}\")\n\nBin 1 Probability: 0.0030\nBin 2 Probability: 0.0080\nBin 3 Probability: 0.0480\nBin 4 Probability: 0.0920\nBin 5 Probability: 0.1830\nBin 6 Probability: 0.2290\nBin 7 Probability: 0.2160\nBin 8 Probability: 0.1370\nBin 9 Probability: 0.0570\nBin 10 Probability: 0.0230\nBin 11 Probability: 0.0040\nSum of probabilities: 1.0\n\n\nThis code segment goes through the necessary steps to generate a histogram and calculate probabilities for a synthetic dataset. It demonstrates important scientific and computational practices including binning, visualization, and probability calculation in Python.\nKey Points:\n\nThe histogram represents the distribution of data, with the histogram’s bins outlining the data’s spread and density.\nThe option density=True in ax.hist() normalizes the histogram so that the total area under the histogram sums to 1, thereby converting frequencies to probability densities.\nThe choice of bin number and width has a significant influence on the histogram’s shape and the insights that can be drawn from it, highlighting the importance of selecting appropriate binning strategies based on the dataset’s characteristics and the analysis objectives.\nVideo: Histograms, Clearly Explained\n\n\n\n45.1.2 Boxplots\n\nVideo: Boxplots are Awesome",
      +    "text": "45.1.1 Histograms\nCreating a histogram and calculating the probabilities from a dataset can be approached with scientific precision\n\nData Collection: Obtain the dataset you wish to analyze. This dataset could represent any quantitative measure, such to examine its distribution.\nDecide on the Number of Bins: The number of bins influences the histogram’s granularity. There are several statistical rules to determine an optimal number of bins:\n\nSquare-root rule: suggests using the square root of the number of data points as the number of bins.\nSturges’ formula: \\(k = 1 + 3.322 \\log_{10}(n)\\), where \\(n\\) is the number of data points and \\(k\\) is the suggested number of bins.\nFreedman-Diaconis rule: uses the interquartile range (IQR) and the cube root of the number of data points \\(n\\) to calculate bin width as \\(2 \\dfrac{IQR}{n^{1/3}}\\).\n\nDetermine Range and Bin Width: Calculate the range of data by subtracting the minimum data point value from the maximum. 
Divide this range by the number of bins to determine the width of each bin.\nAllocate Data Points to Bins: Iterate through the data, sorting each data point into the appropriate bin based on its value.\nDraw the Histogram: Use a histogram to visualize the frequency or relative frequency (probability) of data points within each bin.\nCalculate Probabilities: The relative frequency of data within each bin represents the probability of a randomly selected data point falling within that bin’s range.\n\nBelow is a Python script that demonstrates how to generate a histogram and compute probabilities using the matplotlib library for visualization and numpy for data manipulation.\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\n# Sample data: Randomly generated for demonstration\ndata = np.random.normal(0, 1, 1000)  # 1000 data points with a normal distribution\n\n# Step 2: Decide on the number of bins\nnum_bins = int(np.ceil(1 + 3.322 * np.log10(len(data))))  # Sturges' formula\n\n# Step 3: Determine range and bin width -- handled internally by matplotlib\n\n# Steps 4 & 5: Sort data into bins and draw the histogram\nfig, ax = plt.subplots()\nn, bins, patches = ax.hist(data, bins=num_bins, density=True, alpha=0.75, edgecolor='black')\n\n# Calculate probabilities (relative frequencies) manually, if needed\nbin_width = np.diff(bins)  # np.diff finds the difference between adjacent bin boundaries\nprobabilities = n * bin_width  # n is already normalized to form a probability density if `density=True`\n\n# Adding labels and title for clarity\nax.set_xlabel('Data Value')\nax.set_ylabel('Probability Density')\nax.set_title('Histogram with Probability Density')\n\n\n\n\n\n\nText(0.5, 1.0, 'Histogram with Probability Density')\n\n\n(a) Histogram with Probability Density\n\n\n\n\n\n\n\n\n\n\n(b)\n\n\n\n\n\n\nFigure 45.1\n\n\n\n\n\nfor i, prob in enumerate(probabilities):\n    print(f\"Bin {i+1} Probability: {prob:.4f}\")\n\n# Ensure probabilities sum to 1 (or very close, 
due to floating-point arithmetic)\nprint(f\"Sum of probabilities: {np.sum(probabilities)}\")\n\nBin 1 Probability: 0.0020\nBin 2 Probability: 0.0170\nBin 3 Probability: 0.0500\nBin 4 Probability: 0.1180\nBin 5 Probability: 0.2120\nBin 6 Probability: 0.2330\nBin 7 Probability: 0.1790\nBin 8 Probability: 0.1330\nBin 9 Probability: 0.0370\nBin 10 Probability: 0.0120\nBin 11 Probability: 0.0070\nSum of probabilities: 1.0\n\n\nThis code segment goes through the necessary steps to generate a histogram and calculate probabilities for a synthetic dataset. It demonstrates important scientific and computational practices including binning, visualization, and probability calculation in Python.\nKey Points:\n\nThe histogram represents the distribution of data, with the histogram’s bins outlining the data’s spread and density.\nThe option density=True in ax.hist() normalizes the histogram so that the total area under the histogram sums to 1, thereby converting frequencies to probability densities.\nThe choice of bin number and width has a significant influence on the histogram’s shape and the insights that can be drawn from it, highlighting the importance of selecting appropriate binning strategies based on the dataset’s characteristics and the analysis objectives.\nVideo: Histograms, Clearly Explained\n\n\n\n45.1.2 Boxplots\n\nVideo: Boxplots are Awesome",
           "crumbs": [
             "Data-Driven Modeling and Optimization",
             "45  Basic Statistics and Data Analysis"
      @@ -4898,7 +4898,7 @@
           "href": "100_ddmo_eda.html#the-uniform-distribution-2",
           "title": "45  Basic Statistics and Data Analysis",
           "section": "45.9 The Uniform Distribution",
      -    "text": "45.9 The Uniform Distribution\nThis variable is defined in the interval \\([a,b]\\). We write it as \\(X \\sim U[a,b]\\). Its density and cumulative distribution functions are, respectively, \\[\nf_X(x) = \\frac{I_{[a,b]}(x)}{b-a},  \\quad\\quad F_X(x) = \\frac{1}{b-a}\\int\\limits_{-\\infty}\\limits^x I_{[a,b]}(t) \\mathrm{d}t = \\frac{x-a}{b-a},\n\\] where \\(I_{[a,b]}(\\cdot)\\) is the indicator function of the interval \\([a,b]\\). Note that, if we set \\(a=0\\) and \\(b=1\\), we obtain \\(F_X(x) = x\\), \\(x\\) \\(\\in\\) \\([0,1]\\).\nA typical example is the following: the cdf of a continuous r.v. is uniformly distributed in \\([0,1]\\). The proof of this statement is as follows: For \\(u\\) \\(\\in\\) \\([0,1]\\), we have \\[\\begin{eqnarray*}\n\\Pr\\{F_X(X) \\leq u\\} &=& \\Pr\\{F_X^{-1}(F_X(X)) \\leq F_X^{-1}(u)\\} = \\Pr\\{X \\leq F_X^{-1}(u)\\} \\\\\n                      &=& F_X(F_X^{-1}(u)) = u.     \n\\end{eqnarray*}\\] This means that, when \\(X\\) is continuous, there is a one-to-one relationship (given by the cdf) between \\(x\\) \\(\\in\\) \\(D_X\\) and \\(u\\) \\(\\in\\) \\([0,1]\\).\nThe has a constant density over a specified interval, say \\([a,b]\\). The uniform \\(U(a,b)\\) distribution has density \\[\\begin{equation}\nf(x) =\n\\left\\{\n  \\begin{array}{ll}\n  1/(b-a) & \\textrm{ if } a < x < b,\\\\\n  0 & \\textrm{ otherwise}\n  \\end{array}\n  \\right.\n  \\end{equation}\\]\n\n45.9.1 The Normal Distribution\nA normally distributed random variable is a random variable whose associated probability distribution is the normal (or Gaussian) distribution. The normal distribution is a continuous probability distribution characterized by a symmetric bell-shaped curve.\nThe distribution is defined by two parameters: the mean \\(\\mu\\) and the standard deviation \\(\\sigma\\). 
The mean indicates the center of the distribution, while the standard deviation measures the spread or dispersion of the distribution.\nThis distribution is widely used in statistics and the natural and social sciences as a simple model for random variables with unknown distributions.\n\nDefinition 45.14 (The Normal Distribution) The probability density function of the normal distribution is defined as: \\[\nf_X(x) = \\frac{1}{\\sqrt{2\\pi \\sigma^2}} \\exp\\left(-\\frac{1}{2} \\left(\\frac{x-\\mu}{\\sigma}\\right)^2\\right),\n\\tag{45.1}\\] where: \\(\\mu\\) is the mean; \\(\\sigma\\) is the standard deviation.\n\nTo generate ten random numbers from a normal distribution, the following command can be used.\n\nimport numpy as np\nrng = np.random.default_rng()\nn = 10\nmu, sigma = 2, 0.1\nx = rng.normal(mu, sigma, n)\nx\n\narray([1.96714185, 1.85465936, 1.9792697 , 1.94488593, 1.97432874,\n       2.07284846, 2.00841563, 2.18059008, 2.08972059, 1.78420194])\n\n\nVerify the mean:\n\nabs(mu - np.mean(x))\n\nnp.float64(0.014393771709440006)\n\n\nNote: To verify the standard deviation, we use ddof = 1 (empirical standard deviation):\n\nabs(sigma - np.std(x, ddof=1))\n\nnp.float64(0.01395279622436188)\n\n\n\nplot_normal_distribution(mu=0, sigma=1, num_samples=10000)\n\n\n\n\n\n\n\n\n\n\n45.9.2 Visualization of the Standard Deviation\nThe standard deviation of normal distributed can be visualized in terms of the histogram of \\(X\\):\n\nabout 68% of the values will lie in the interval within one standard deviation of the mean\n95% lie within two standard deviation of the mean\nand 99.9% lie within 3 standard deviations of the mean.\n\n\n\n\n\n\n\n\n\n\n\n\n45.9.3 Realizations of a Normal Distribution\nRealizations of a normal distribution refers to the actual values that you get when you draw samples from a normal distribution. 
Each sample drawn from the distribution is a realization of that distribution.\n\nExample 45.5 (Realizations of a Normal Distribution) If you have a normal distribution with a mean of 0 and a standard deviation of 1, each number you draw from that distribution is a realization. Here is a Python example that generates 10 realizations of a normal distribution with a mean of 0 and a standard deviation of 1:\n\nimport numpy as np\nmu = 0\nsigma = 1\nrealizations = np.random.normal(mu, sigma, 10)\nprint(realizations)\n\n[ 0.48951662  0.23879586 -0.44811181 -0.610795   -2.02994507  0.60794659\n -0.35410888  0.15258149  0.50127485 -0.78640277]\n\n\nIn this code, np.random.normal generates ten realizations of a normal distribution with a mean of 0 and a standard deviation of 1. The realizations array contains the actual values drawn from the distribution.",
      +    "text": "45.9 The Uniform Distribution\nThis variable is defined in the interval \\([a,b]\\). We write it as \\(X \\sim U[a,b]\\). Its density and cumulative distribution functions are, respectively, \\[\nf_X(x) = \\frac{I_{[a,b]}(x)}{b-a},  \\quad\\quad F_X(x) = \\frac{1}{b-a}\\int\\limits_{-\\infty}\\limits^x I_{[a,b]}(t) \\mathrm{d}t = \\frac{x-a}{b-a},\n\\] where \\(I_{[a,b]}(\\cdot)\\) is the indicator function of the interval \\([a,b]\\). Note that, if we set \\(a=0\\) and \\(b=1\\), we obtain \\(F_X(x) = x\\), \\(x\\) \\(\\in\\) \\([0,1]\\).\nA typical example is the following: the cdf of a continuous r.v. is uniformly distributed in \\([0,1]\\). The proof of this statement is as follows: For \\(u\\) \\(\\in\\) \\([0,1]\\), we have \\[\\begin{eqnarray*}\n\\Pr\\{F_X(X) \\leq u\\} &=& \\Pr\\{F_X^{-1}(F_X(X)) \\leq F_X^{-1}(u)\\} = \\Pr\\{X \\leq F_X^{-1}(u)\\} \\\\\n                      &=& F_X(F_X^{-1}(u)) = u.     \n\\end{eqnarray*}\\] This means that, when \\(X\\) is continuous, there is a one-to-one relationship (given by the cdf) between \\(x\\) \\(\\in\\) \\(D_X\\) and \\(u\\) \\(\\in\\) \\([0,1]\\).\nThe has a constant density over a specified interval, say \\([a,b]\\). The uniform \\(U(a,b)\\) distribution has density \\[\\begin{equation}\nf(x) =\n\\left\\{\n  \\begin{array}{ll}\n  1/(b-a) & \\textrm{ if } a < x < b,\\\\\n  0 & \\textrm{ otherwise}\n  \\end{array}\n  \\right.\n  \\end{equation}\\]\n\n45.9.1 The Normal Distribution\nA normally distributed random variable is a random variable whose associated probability distribution is the normal (or Gaussian) distribution. The normal distribution is a continuous probability distribution characterized by a symmetric bell-shaped curve.\nThe distribution is defined by two parameters: the mean \\(\\mu\\) and the standard deviation \\(\\sigma\\). 
The mean indicates the center of the distribution, while the standard deviation measures the spread or dispersion of the distribution.\nThis distribution is widely used in statistics and the natural and social sciences as a simple model for random variables with unknown distributions.\n\nDefinition 45.14 (The Normal Distribution) The probability density function of the normal distribution is defined as: \\[\nf_X(x) = \\frac{1}{\\sqrt{2\\pi \\sigma^2}} \\exp\\left(-\\frac{1}{2} \\left(\\frac{x-\\mu}{\\sigma}\\right)^2\\right),\n\\tag{45.1}\\] where: \\(\\mu\\) is the mean; \\(\\sigma\\) is the standard deviation.\n\nTo generate ten random numbers from a normal distribution, the following command can be used.\n\nimport numpy as np\nrng = np.random.default_rng()\nn = 10\nmu, sigma = 2, 0.1\nx = rng.normal(mu, sigma, n)\nx\n\narray([1.83956491, 1.96316573, 1.91946089, 1.9424839 , 2.00858252,\n       1.8554255 , 1.70403878, 2.12573916, 2.00071284, 2.08990745])\n\n\nVerify the mean:\n\nabs(mu - np.mean(x))\n\nnp.float64(0.05509183154874364)\n\n\nNote: To verify the standard deviation, we use ddof = 1 (empirical standard deviation):\n\nabs(sigma - np.std(x, ddof=1))\n\nnp.float64(0.024276758967269524)\n\n\n\nplot_normal_distribution(mu=0, sigma=1, num_samples=10000)\n\n\n\n\n\n\n\n\n\n\n45.9.2 Visualization of the Standard Deviation\nThe standard deviation of normal distributed can be visualized in terms of the histogram of \\(X\\):\n\nabout 68% of the values will lie in the interval within one standard deviation of the mean\n95% lie within two standard deviation of the mean\nand 99.9% lie within 3 standard deviations of the mean.\n\n\n\n\n\n\n\n\n\n\n\n\n45.9.3 Realizations of a Normal Distribution\nRealizations of a normal distribution refers to the actual values that you get when you draw samples from a normal distribution. 
Each sample drawn from the distribution is a realization of that distribution.\n\nExample 45.5 (Realizations of a Normal Distribution) If you have a normal distribution with a mean of 0 and a standard deviation of 1, each number you draw from that distribution is a realization. Here is a Python example that generates 10 realizations of a normal distribution with a mean of 0 and a standard deviation of 1:\n\nimport numpy as np\nmu = 0\nsigma = 1\nrealizations = np.random.normal(mu, sigma, 10)\nprint(realizations)\n\n[ 0.48951662  0.23879586 -0.44811181 -0.610795   -2.02994507  0.60794659\n -0.35410888  0.15258149  0.50127485 -0.78640277]\n\n\nIn this code, np.random.normal generates ten realizations of a normal distribution with a mean of 0 and a standard deviation of 1. The realizations array contains the actual values drawn from the distribution.",
           "crumbs": [
             "Data-Driven Modeling and Optimization",
             "45  Basic Statistics and Data Analysis"
      @@ -5107,7 +5107,7 @@
           "href": "100_ddmo_pca.html#sec-fit-ols",
           "title": "47  Addressing Multicollinearity: Principle Component Analysis (PCA) and Factor Analysis (FA)",
           "section": "47.3 Fit the Linear Regression Model",
      -    "text": "47.3 Fit the Linear Regression Model\nAn Ordinary Least Squares (OLS) regression model is fitted using the preprocessed and combined features (X_encoded).\n\nX_encoded_with_const = sm.add_constant(X_encoded) # Adds a constant term (intercept) to the model\nmodel = sm.OLS(df['ln_sales'], X_encoded_with_const).fit()\n\n\n47.3.1 Model Summary and Interpretation\n\n47.3.1.1 Model Summary (ANOVA Table)\nThe ANOVA table shows a significant F-value (Prob (F-statistic) close to zero), indicating that the model is statistically significant and better than simply estimating the mean. The Adj. R-squared value, close to 0.40, suggests that nearly 40% of the variation in ln_sales is explained by the model.\n\nprint(model.summary())\n\n                            OLS Regression Results                            \n==============================================================================\nDep. Variable:               ln_sales   R-squared:                       0.485\nModel:                            OLS   Adj. R-squared:                  0.449\nMethod:                 Least Squares   F-statistic:                     13.73\nDate:                Sat, 04 Apr 2026   Prob (F-statistic):           7.69e-17\nTime:                        10:42:21   Log-Likelihood:                -213.62\nNo. 
Observations:                 157   AIC:                             449.2\nDf Residuals:                     146   BIC:                             482.9\nDf Model:                          10                                         \nCovariance Type:            nonrobust                                         \n==============================================================================\n                 coef    std err          t      P>|t|      [0.025      0.975]\n------------------------------------------------------------------------------\nconst          3.0678      0.114     26.962      0.000       2.843       3.293\nprice         -0.6451      0.177     -3.655      0.000      -0.994      -0.296\nengine_s       0.3557      0.192      1.854      0.066      -0.023       0.735\nhorsepow      -0.1364      0.229     -0.596      0.552      -0.589       0.316\nwheelbas       0.3166      0.174      1.816      0.071      -0.028       0.661\nwidth         -0.0763      0.140     -0.547      0.586      -0.352       0.200\nlength         0.2029      0.185      1.099      0.273      -0.162       0.568\ncurb_wgt       0.0842      0.211      0.399      0.691      -0.333       0.501\nfuel_cap      -0.2284      0.179     -1.276      0.204      -0.582       0.125\nmpg            0.3232      0.167      1.941      0.054      -0.006       0.652\ntype_1         0.8735      0.317      2.756      0.007       0.247       1.500\n==============================================================================\nOmnibus:                       41.296   Durbin-Watson:                   1.423\nProb(Omnibus):                  0.000   Jarque-Bera (JB):              107.145\nSkew:                          -1.064   Prob(JB):                     5.42e-24\nKurtosis:                       6.442   Cond. No.                         
11.4\n==============================================================================\n\nNotes:\n[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n\n\nDespite the positive model fit, many predictors show non-significant coefficients (P>|t| much larger than 0.05), suggesting they contribute little to the model.",
      +    "text": "47.3 Fit the Linear Regression Model\nAn Ordinary Least Squares (OLS) regression model is fitted using the preprocessed and combined features (X_encoded).\n\nX_encoded_with_const = sm.add_constant(X_encoded) # Adds a constant term (intercept) to the model\nmodel = sm.OLS(df['ln_sales'], X_encoded_with_const).fit()\n\n\n47.3.1 Model Summary and Interpretation\n\n47.3.1.1 Model Summary (ANOVA Table)\nThe ANOVA table shows a significant F-value (Prob (F-statistic) close to zero), indicating that the model is statistically significant and better than simply estimating the mean. The Adj. R-squared value, close to 0.40, suggests that nearly 40% of the variation in ln_sales is explained by the model.\n\nprint(model.summary())\n\n                            OLS Regression Results                            \n==============================================================================\nDep. Variable:               ln_sales   R-squared:                       0.485\nModel:                            OLS   Adj. R-squared:                  0.449\nMethod:                 Least Squares   F-statistic:                     13.73\nDate:                Sat, 11 Apr 2026   Prob (F-statistic):           7.69e-17\nTime:                        22:39:01   Log-Likelihood:                -213.62\nNo. 
Observations:                 157   AIC:                             449.2\nDf Residuals:                     146   BIC:                             482.9\nDf Model:                          10                                         \nCovariance Type:            nonrobust                                         \n==============================================================================\n                 coef    std err          t      P>|t|      [0.025      0.975]\n------------------------------------------------------------------------------\nconst          3.0678      0.114     26.962      0.000       2.843       3.293\nprice         -0.6451      0.177     -3.655      0.000      -0.994      -0.296\nengine_s       0.3557      0.192      1.854      0.066      -0.023       0.735\nhorsepow      -0.1364      0.229     -0.596      0.552      -0.589       0.316\nwheelbas       0.3166      0.174      1.816      0.071      -0.028       0.661\nwidth         -0.0763      0.140     -0.547      0.586      -0.352       0.200\nlength         0.2029      0.185      1.099      0.273      -0.162       0.568\ncurb_wgt       0.0842      0.211      0.399      0.691      -0.333       0.501\nfuel_cap      -0.2284      0.179     -1.276      0.204      -0.582       0.125\nmpg            0.3232      0.167      1.941      0.054      -0.006       0.652\ntype_1         0.8735      0.317      2.756      0.007       0.247       1.500\n==============================================================================\nOmnibus:                       41.296   Durbin-Watson:                   1.423\nProb(Omnibus):                  0.000   Jarque-Bera (JB):              107.145\nSkew:                          -1.064   Prob(JB):                     5.42e-24\nKurtosis:                       6.442   Cond. No.                         
11.4\n==============================================================================\n\nNotes:\n[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n\n\nDespite the positive model fit, many predictors show non-significant coefficients (P>|t| much larger than 0.05), suggesting they contribute little to the model.",
           "crumbs": [
             "Data-Driven Modeling and Optimization",
             "47  Addressing Multicollinearity: Principle Component Analysis (PCA) and Factor Analysis (FA)"
      @@ -5129,7 +5129,7 @@
           "href": "100_ddmo_pca.html#sec-pca",
           "title": "47  Addressing Multicollinearity: Principle Component Analysis (PCA) and Factor Analysis (FA)",
           "section": "47.5 Addressing Multicollinearity with Principal Component Analysis (PCA)",
      -    "text": "47.5 Addressing Multicollinearity with Principal Component Analysis (PCA)\n\nDefinition 47.2 (Multicollinearity and Multicorrelation) Multicorrelation is a general term that describes correlation between multiple variables. Multicollinearity is a specific problem in regression models caused by strong correlations between independent variables, making model interpretation difficult.\n\n\n47.5.1 Introduction to PCA\nPrincipal Component Analysis (PCA) is a popular unsupervised dimensionality reduction technique. It transforms a set of possibly correlated variables into a set of linearly uncorrelated variables called principal components. The first principal component accounts for as much of the variability in the data as possible, and each succeeding component accounts for as much of the remaining variability as possible. PCA is primarily used for data compression and simplifying complex datasets.\n\n\n47.5.2 Application of PCA in Regression Problems:\n\nDimensionality Reduction: PCA reduces the number of explanatory variables by transforming original variables into a smaller set of uncorrelated principal components, making regression algorithms less prone to overfitting, especially with many features.\nReducing Multicollinearity: PCA effectively eliminates multicollinearity in linear regression models because the resulting principal components are orthogonal (uncorrelated) to each other, leading to more stable coefficient estimates.\nHandling High-Dimensional Data: It can reduce the dimensions of datasets with many variables to a manageable level before regression.\nReduced Overfitting Tendencies: By removing redundant and highly correlated variables, PCA helps reduce the risk of overfitting by focusing the model on the most influential features.\nImproved Model Performance: Performing regression on the most important principal components often leads to better generalization and improved model performance on new data.\nInterpretation of Feature 
Importance: PCA provides insights into the importance of original features through the variance explained by each principal component, which can identify combinations of variables best representing the data.\n\n\n\n47.5.3 Scree Plot\n\nDefinition 47.3 (Scree Plot) A scree plot is a graphical representation of the eigenvalues of a covariance or correlation matrix in descending order. It is used to determine the number of significant components or factors in dimensionality reduction techniques.\nMathematically, the eigenvalues \\(\\lambda_1, \\lambda_2, \\dots, \\lambda_p\\) are plotted against their corresponding component or factor indices \\(i = 1, 2, \\dots, p\\), where \\(p\\) is the total number of components or factors.\nThe eigenvalues are defined as:\n\\[\n\\lambda_i = \\text{Var}(\\mathbf{z}_i),\n\\]\nwhere \\(\\mathbf{z}_i\\) is the \\(i\\)-th principal component or factor, and \\(\\text{Var}(\\mathbf{z}_i)\\) is its variance.\nThe scree plot is constructed by plotting the points \\((i, \\lambda_i)\\) for \\(i = 1, 2, \\dots, p\\). The “elbow” in the plot, where the eigenvalues start to level off, indicates the optimal number of components or factors to retain.\n\n\n\n47.5.4 Loading Scores (for PCA)\nLoading scores in the context of Principal Component Analysis (PCA) represent the correlation or relationship between the original variables and the principal components.\n\nDefinition 47.4 (Loading Scores) The loading score for the \\(j\\)-th variable on the \\(i\\)-th principal component is defined as:\n\\[\nL_{ij} = \\mathbf{a}_i^\\top \\mathbf{x}_j,\n\\]\nwhere:\n\\(\\mathbf{a}_i\\) is the eigenvector corresponding to the \\(i\\)-th principal component, \\(\\mathbf{x}_j\\) is the standardized value of the \\(j\\)-th variable.\n\nIn PCA, the loading scores indicate how much each original variable contributes to a given principal component. 
High absolute values of \\(L_{ij}\\) suggest that the \\(j\\)-th variable strongly influences the \\(i\\)-th principal component. In PCA, loading scores can be viewed as directional vectors in the feature space. The magnitude of the score indicates how dominant the variable is in a component, while the sign represents the direction of the relationship. A high positive loading means a positive influence and correlation with the component, and a high negative loading indicates a negative correlation. Loading score values also show how much each original variable contributes to the explained variance in its respective principal component.\n\n\n\n\n\n\nNoteSummary of Loading Scores\n\n\n\nLoading scores are used in Principal Component Analysis (PCA).\n\nDefinition: Loading scores represent the correlation or relationship between the original variables and the principal components.\nPurpose: They indicate how much each original variable contributes to a given principal component.\nMathematical Representation: In PCA, the loading scores are the elements of the eigenvectors of the covariance (or correlation) matrix, scaled by the square root of the corresponding eigenvalues.\nInterpretation: High absolute values of loading scores suggest that the variable strongly influences the corresponding principal component.\n\n\n\nSection 47.7.3 explains the difference between loading scores in PCA and factor loadings in FA.\n\n\n47.5.5 PCA for Car Sales Example\n\n47.5.5.1 Computing the Principal Components\nThe Principal Component Analysis (PCA) is applied only to the features (X_encoded), not to the target variable. 
We will use functions from spotoptim.utils.pca, which are based on sklearn.decomposition.PCA to perform PCA.\nStep 1: Perform PCA and scale the data\n\npca, scaled_data, feature_names, sample_names, df_pca_components = get_pca(df=X_encoded, n_components=10)\n\nStep 2: Plot the scree plot\n\nplot_pca_scree(pca, df_name=\"Car Sales Data\", max_scree=10)\n\n\n\n\n\n\n\nFigure 47.1: Scree plot for PCA showing the explained variance ratio for each principal component.\n\n\n\n\n\nStep 3: Plot the first two principal components\n\nplot_pca1vs2(pca, df_pca_components, df_name=\"Car Sales Data\")\n\n\n\n\n\n\n\nFigure 47.2: Scatter plot of the first two principal components (PC1 vs PC2) for the Car Sales Data.\n\n\n\n\n\nStep 4: Get the top k features influencing PC1 and PC2\n\ntop_k_features_pc1, top_k_features_pc2 = get_pca_topk(pca, feature_names, k=10)\nprint(\"Top 10 features influencing PC1:\", top_k_features_pc1)\nprint(\"Top 10 features influencing PC2:\", top_k_features_pc2)\n\nTop 10 features influencing PC1: ['curb_wgt', 'engine_s', 'fuel_cap', 'mpg', 'width', 'horsepow', 'length', 'wheelbas', 'price', 'type_1']\nTop 10 features influencing PC2: ['price', 'wheelbas', 'horsepow', 'length', 'engine_s', 'width', 'fuel_cap', 'type_1', 'mpg', 'curb_wgt']\n\n\n\n\n47.5.5.2 Loading Scores for PCA (10 Components)\n\n# Get and print loading scores\nloading_scores_df = get_loading_scores(pca, X_encoded.columns)\nprint(\"PCA Loading Scores (10 Components):\\n\", loading_scores_df)\n\nPCA Loading Scores (10 Components):\n                PC1       PC2       PC3       PC4       PC5       PC6  \\\nprice     0.251214  0.568904  0.145341 -0.484049  0.335697 -0.164197   \nengine_s  0.364662  0.204801  0.120816  0.392355 -0.419180  0.432765   \nhorsepow  0.317527  0.438619  0.305988  0.022026 -0.199571 -0.072824   \nwheelbas  0.300637 -0.470803  0.154529 -0.380962 -0.080738 -0.283304   \nwidth     0.343110 -0.186317  0.239367  0.582904  0.634863 -0.193732   \nlength    0.302587 
-0.398562  0.440896 -0.197738 -0.274463  0.077374   \ncurb_wgt  0.382220 -0.036672 -0.252691 -0.118033  0.162548  0.314687   \nfuel_cap  0.358821 -0.102589 -0.424633 -0.203499  0.220563  0.384133   \nmpg      -0.351143 -0.076695  0.468213 -0.176015  0.326376  0.640074   \ntype_1    0.072646 -0.095107 -0.362901  0.030564 -0.072634  0.019721   \n\n               PC7       PC8       PC9      PC10  \nprice    -0.009121 -0.098316 -0.395916  0.227288  \nengine_s  0.279081 -0.161115 -0.433737 -0.039611  \nhorsepow  0.132544  0.184063  0.718133 -0.019567  \nwheelbas  0.568127 -0.236665 -0.001192 -0.231065  \nwidth     0.030011  0.022386 -0.009774  0.091688  \nlength   -0.517766  0.214795 -0.090943  0.335102  \ncurb_wgt -0.410187 -0.591957  0.262427 -0.248584  \nfuel_cap  0.163440  0.640656 -0.004986 -0.074928  \nmpg       0.243304 -0.105455  0.148714  0.109367  \ntype_1    0.241967 -0.236377  0.200080  0.832423  \n\n\nFigure 47.3 shows the loading scores heatmap for the first 10 principal components. The heatmap visualizes how much each original feature contributes to each principal component, with darker colors indicating stronger contributions.\n\nplot_loading_scores(loading_scores_df)\n\n\n\n\n\n\n\nFigure 47.3: PCA Loading Scores Heatmap showing the influence of original features on the principal components.\n\n\n\n\n\n\n\n\n47.5.6 Creating the Regression Model with Principal Components\nNow, a linear regression model is fitted using the principal components derived from PCA. These components are uncorrelated, which should eliminate multicollinearity issues.\n\nX_pca_model_with_const = sm.add_constant(df_pca_components)\nmodel_pca = sm.OLS(y, X_pca_model_with_const).fit()\nprint(\"\\nRegression on PCA Components:\")\nprint(model_pca.summary())\n\n\nRegression on PCA Components:\n                            OLS Regression Results                            \n==============================================================================\nDep. 
Variable:               ln_sales   R-squared:                       0.485\nModel:                            OLS   Adj. R-squared:                  0.449\nMethod:                 Least Squares   F-statistic:                     13.73\nDate:                Sat, 04 Apr 2026   Prob (F-statistic):           7.69e-17\nTime:                        10:42:21   Log-Likelihood:                -213.62\nNo. Observations:                 157   AIC:                             449.2\nDf Residuals:                     146   BIC:                             482.9\nDf Model:                          10                                         \nCovariance Type:            nonrobust                                         \n==============================================================================\n                 coef    std err          t      P>|t|      [0.025      0.975]\n------------------------------------------------------------------------------\nconst          3.2959      0.078     42.215      0.000       3.142       3.450\nPC1           -0.0450      0.032     -1.392      0.166      -0.109       0.019\nPC2           -0.6572      0.063    -10.383      0.000      -0.782      -0.532\nPC3           -0.0624      0.091     -0.683      0.495      -0.243       0.118\nPC4            0.2500      0.135      1.856      0.065      -0.016       0.516\nPC5           -0.4628      0.157     -2.943      0.004      -0.774      -0.152\nPC6            0.3734      0.197      1.893      0.060      -0.016       0.763\nPC7            0.3777      0.205      1.847      0.067      -0.027       0.782\nPC8           -0.4887      0.225     -2.171      0.032      -0.934      -0.044\nPC9            0.2311      0.302      0.765      0.445      -0.366       0.828\nPC10           0.5885      0.361      1.631      0.105      -0.125       1.302\n==============================================================================\nOmnibus:                       41.296   Durbin-Watson:                   
1.423\nProb(Omnibus):                  0.000   Jarque-Bera (JB):              107.145\nSkew:                          -1.064   Prob(JB):                     5.42e-24\nKurtosis:                       6.442   Cond. No.                         11.2\n==============================================================================\n\nNotes:\n[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n\n\nWhen all principal components are retained, the PCA regression model performs identically to the original OLS model in terms of R-squared, Adjusted R-squared, MSE, and RMSE. This is because PCA merely rotates the data, preserving all variance if all components are used. Its benefit lies in handling multicollinearity and enabling dimensionality reduction if fewer components are chosen without significant loss of information.\n\n\n47.5.7 Collinearity Diagnostics for PCA Regression Model\nConsider the eigenvalues of the PCA components to verify that they are uncorrelated. The eigenvalues should be close to 1, indicating that the components are orthogonal and do not exhibit multicollinearity.\n\nfa_temp = FactorAnalyzer(n_factors=df_pca_components.shape[1], method=\"principal\", rotation=None)\ntry:\n    fa_temp.fit(df_pca_components)\n    ev, _ = fa_temp.get_eigenvalues()\n    ev = np.sort(ev) # The source prints in ascending order\n    print(\"Eigenvalues for each component:\\n\", ev)\nexcept Exception as e:\n    print(f\"Error during factor analysis fitting: {e}\")\n    print(\"Consider reducing multicollinearity or removing problematic features.\")\n\nEigenvalues for each component:\n [1. 1. 1. 1. 1. 1. 1. 1. 1. 
1.]\n\n\nNext, we compute the condition indices for the PCA components to confirm that they are uncorrelated.\n\ncoeffs_table = compute_coefficients_table(\n    model=model_pca, X_encoded=X_pca_model_with_const, y=y, vif_table=None\n)\nprint(\"\\nCoefficients Table:\")\nprint(coeffs_table)\n\n\nCoefficients Table:\n  Variable  Zero-Order r  Partial r  Semipartial r  Tolerance  VIF\n0      PC1     -0.082694  -0.114428      -0.082694        1.0  1.0\n1      PC2     -0.616895  -0.651723      -0.616895        1.0  1.0\n2      PC3     -0.040608  -0.056473      -0.040608        1.0  1.0\n3      PC4      0.110272   0.151818       0.110272        1.0  1.0\n4      PC5     -0.174882  -0.236673      -0.174882        1.0  1.0\n5      PC6      0.112489   0.154797       0.112489        1.0  1.0\n6      PC7      0.109722   0.151078       0.109722        1.0  1.0\n7      PC8     -0.129005  -0.176859      -0.129005        1.0  1.0\n8      PC9      0.045456   0.063189       0.045456        1.0  1.0\n9     PC10      0.096903   0.133763       0.096903        1.0  1.0\n\n\nAs expected, results indicate that there is no multicollinearity among the principal components. This confirms that PCA successfully addresses the multicollinearity problem. 
The R-squared and Adjusted R-squared values remain the same as the original OLS model since PCA preserves the total variance when all components are retained.\n\n\n47.5.8 PCA: Creating the Regression Model with three Principle Components only\n\n# Create a regression model using only the first three principal components\ndf_pc_reduced = df_pca_components.iloc[:, :3] # select the first three factors\nX_model_pc_reduced = sm.add_constant(df_pc_reduced)\nmodel_pc_reduced = sm.OLS(y, X_model_pc_reduced).fit()\nprint(\"\\nRegression on PCs (three PCs only):\")\nprint(model_pc_reduced.summary())\n\n# Verify collinearity statistics for reduced PCs scores\ncoeffs_table_pc_reduced = compute_coefficients_table(\n    model=model_pc_reduced, X_encoded=X_model_pc_reduced, y=y, vif_table=None\n)\nprint(\"\\nCoefficients Table (Reduced PCs Analysis Model):\")\nprint(coeffs_table_pc_reduced)\n\n# Verify condition indices for reduced FA scores\nX_cond_pc_reduced = copy.deepcopy(df_pc_reduced)\ncondition_index_df_pc_reduced = condition_index(X_cond_pc_reduced)\nprint(\"\\nCondition Index (Reduced PC Analysis Model):\")\nprint(condition_index_df_pc_reduced)\n\n\nRegression on PCs (three PCs only):\n                            OLS Regression Results                            \n==============================================================================\nDep. Variable:               ln_sales   R-squared:                       0.389\nModel:                            OLS   Adj. R-squared:                  0.377\nMethod:                 Least Squares   F-statistic:                     32.48\nDate:                Sat, 04 Apr 2026   Prob (F-statistic):           2.66e-16\nTime:                        10:42:21   Log-Likelihood:                -226.97\nNo. 
Observations:                 157   AIC:                             461.9\nDf Residuals:                     153   BIC:                             474.2\nDf Model:                           3                                         \nCovariance Type:            nonrobust                                         \n==============================================================================\n                 coef    std err          t      P>|t|      [0.025      0.975]\n------------------------------------------------------------------------------\nconst          3.2959      0.083     39.693      0.000       3.132       3.460\nPC1           -0.0450      0.034     -1.309      0.193      -0.113       0.023\nPC2           -0.6572      0.067     -9.762      0.000      -0.790      -0.524\nPC3           -0.0624      0.097     -0.643      0.521      -0.254       0.129\n==============================================================================\nOmnibus:                       43.520   Durbin-Watson:                   1.413\nProb(Omnibus):                  0.000   Jarque-Bera (JB):              125.210\nSkew:                          -1.081   Prob(JB):                     6.47e-28\nKurtosis:                       6.804   Cond. No.                         2.82\n==============================================================================\n\nNotes:\n[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n\nCoefficients Table (Reduced PCs Analysis Model):\n  Variable  Zero-Order r  Partial r  Semipartial r  Tolerance  VIF\n0      PC1     -0.082694  -0.105209      -0.082694        1.0  1.0\n1      PC2     -0.616895  -0.619530      -0.616895        1.0  1.0\n2      PC3     -0.040608  -0.051883      -0.040608        1.0  1.0\n\nCondition Index (Reduced PC Analysis Model):\n   Index  Eigenvalue  Condition Index\n0      0    0.736900         2.819449\n1      1    1.531162         1.955951\n2      2    5.857833         1.000000",
      +    "text": "47.5 Addressing Multicollinearity with Principal Component Analysis (PCA)\n\nDefinition 47.2 (Multicollinearity and Multicorrelation) Multicorrelation is a general term that describes correlation between multiple variables. Multicollinearity is a specific problem in regression models caused by strong correlations between independent variables, making model interpretation difficult.\n\n\n47.5.1 Introduction to PCA\nPrincipal Component Analysis (PCA) is a popular unsupervised dimensionality reduction technique. It transforms a set of possibly correlated variables into a set of linearly uncorrelated variables called principal components. The first principal component accounts for as much of the variability in the data as possible, and each succeeding component accounts for as much of the remaining variability as possible. PCA is primarily used for data compression and simplifying complex datasets.\n\n\n47.5.2 Application of PCA in Regression Problems:\n\nDimensionality Reduction: PCA reduces the number of explanatory variables by transforming original variables into a smaller set of uncorrelated principal components, making regression algorithms less prone to overfitting, especially with many features.\nReducing Multicollinearity: PCA effectively eliminates multicollinearity in linear regression models because the resulting principal components are orthogonal (uncorrelated) to each other, leading to more stable coefficient estimates.\nHandling High-Dimensional Data: It can reduce the dimensions of datasets with many variables to a manageable level before regression.\nReduced Overfitting Tendencies: By removing redundant and highly correlated variables, PCA helps reduce the risk of overfitting by focusing the model on the most influential features.\nImproved Model Performance: Performing regression on the most important principal components often leads to better generalization and improved model performance on new data.\nInterpretation of Feature 
Importance: PCA provides insights into the importance of original features through the variance explained by each principal component, which can identify combinations of variables best representing the data.\n\n\n\n47.5.3 Scree Plot\n\nDefinition 47.3 (Scree Plot) A scree plot is a graphical representation of the eigenvalues of a covariance or correlation matrix in descending order. It is used to determine the number of significant components or factors in dimensionality reduction techniques.\nMathematically, the eigenvalues \\(\\lambda_1, \\lambda_2, \\dots, \\lambda_p\\) are plotted against their corresponding component or factor indices \\(i = 1, 2, \\dots, p\\), where \\(p\\) is the total number of components or factors.\nThe eigenvalues are defined as:\n\\[\n\\lambda_i = \\text{Var}(\\mathbf{z}_i),\n\\]\nwhere \\(\\mathbf{z}_i\\) is the \\(i\\)-th principal component or factor, and \\(\\text{Var}(\\mathbf{z}_i)\\) is its variance.\nThe scree plot is constructed by plotting the points \\((i, \\lambda_i)\\) for \\(i = 1, 2, \\dots, p\\). The “elbow” in the plot, where the eigenvalues start to level off, indicates the optimal number of components or factors to retain.\n\n\n\n47.5.4 Loading Scores (for PCA)\nLoading scores in the context of Principal Component Analysis (PCA) represent the correlation or relationship between the original variables and the principal components.\n\nDefinition 47.4 (Loading Scores) The loading score for the \\(j\\)-th variable on the \\(i\\)-th principal component is defined as:\n\\[\nL_{ij} = \\mathbf{a}_i^\\top \\mathbf{x}_j,\n\\]\nwhere:\n\\(\\mathbf{a}_i\\) is the eigenvector corresponding to the \\(i\\)-th principal component, \\(\\mathbf{x}_j\\) is the standardized value of the \\(j\\)-th variable.\n\nIn PCA, the loading scores indicate how much each original variable contributes to a given principal component. 
High absolute values of \\(L_{ij}\\) suggest that the \\(j\\)-th variable strongly influences the \\(i\\)-th principal component. In PCA, loading scores can be viewed as directional vectors in the feature space. The magnitude of the score indicates how dominant the variable is in a component, while the sign represents the direction of the relationship. A high positive loading means a positive influence and correlation with the component, and a high negative loading indicates a negative correlation. Loading score values also show how much each original variable contributes to the explained variance in its respective principal component.\n\n\n\n\n\n\nNoteSummary of Loading Scores\n\n\n\nLoading scores are used in Principal Component Analysis (PCA).\n\nDefinition: Loading scores represent the correlation or relationship between the original variables and the principal components.\nPurpose: They indicate how much each original variable contributes to a given principal component.\nMathematical Representation: In PCA, the loading scores are the elements of the eigenvectors of the covariance (or correlation) matrix, scaled by the square root of the corresponding eigenvalues.\nInterpretation: High absolute values of loading scores suggest that the variable strongly influences the corresponding principal component.\n\n\n\nSection 47.7.3 explains the difference between loading scores in PCA and factor loadings in FA.\n\n\n47.5.5 PCA for Car Sales Example\n\n47.5.5.1 Computing the Principal Components\nThe Principal Component Analysis (PCA) is applied only to the features (X_encoded), not to the target variable. 
We will use functions from spotoptim.utils.pca, which are based on sklearn.decomposition.PCA to perform PCA.\nStep 1: Perform PCA and scale the data\n\npca, scaled_data, feature_names, sample_names, df_pca_components = get_pca(df=X_encoded, n_components=10)\n\nStep 2: Plot the scree plot\n\nplot_pca_scree(pca, df_name=\"Car Sales Data\", max_scree=10)\n\n\n\n\n\n\n\nFigure 47.1: Scree plot for PCA showing the explained variance ratio for each principal component.\n\n\n\n\n\nStep 3: Plot the first two principal components\n\nplot_pca1vs2(pca, df_pca_components, df_name=\"Car Sales Data\")\n\n\n\n\n\n\n\nFigure 47.2: Scatter plot of the first two principal components (PC1 vs PC2) for the Car Sales Data.\n\n\n\n\n\nStep 4: Get the top k features influencing PC1 and PC2\n\ntop_k_features_pc1, top_k_features_pc2 = get_pca_topk(pca, feature_names, k=10)\nprint(\"Top 10 features influencing PC1:\", top_k_features_pc1)\nprint(\"Top 10 features influencing PC2:\", top_k_features_pc2)\n\nTop 10 features influencing PC1: ['curb_wgt', 'engine_s', 'fuel_cap', 'mpg', 'width', 'horsepow', 'length', 'wheelbas', 'price', 'type_1']\nTop 10 features influencing PC2: ['price', 'wheelbas', 'horsepow', 'length', 'engine_s', 'width', 'fuel_cap', 'type_1', 'mpg', 'curb_wgt']\n\n\n\n\n47.5.5.2 Loading Scores for PCA (10 Components)\n\n# Get and print loading scores\nloading_scores_df = get_loading_scores(pca, X_encoded.columns)\nprint(\"PCA Loading Scores (10 Components):\\n\", loading_scores_df)\n\nPCA Loading Scores (10 Components):\n                PC1       PC2       PC3       PC4       PC5       PC6  \\\nprice     0.251214  0.568904  0.145341 -0.484049  0.335697 -0.164197   \nengine_s  0.364662  0.204801  0.120816  0.392355 -0.419180  0.432765   \nhorsepow  0.317527  0.438619  0.305988  0.022026 -0.199571 -0.072824   \nwheelbas  0.300637 -0.470803  0.154529 -0.380962 -0.080738 -0.283304   \nwidth     0.343110 -0.186317  0.239367  0.582904  0.634863 -0.193732   \nlength    0.302587 
-0.398562  0.440896 -0.197738 -0.274463  0.077374   \ncurb_wgt  0.382220 -0.036672 -0.252691 -0.118033  0.162548  0.314687   \nfuel_cap  0.358821 -0.102589 -0.424633 -0.203499  0.220563  0.384133   \nmpg      -0.351143 -0.076695  0.468213 -0.176015  0.326376  0.640074   \ntype_1    0.072646 -0.095107 -0.362901  0.030564 -0.072634  0.019721   \n\n               PC7       PC8       PC9      PC10  \nprice    -0.009121 -0.098316 -0.395916  0.227288  \nengine_s  0.279081 -0.161115 -0.433737 -0.039611  \nhorsepow  0.132544  0.184063  0.718133 -0.019567  \nwheelbas  0.568127 -0.236665 -0.001192 -0.231065  \nwidth     0.030011  0.022386 -0.009774  0.091688  \nlength   -0.517766  0.214795 -0.090943  0.335102  \ncurb_wgt -0.410187 -0.591957  0.262427 -0.248584  \nfuel_cap  0.163440  0.640656 -0.004986 -0.074928  \nmpg       0.243304 -0.105455  0.148714  0.109367  \ntype_1    0.241967 -0.236377  0.200080  0.832423  \n\n\nFigure 47.3 shows the loading scores heatmap for the first 10 principal components. The heatmap visualizes how much each original feature contributes to each principal component, with darker colors indicating stronger contributions.\n\nplot_loading_scores(loading_scores_df)\n\n\n\n\n\n\n\nFigure 47.3: PCA Loading Scores Heatmap showing the influence of original features on the principal components.\n\n\n\n\n\n\n\n\n47.5.6 Creating the Regression Model with Principal Components\nNow, a linear regression model is fitted using the principal components derived from PCA. These components are uncorrelated, which should eliminate multicollinearity issues.\n\nX_pca_model_with_const = sm.add_constant(df_pca_components)\nmodel_pca = sm.OLS(y, X_pca_model_with_const).fit()\nprint(\"\\nRegression on PCA Components:\")\nprint(model_pca.summary())\n\n\nRegression on PCA Components:\n                            OLS Regression Results                            \n==============================================================================\nDep. 
Variable:               ln_sales   R-squared:                       0.485\nModel:                            OLS   Adj. R-squared:                  0.449\nMethod:                 Least Squares   F-statistic:                     13.73\nDate:                Sat, 11 Apr 2026   Prob (F-statistic):           7.69e-17\nTime:                        22:39:01   Log-Likelihood:                -213.62\nNo. Observations:                 157   AIC:                             449.2\nDf Residuals:                     146   BIC:                             482.9\nDf Model:                          10                                         \nCovariance Type:            nonrobust                                         \n==============================================================================\n                 coef    std err          t      P>|t|      [0.025      0.975]\n------------------------------------------------------------------------------\nconst          3.2959      0.078     42.215      0.000       3.142       3.450\nPC1           -0.0450      0.032     -1.392      0.166      -0.109       0.019\nPC2           -0.6572      0.063    -10.383      0.000      -0.782      -0.532\nPC3           -0.0624      0.091     -0.683      0.495      -0.243       0.118\nPC4            0.2500      0.135      1.856      0.065      -0.016       0.516\nPC5           -0.4628      0.157     -2.943      0.004      -0.774      -0.152\nPC6            0.3734      0.197      1.893      0.060      -0.016       0.763\nPC7            0.3777      0.205      1.847      0.067      -0.027       0.782\nPC8           -0.4887      0.225     -2.171      0.032      -0.934      -0.044\nPC9            0.2311      0.302      0.765      0.445      -0.366       0.828\nPC10           0.5885      0.361      1.631      0.105      -0.125       1.302\n==============================================================================\nOmnibus:                       41.296   Durbin-Watson:                   
1.423\nProb(Omnibus):                  0.000   Jarque-Bera (JB):              107.145\nSkew:                          -1.064   Prob(JB):                     5.42e-24\nKurtosis:                       6.442   Cond. No.                         11.2\n==============================================================================\n\nNotes:\n[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n\n\nWhen all principal components are retained, the PCA regression model performs identically to the original OLS model in terms of R-squared, Adjusted R-squared, MSE, and RMSE. This is because PCA merely rotates the data, preserving all variance if all components are used. Its benefit lies in handling multicollinearity and enabling dimensionality reduction if fewer components are chosen without significant loss of information.\n\n\n47.5.7 Collinearity Diagnostics for PCA Regression Model\nConsider the eigenvalues of the PCA components to verify that they are uncorrelated. The eigenvalues should be close to 1, indicating that the components are orthogonal and do not exhibit multicollinearity.\n\nfa_temp = FactorAnalyzer(n_factors=df_pca_components.shape[1], method=\"principal\", rotation=None)\ntry:\n    fa_temp.fit(df_pca_components)\n    ev, _ = fa_temp.get_eigenvalues()\n    ev = np.sort(ev) # The source prints in ascending order\n    print(\"Eigenvalues for each component:\\n\", ev)\nexcept Exception as e:\n    print(f\"Error during factor analysis fitting: {e}\")\n    print(\"Consider reducing multicollinearity or removing problematic features.\")\n\nEigenvalues for each component:\n [1. 1. 1. 1. 1. 1. 1. 1. 1. 
1.]\n\n\nNext, we compute the condition indices for the PCA components to confirm that they are uncorrelated.\n\ncoeffs_table = compute_coefficients_table(\n    model=model_pca, X_encoded=X_pca_model_with_const, y=y, vif_table=None\n)\nprint(\"\\nCoefficients Table:\")\nprint(coeffs_table)\n\n\nCoefficients Table:\n  Variable  Zero-Order r  Partial r  Semipartial r  Tolerance  VIF\n0      PC1     -0.082694  -0.114428      -0.082694        1.0  1.0\n1      PC2     -0.616895  -0.651723      -0.616895        1.0  1.0\n2      PC3     -0.040608  -0.056473      -0.040608        1.0  1.0\n3      PC4      0.110272   0.151818       0.110272        1.0  1.0\n4      PC5     -0.174882  -0.236673      -0.174882        1.0  1.0\n5      PC6      0.112489   0.154797       0.112489        1.0  1.0\n6      PC7      0.109722   0.151078       0.109722        1.0  1.0\n7      PC8     -0.129005  -0.176859      -0.129005        1.0  1.0\n8      PC9      0.045456   0.063189       0.045456        1.0  1.0\n9     PC10      0.096903   0.133763       0.096903        1.0  1.0\n\n\nAs expected, results indicate that there is no multicollinearity among the principal components. This confirms that PCA successfully addresses the multicollinearity problem. 
The R-squared and Adjusted R-squared values remain the same as the original OLS model since PCA preserves the total variance when all components are retained.\n\n\n47.5.8 PCA: Creating the Regression Model with three Principle Components only\n\n# Create a regression model using only the first three principal components\ndf_pc_reduced = df_pca_components.iloc[:, :3] # select the first three factors\nX_model_pc_reduced = sm.add_constant(df_pc_reduced)\nmodel_pc_reduced = sm.OLS(y, X_model_pc_reduced).fit()\nprint(\"\\nRegression on PCs (three PCs only):\")\nprint(model_pc_reduced.summary())\n\n# Verify collinearity statistics for reduced PCs scores\ncoeffs_table_pc_reduced = compute_coefficients_table(\n    model=model_pc_reduced, X_encoded=X_model_pc_reduced, y=y, vif_table=None\n)\nprint(\"\\nCoefficients Table (Reduced PCs Analysis Model):\")\nprint(coeffs_table_pc_reduced)\n\n# Verify condition indices for reduced FA scores\nX_cond_pc_reduced = copy.deepcopy(df_pc_reduced)\ncondition_index_df_pc_reduced = condition_index(X_cond_pc_reduced)\nprint(\"\\nCondition Index (Reduced PC Analysis Model):\")\nprint(condition_index_df_pc_reduced)\n\n\nRegression on PCs (three PCs only):\n                            OLS Regression Results                            \n==============================================================================\nDep. Variable:               ln_sales   R-squared:                       0.389\nModel:                            OLS   Adj. R-squared:                  0.377\nMethod:                 Least Squares   F-statistic:                     32.48\nDate:                Sat, 11 Apr 2026   Prob (F-statistic):           2.66e-16\nTime:                        22:39:01   Log-Likelihood:                -226.97\nNo. 
Observations:                 157   AIC:                             461.9\nDf Residuals:                     153   BIC:                             474.2\nDf Model:                           3                                         \nCovariance Type:            nonrobust                                         \n==============================================================================\n                 coef    std err          t      P>|t|      [0.025      0.975]\n------------------------------------------------------------------------------\nconst          3.2959      0.083     39.693      0.000       3.132       3.460\nPC1           -0.0450      0.034     -1.309      0.193      -0.113       0.023\nPC2           -0.6572      0.067     -9.762      0.000      -0.790      -0.524\nPC3           -0.0624      0.097     -0.643      0.521      -0.254       0.129\n==============================================================================\nOmnibus:                       43.520   Durbin-Watson:                   1.413\nProb(Omnibus):                  0.000   Jarque-Bera (JB):              125.210\nSkew:                          -1.081   Prob(JB):                     6.47e-28\nKurtosis:                       6.804   Cond. No.                         2.82\n==============================================================================\n\nNotes:\n[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n\nCoefficients Table (Reduced PCs Analysis Model):\n  Variable  Zero-Order r  Partial r  Semipartial r  Tolerance  VIF\n0      PC1     -0.082694  -0.105209      -0.082694        1.0  1.0\n1      PC2     -0.616895  -0.619530      -0.616895        1.0  1.0\n2      PC3     -0.040608  -0.051883      -0.040608        1.0  1.0\n\nCondition Index (Reduced PC Analysis Model):\n   Index  Eigenvalue  Condition Index\n0      0    0.736900         2.819449\n1      1    1.531162         1.955951\n2      2    5.857833         1.000000",
           "crumbs": [
             "Data-Driven Modeling and Optimization",
             "47  Addressing Multicollinearity: Principle Component Analysis (PCA) and Factor Analysis (FA)"
      @@ -5140,7 +5140,7 @@
           "href": "100_ddmo_pca.html#sec-fa",
           "title": "47  Addressing Multicollinearity: Principle Component Analysis (PCA) and Factor Analysis (FA)",
           "section": "47.6 Addressing Multicollinearity and Latent Structure with Factor Analysis (FA)",
      -    "text": "47.6 Addressing Multicollinearity and Latent Structure with Factor Analysis (FA)\n\n47.6.1 Introduction to Factor Analysis\nFactor Analysis (FA) is a statistical method used to describe variability among observed, correlated variables in terms of a potentially lower number of unobserved variables called factors or latent variables. Unlike PCA, which is primarily a data reduction technique focused on maximizing variance explained, FA assumes that the observed variables are linear combinations of these underlying factors plus an error term. FA’s main goal is to uncover the underlying structure that explains the correlations among observed variables.\n\n\n47.6.2 Determining the Number of Factors for Factor Analysis\nFor Factor Analysis, the number of factors to extract is a crucial decision. A common approach, consistent with the KMO measure, is to consider factors with eigenvalues greater than 1 (Kaiser’s criterion). Factor analysis is then performed, often with a rotation method like Varimax to improve factor interpretability.\n\nanz_fak = 10 # Number of factors to extract, similar to the components in PCA\nn_factors = min(anz_fak, X_encoded.shape[1])\nfa = FactorAnalyzer(n_factors=n_factors, method=\"principal\", rotation=\"varimax\")\nfa.fit(X_encoded) # Fit the Factor Analyzer\nactual_factors = fa.loadings_.shape[1] # Number of factors actually extracted\nprint(f\"actual_factors: {actual_factors}\")\nif actual_factors < n_factors:\n    print(\n        f\"\\nWarning: Only {actual_factors} factors could be extracted \"\n        f\"(requested {n_factors}).\"\n    )\nfactor_columns = [f\"Factor{i+1}\" for i in range(actual_factors)]\n\nactual_factors: 10\n\n\n\n\n47.6.3 Scree Plot for Factor Analysis\nFigure 47.4 shows the eigenvalues for each factor extracted from Factor Analysis. 
The scree plot helps in determining the number of factors to retain by identifying the “elbow” point where the eigenvalues start to level off, indicating diminishing returns in explained variance.\n\nplt.figure(figsize=(10, 6))\nev_fa, _ = fa.get_eigenvalues()\nplt.plot(range(1, len(ev_fa) + 1), ev_fa, marker='o', linestyle='--')\nplt.title('Scree Plot for Factor Analysis')\nplt.xlabel('Number of Factors')\nplt.ylabel('Eigenvalue')\nplt.grid(True)\nplt.xticks(range(1, len(ev_fa) + 1))\nplt.show()\n\n\n\n\n\n\n\nFigure 47.4: Scree plot for Factor Analysis showing the eigenvalues for each factor.\n\n\n\n\n\n\n\n47.6.4 Factor Loadings\nFactor Loadings indicate how strongly each original variable is correlated with the extracted factors. High absolute values suggest that the variable has a significant influence on, or is strongly associated with, that factor. Loadings help in interpreting the meaning of each underlying factor.\n\n\n\n\n\n\nNoteSummary of Factor Loadings\n\n\n\nFactor loadings are used in Factor Analysis (FA). * Definition: Factor loadings represent the correlation or relationship between the observed variables and the latent factors. * Purpose: They indicate how much each observed variable is explained by a given factor. * Mathematical Representation: In FA, factor loadings are derived from the factor model, where observed variables are expressed as linear combinations of latent factors plus error terms. 
* Interpretation: High absolute values of factor loadings suggest that the variable is strongly associated with the corresponding factor.\n\n\nSection 47.7.3 explains the difference between loading scores in PCA and factor loadings in FA.\n\n# Print factor loadings with 2 decimals\nfactor_loadings = fa.loadings_\nprint(\"Factor Loadings (rounded to 2 decimals):\\n\", np.round(factor_loadings, 2))\n\n# Create a DataFrame for the factor loadings for better visualization\nfactor_loadings_df = pd.DataFrame(\n    factor_loadings, index=X_encoded.columns, # Original feature names\n    columns=factor_columns # Factor names\n)\n\n# Plot the heatmap for factor loadings\nplt.figure(figsize=(10, 8))\nsns.heatmap(\n    factor_loadings_df, annot=True, # Annotate with values\n    fmt=\".2f\", # Format values to 2 decimals\n    cmap=\"coolwarm\", # Color map\n    cbar=True # Show color bar\n)\nplt.title(\"Factor Loadings Heatmap\")\nplt.xlabel(\"Factors\")\nplt.ylabel(\"Features\")\nplt.tight_layout()\nplt.show()\n\nFactor Loadings (rounded to 2 decimals):\n [[ 0.12  0.97 -0.01  0.06 -0.01  0.08  0.01  0.09  0.01 -0.15]\n [ 0.12  0.6   0.18  0.28  0.61  0.2   0.31  0.09 -0.01  0.02]\n [ 0.06  0.87 -0.01  0.19  0.26  0.16  0.19  0.01 -0.02  0.26]\n [ 0.13  0.07  0.26  0.18  0.04  0.09  0.9   0.03  0.26 -0.01]\n [ 0.14  0.25  0.12  0.77  0.15  0.14  0.51  0.08  0.    0.01]\n [ 0.09  0.12  0.    0.2   0.14  0.11  0.92  0.09 -0.23  0.03]\n [ 0.28  0.43  0.41  0.26  0.15  0.25  0.44  0.48 -0.01 -0.  ]\n [ 0.63  0.32  0.46  0.22  0.11  0.24  0.4   0.12  0.01  0.  ]\n [-0.2  -0.41 -0.46 -0.2  -0.17 -0.66 -0.25 -0.1  -0.   -0.01]\n [ 0.1  -0.06  0.98  0.06  0.05  0.12  0.1   0.05  0.01 -0.  ]]\n\n\n\n\n\n\n\n\n\n\n\n47.6.5 Factor Scores\nThe factor scores are the transformed values of the original variables based on the extracted factors. 
These scores represent the values of the latent factors for each observation and can be used as new features in regression models, similar to principal components in PCA.\n\nDefinition 47.5 (Factor Scores) A factor score represents the value of a latent factor for a given observation, calculated as a linear combination of the observed variables weighted by the factor score coefficients.\nMathematically, the factor score for the \\(i\\)-th factor and the \\(j\\)-th observation is defined as:\n\\[\nF_{ji} = w_{i1} x_{j1} + w_{i2} x_{j2} + \\cdots + w_{ip} x_{jp} = \\sum_{k=1}^p w_{ik} x_{jk},\n\\]\nwhere\n\n\\(F_{ji}\\) is the factor score for factor \\(i\\) and observation \\(j\\),\n\n\\(w_{ik}\\) is the factor score coefficient for variable \\(k\\) on factor \\(i\\),\n\n\\(x_{jk}\\) is the standardized value of variable \\(k\\) for observation \\(j\\), and\n\\(p\\) is the number of observed variables.\n\n\n\n# Factor scores for each row (shape: [n_samples, actual_factors])\nX_factor_scores = fa.transform(X_encoded)\nprint(f\"X_factor_scores shape: {X_factor_scores.shape}\")\n\n# Adapt the factor column names to the actual factor count\ndf_factors = pd.DataFrame(X_factor_scores, columns=factor_columns)\nprint(f\"df_factors shape: {df_factors.shape}\")\nprint(f\"df_factors head:\\n{df_factors.head()}\")\n\nX_factor_scores shape: (157, 10)\ndf_factors shape: (157, 10)\ndf_factors head:\n    Factor1   Factor2   Factor3   Factor4   Factor5   Factor6   Factor7  \\\n0 -0.647996 -0.310986 -0.395620 -0.514476 -0.753763 -0.171572 -0.691765   \n1 -0.171241  0.352069 -0.579629 -0.677204  0.113380 -0.329903  0.434305   \n2  0.077192  0.050156 -0.595317 -0.396626  0.412052 -0.688322  0.246025   \n3 -0.683708  0.820534 -0.676114 -0.796906 -0.241928  0.602161  1.058645   \n4  0.615152 -0.262258 -0.541357 -0.489288 -1.207964 -0.186946 -0.485740   \n\n    Factor8   Factor9  Factor10  \n0 -0.233725  0.567292 -0.139248  \n1  0.852994 -0.099874  1.690789  \n2  0.941176 -0.209195  
2.468886  \n3  1.063771  1.022527 -1.245557  \n4  0.259073  0.073952  0.308099  \n\n\n\n\n47.6.6 Creating the Regression Model with Extracted Factors (from FA)\nA linear regression model is built using all ten extracted factors from Factor Analysis. The expectation is that these factors are uncorrelated, addressing multicollinearity.\n\nX_model_fa = sm.add_constant(df_factors)\nmodel_factors = sm.OLS(y, X_model_fa).fit()\nprint(\"\\nRegression on Factor Scores (all 10 factors):\")\nprint(model_factors.summary())\n\n# Verify collinearity statistics for Factor Analysis scores (VIF and Tolerance)\ncoeffs_table_fa = compute_coefficients_table(\n    model=model_factors, X_encoded=X_model_fa, y=y, vif_table=None\n)\nprint(\"\\nCoefficients Table (Factor Analysis Model):\")\nprint(coeffs_table_fa)\n\n# Verify condition indices\nX_cond_fa = copy.deepcopy(df_factors)\ncondition_index_df_fa = condition_index(X_cond_fa)\nprint(\"\\nCondition Index (Factor Analysis Model):\")\nprint(condition_index_df_fa)\n\n\nRegression on Factor Scores (all 10 factors):\n                            OLS Regression Results                            \n==============================================================================\nDep. Variable:               ln_sales   R-squared:                       0.485\nModel:                            OLS   Adj. R-squared:                  0.449\nMethod:                 Least Squares   F-statistic:                     13.73\nDate:                Sat, 04 Apr 2026   Prob (F-statistic):           7.69e-17\nTime:                        10:42:22   Log-Likelihood:                -213.62\nNo. 
Observations:                 157   AIC:                             449.2\nDf Residuals:                     146   BIC:                             482.9\nDf Model:                          10                                         \nCovariance Type:            nonrobust                                         \n==============================================================================\n                 coef    std err          t      P>|t|      [0.025      0.975]\n------------------------------------------------------------------------------\nconst          3.2959      0.078     42.215      0.000       3.142       3.450\nFactor1       -0.1366      0.078     -1.749      0.082      -0.291       0.018\nFactor2       -0.7022      0.078     -8.994      0.000      -0.856      -0.548\nFactor3        0.3035      0.078      3.888      0.000       0.149       0.458\nFactor4     9.177e-06      0.078      0.000      1.000      -0.154       0.154\nFactor5        0.1719      0.078      2.201      0.029       0.018       0.326\nFactor6       -0.1653      0.078     -2.117      0.036      -0.320      -0.011\nFactor7        0.4130      0.078      5.290      0.000       0.259       0.567\nFactor8       -0.0072      0.078     -0.092      0.927      -0.161       0.147\nFactor9        0.0317      0.078      0.407      0.685      -0.123       0.186\nFactor10       0.0665      0.078      0.852      0.396      -0.088       0.221\n==============================================================================\nOmnibus:                       41.296   Durbin-Watson:                   1.423\nProb(Omnibus):                  0.000   Jarque-Bera (JB):              107.145\nSkew:                          -1.064   Prob(JB):                     5.42e-24\nKurtosis:                       6.442   Cond. No.                         
1.00\n==============================================================================\n\nNotes:\n[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n\nCoefficients Table (Factor Analysis Model):\n   Variable  Zero-Order r  Partial r  Semipartial r  Tolerance  VIF\n0   Factor1     -0.103920  -0.143257      -0.103920        1.0  1.0\n1   Factor2     -0.534367  -0.597080      -0.534367        1.0  1.0\n2   Factor3      0.231004   0.306300       0.231004        1.0  1.0\n3   Factor4      0.000007   0.000010       0.000007        1.0  1.0\n4   Factor5      0.130790   0.179228       0.130790        1.0  1.0\n5   Factor6     -0.125772  -0.172560      -0.125772        1.0  1.0\n6   Factor7      0.314284   0.401023       0.314284        1.0  1.0\n7   Factor8     -0.005478  -0.007630      -0.005478        1.0  1.0\n8   Factor9      0.024158   0.033630       0.024158        1.0  1.0\n9  Factor10      0.050594   0.070298       0.050594        1.0  1.0\n\nCondition Index (Factor Analysis Model):\n   Index  Eigenvalue  Condition Index\n0      0     1.00641              1.0\n1      1     1.00641              1.0\n2      2     1.00641              1.0\n3      3     1.00641              1.0\n4      4     1.00641              1.0\n5      5     1.00641              1.0\n6      6     1.00641              1.0\n7      7     1.00641              1.0\n8      8     1.00641              1.0\n9      9     1.00641              1.0\n\n\nAs expected, the collinearity statistics (VIF and Tolerance) for the factor values show that they are uncorrelated (VIF=1, Tolerance=1). The condition indices are also all close to 1, confirming that Factor Analysis successfully mitigates multicollinearity. 
The coefficient estimates are larger relative to their standard errors compared to the original model, which can lead to more factors being identified as statistically significant.\nIf the R-squared and Adjusted R-squared values for model_factors are close to those of the original model, it indicates that the regression model based on Factor Analysis performs similarly well, while successfully reducing multicollinearity. When all factors are used, the predictive performance metrics are identical to the original OLS model.\n\n\n47.6.7 Factor Analysis: Creating the Regression Model with three Extracted Factors only\n\n47.6.7.1 Setting Up the Regression Model with Reduced Factors\nTo demonstrate the effect of dimensionality reduction, a regression model is created using only the first three extracted factors from Factor Analysis.\n\n# Create a regression model using only the first three factors\ndf_factors_reduced = df_factors.iloc[:, :3] # select the first three factors\nX_model_fa_reduced = sm.add_constant(df_factors_reduced)\nmodel_factors_reduced = sm.OLS(y, X_model_fa_reduced).fit()\nprint(\"\\nRegression on Factor Scores (three factors only):\")\nprint(model_factors_reduced.summary())\n\n# Verify collinearity statistics for reduced FA scores\ncoeffs_table_fa_reduced = compute_coefficients_table(\n    model=model_factors_reduced, X_encoded=X_model_fa_reduced, y=y, vif_table=None\n)\nprint(\"\\nCoefficients Table (Reduced Factor Analysis Model):\")\nprint(coeffs_table_fa_reduced)\n\n# Verify condition indices for reduced FA scores\nX_cond_fa_reduced = copy.deepcopy(df_factors_reduced)\ncondition_index_df_fa_reduced = condition_index(X_cond_fa_reduced)\nprint(\"\\nCondition Index (Reduced Factor Analysis Model):\")\nprint(condition_index_df_fa_reduced)\n\n\nRegression on Factor Scores (three factors only):\n                            OLS Regression Results                            
\n==============================================================================\nDep. Variable:               ln_sales   R-squared:                       0.350\nModel:                            OLS   Adj. R-squared:                  0.337\nMethod:                 Least Squares   F-statistic:                     27.43\nDate:                Sat, 04 Apr 2026   Prob (F-statistic):           2.99e-14\nTime:                        10:42:22   Log-Likelihood:                -231.87\nNo. Observations:                 157   AIC:                             471.7\nDf Residuals:                     153   BIC:                             484.0\nDf Model:                           3                                         \nCovariance Type:            nonrobust                                         \n==============================================================================\n                 coef    std err          t      P>|t|      [0.025      0.975]\n------------------------------------------------------------------------------\nconst          3.2959      0.086     38.474      0.000       3.127       3.465\nFactor1       -0.1366      0.086     -1.594      0.113      -0.306       0.033\nFactor2       -0.7022      0.086     -8.197      0.000      -0.871      -0.533\nFactor3        0.3035      0.086      3.543      0.001       0.134       0.473\n==============================================================================\nOmnibus:                       43.992   Durbin-Watson:                   1.418\nProb(Omnibus):                  0.000   Jarque-Bera (JB):              134.618\nSkew:                          -1.068   Prob(JB):                     5.86e-30\nKurtosis:                       7.002   Cond. No.                         
1.00\n==============================================================================\n\nNotes:\n[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n\nCoefficients Table (Reduced Factor Analysis Model):\n  Variable  Zero-Order r  Partial r  Semipartial r  Tolerance  VIF\n0  Factor1     -0.103920  -0.127811      -0.103920        1.0  1.0\n1  Factor2     -0.534367  -0.552381      -0.534367        1.0  1.0\n2  Factor3      0.231004   0.275385       0.231004        1.0  1.0\n\nCondition Index (Reduced Factor Analysis Model):\n   Index  Eigenvalue  Condition Index\n0      0     1.00641              1.0\n1      1     1.00641              1.0\n2      2     1.00641              1.0\n\n\nThe collinearity statistics for the reduced factor set continue to show that they are uncorrelated, with VIFs of 1.0 and condition indices close to 1.\n\n\n47.6.7.2 Comparison of Model Performance of the Reduced FA Model and the Full OLS Model\nWhen reducing the number of factors from 10 to 3, the R-squared and Adjusted R-squared values for the Factor Analysis model decrease significantly (from ~0.48 to ~0.35). This indicates a trade-off: while reducing dimensionality successfully addresses multicollinearity, retaining too few factors can lead to information loss and reduced predictive accuracy. Lower MSE and RMSE values still suggest better predictive performance for the full OLS model in this specific comparison, as it retains more information.",
      +    "text": "47.6 Addressing Multicollinearity and Latent Structure with Factor Analysis (FA)\n\n47.6.1 Introduction to Factor Analysis\nFactor Analysis (FA) is a statistical method used to describe variability among observed, correlated variables in terms of a potentially lower number of unobserved variables called factors or latent variables. Unlike PCA, which is primarily a data reduction technique focused on maximizing variance explained, FA assumes that the observed variables are linear combinations of these underlying factors plus an error term. FA’s main goal is to uncover the underlying structure that explains the correlations among observed variables.\n\n\n47.6.2 Determining the Number of Factors for Factor Analysis\nFor Factor Analysis, the number of factors to extract is a crucial decision. A common approach, consistent with the KMO measure, is to consider factors with eigenvalues greater than 1 (Kaiser’s criterion). Factor analysis is then performed, often with a rotation method like Varimax to improve factor interpretability.\n\nanz_fak = 10 # Number of factors to extract, similar to the components in PCA\nn_factors = min(anz_fak, X_encoded.shape[1])\nfa = FactorAnalyzer(n_factors=n_factors, method=\"principal\", rotation=\"varimax\")\nfa.fit(X_encoded) # Fit the Factor Analyzer\nactual_factors = fa.loadings_.shape[1] # Number of factors actually extracted\nprint(f\"actual_factors: {actual_factors}\")\nif actual_factors < n_factors:\n    print(\n        f\"\\nWarning: Only {actual_factors} factors could be extracted \"\n        f\"(requested {n_factors}).\"\n    )\nfactor_columns = [f\"Factor{i+1}\" for i in range(actual_factors)]\n\nactual_factors: 10\n\n\n\n\n47.6.3 Scree Plot for Factor Analysis\nFigure 47.4 shows the eigenvalues for each factor extracted from Factor Analysis. 
The scree plot helps in determining the number of factors to retain by identifying the “elbow” point where the eigenvalues start to level off, indicating diminishing returns in explained variance.\n\nplt.figure(figsize=(10, 6))\nev_fa, _ = fa.get_eigenvalues()\nplt.plot(range(1, len(ev_fa) + 1), ev_fa, marker='o', linestyle='--')\nplt.title('Scree Plot for Factor Analysis')\nplt.xlabel('Number of Factors')\nplt.ylabel('Eigenvalue')\nplt.grid(True)\nplt.xticks(range(1, len(ev_fa) + 1))\nplt.show()\n\n\n\n\n\n\n\nFigure 47.4: Scree plot for Factor Analysis showing the eigenvalues for each factor.\n\n\n\n\n\n\n\n47.6.4 Factor Loadings\nFactor Loadings indicate how strongly each original variable is correlated with the extracted factors. High absolute values suggest that the variable has a significant influence on, or is strongly associated with, that factor. Loadings help in interpreting the meaning of each underlying factor.\n\n\n\n\n\n\nNoteSummary of Factor Loadings\n\n\n\nFactor loadings are used in Factor Analysis (FA). * Definition: Factor loadings represent the correlation or relationship between the observed variables and the latent factors. * Purpose: They indicate how much each observed variable is explained by a given factor. * Mathematical Representation: In FA, factor loadings are derived from the factor model, where observed variables are expressed as linear combinations of latent factors plus error terms. 
* Interpretation: High absolute values of factor loadings suggest that the variable is strongly associated with the corresponding factor.\n\n\nSection 47.7.3 explains the difference between loading scores in PCA and factor loadings in FA.\n\n# Print factor loadings with 2 decimals\nfactor_loadings = fa.loadings_\nprint(\"Factor Loadings (rounded to 2 decimals):\\n\", np.round(factor_loadings, 2))\n\n# Create a DataFrame for the factor loadings for better visualization\nfactor_loadings_df = pd.DataFrame(\n    factor_loadings, index=X_encoded.columns, # Original feature names\n    columns=factor_columns # Factor names\n)\n\n# Plot the heatmap for factor loadings\nplt.figure(figsize=(10, 8))\nsns.heatmap(\n    factor_loadings_df, annot=True, # Annotate with values\n    fmt=\".2f\", # Format values to 2 decimals\n    cmap=\"coolwarm\", # Color map\n    cbar=True # Show color bar\n)\nplt.title(\"Factor Loadings Heatmap\")\nplt.xlabel(\"Factors\")\nplt.ylabel(\"Features\")\nplt.tight_layout()\nplt.show()\n\nFactor Loadings (rounded to 2 decimals):\n [[ 0.12  0.97 -0.01  0.06 -0.01  0.08  0.01  0.09  0.01 -0.15]\n [ 0.12  0.6   0.18  0.28  0.61  0.2   0.31  0.09 -0.01  0.02]\n [ 0.06  0.87 -0.01  0.19  0.26  0.16  0.19  0.01 -0.02  0.26]\n [ 0.13  0.07  0.26  0.18  0.04  0.09  0.9   0.03  0.26 -0.01]\n [ 0.14  0.25  0.12  0.77  0.15  0.14  0.51  0.08  0.    0.01]\n [ 0.09  0.12  0.    0.2   0.14  0.11  0.92  0.09 -0.23  0.03]\n [ 0.28  0.43  0.41  0.26  0.15  0.25  0.44  0.48 -0.01 -0.  ]\n [ 0.63  0.32  0.46  0.22  0.11  0.24  0.4   0.12  0.01  0.  ]\n [-0.2  -0.41 -0.46 -0.2  -0.17 -0.66 -0.25 -0.1  -0.   -0.01]\n [ 0.1  -0.06  0.98  0.06  0.05  0.12  0.1   0.05  0.01 -0.  ]]\n\n\n\n\n\n\n\n\n\n\n\n47.6.5 Factor Scores\nThe factor scores are the transformed values of the original variables based on the extracted factors. 
These scores represent the values of the latent factors for each observation and can be used as new features in regression models, similar to principal components in PCA.\n\nDefinition 47.5 (Factor Scores) A factor score represents the value of a latent factor for a given observation, calculated as a linear combination of the observed variables weighted by the factor score coefficients.\nMathematically, the factor score for the \\(i\\)-th factor and the \\(j\\)-th observation is defined as:\n\\[\nF_{ji} = w_{i1} x_{j1} + w_{i2} x_{j2} + \\cdots + w_{ip} x_{jp} = \\sum_{k=1}^p w_{ik} x_{jk},\n\\]\nwhere\n\n\\(F_{ji}\\) is the factor score for factor \\(i\\) and observation \\(j\\),\n\n\\(w_{ik}\\) is the factor score coefficient for variable \\(k\\) on factor \\(i\\),\n\n\\(x_{jk}\\) is the standardized value of variable \\(k\\) for observation \\(j\\), and\n\\(p\\) is the number of observed variables.\n\n\n\n# Factor scores for each row (shape: [n_samples, actual_factors])\nX_factor_scores = fa.transform(X_encoded)\nprint(f\"X_factor_scores shape: {X_factor_scores.shape}\")\n\n# Adapt the factor column names to the actual factor count\ndf_factors = pd.DataFrame(X_factor_scores, columns=factor_columns)\nprint(f\"df_factors shape: {df_factors.shape}\")\nprint(f\"df_factors head:\\n{df_factors.head()}\")\n\nX_factor_scores shape: (157, 10)\ndf_factors shape: (157, 10)\ndf_factors head:\n    Factor1   Factor2   Factor3   Factor4   Factor5   Factor6   Factor7  \\\n0 -0.647996 -0.310986 -0.395620 -0.514476 -0.753763 -0.171572 -0.691765   \n1 -0.171241  0.352069 -0.579629 -0.677204  0.113380 -0.329903  0.434305   \n2  0.077192  0.050156 -0.595317 -0.396626  0.412052 -0.688322  0.246025   \n3 -0.683708  0.820534 -0.676114 -0.796906 -0.241928  0.602161  1.058645   \n4  0.615152 -0.262258 -0.541357 -0.489288 -1.207964 -0.186946 -0.485740   \n\n    Factor8   Factor9  Factor10  \n0 -0.233725  0.567292 -0.139248  \n1  0.852994 -0.099874  1.690789  \n2  0.941176 -0.209195  
2.468886  \n3  1.063771  1.022527 -1.245557  \n4  0.259073  0.073952  0.308099  \n\n\n\n\n47.6.6 Creating the Regression Model with Extracted Factors (from FA)\nA linear regression model is built using all ten extracted factors from Factor Analysis. The expectation is that these factors are uncorrelated, addressing multicollinearity.\n\nX_model_fa = sm.add_constant(df_factors)\nmodel_factors = sm.OLS(y, X_model_fa).fit()\nprint(\"\\nRegression on Factor Scores (all 10 factors):\")\nprint(model_factors.summary())\n\n# Verify collinearity statistics for Factor Analysis scores (VIF and Tolerance)\ncoeffs_table_fa = compute_coefficients_table(\n    model=model_factors, X_encoded=X_model_fa, y=y, vif_table=None\n)\nprint(\"\\nCoefficients Table (Factor Analysis Model):\")\nprint(coeffs_table_fa)\n\n# Verify condition indices\nX_cond_fa = copy.deepcopy(df_factors)\ncondition_index_df_fa = condition_index(X_cond_fa)\nprint(\"\\nCondition Index (Factor Analysis Model):\")\nprint(condition_index_df_fa)\n\n\nRegression on Factor Scores (all 10 factors):\n                            OLS Regression Results                            \n==============================================================================\nDep. Variable:               ln_sales   R-squared:                       0.485\nModel:                            OLS   Adj. R-squared:                  0.449\nMethod:                 Least Squares   F-statistic:                     13.73\nDate:                Sat, 11 Apr 2026   Prob (F-statistic):           7.69e-17\nTime:                        22:39:02   Log-Likelihood:                -213.62\nNo. 
Observations:                 157   AIC:                             449.2\nDf Residuals:                     146   BIC:                             482.9\nDf Model:                          10                                         \nCovariance Type:            nonrobust                                         \n==============================================================================\n                 coef    std err          t      P>|t|      [0.025      0.975]\n------------------------------------------------------------------------------\nconst          3.2959      0.078     42.215      0.000       3.142       3.450\nFactor1       -0.1366      0.078     -1.749      0.082      -0.291       0.018\nFactor2       -0.7022      0.078     -8.994      0.000      -0.856      -0.548\nFactor3        0.3035      0.078      3.888      0.000       0.149       0.458\nFactor4     9.177e-06      0.078      0.000      1.000      -0.154       0.154\nFactor5        0.1719      0.078      2.201      0.029       0.018       0.326\nFactor6       -0.1653      0.078     -2.117      0.036      -0.320      -0.011\nFactor7        0.4130      0.078      5.290      0.000       0.259       0.567\nFactor8       -0.0072      0.078     -0.092      0.927      -0.161       0.147\nFactor9        0.0317      0.078      0.407      0.685      -0.123       0.186\nFactor10       0.0665      0.078      0.852      0.396      -0.088       0.221\n==============================================================================\nOmnibus:                       41.296   Durbin-Watson:                   1.423\nProb(Omnibus):                  0.000   Jarque-Bera (JB):              107.145\nSkew:                          -1.064   Prob(JB):                     5.42e-24\nKurtosis:                       6.442   Cond. No.                         
1.00\n==============================================================================\n\nNotes:\n[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n\nCoefficients Table (Factor Analysis Model):\n   Variable  Zero-Order r  Partial r  Semipartial r  Tolerance  VIF\n0   Factor1     -0.103920  -0.143257      -0.103920        1.0  1.0\n1   Factor2     -0.534367  -0.597080      -0.534367        1.0  1.0\n2   Factor3      0.231004   0.306300       0.231004        1.0  1.0\n3   Factor4      0.000007   0.000010       0.000007        1.0  1.0\n4   Factor5      0.130790   0.179228       0.130790        1.0  1.0\n5   Factor6     -0.125772  -0.172560      -0.125772        1.0  1.0\n6   Factor7      0.314284   0.401023       0.314284        1.0  1.0\n7   Factor8     -0.005478  -0.007630      -0.005478        1.0  1.0\n8   Factor9      0.024158   0.033630       0.024158        1.0  1.0\n9  Factor10      0.050594   0.070298       0.050594        1.0  1.0\n\nCondition Index (Factor Analysis Model):\n   Index  Eigenvalue  Condition Index\n0      0     1.00641              1.0\n1      1     1.00641              1.0\n2      2     1.00641              1.0\n3      3     1.00641              1.0\n4      4     1.00641              1.0\n5      5     1.00641              1.0\n6      6     1.00641              1.0\n7      7     1.00641              1.0\n8      8     1.00641              1.0\n9      9     1.00641              1.0\n\n\nAs expected, the collinearity statistics (VIF and Tolerance) for the factor values show that they are uncorrelated (VIF=1, Tolerance=1). The condition indices are also all close to 1, confirming that Factor Analysis successfully mitigates multicollinearity. 
The coefficient estimates are larger relative to their standard errors compared to the original model, which can lead to more factors being identified as statistically significant.\nIf the R-squared and Adjusted R-squared values for model_factors are close to those of the original model, it indicates that the regression model based on Factor Analysis performs similarly well, while successfully reducing multicollinearity. When all factors are used, the predictive performance metrics are identical to the original OLS model.\n\n\n47.6.7 Factor Analysis: Creating the Regression Model with three Extracted Factors only\n\n47.6.7.1 Setting Up the Regression Model with Reduced Factors\nTo demonstrate the effect of dimensionality reduction, a regression model is created using only the first three extracted factors from Factor Analysis.\n\n# Create a regression model using only the first three factors\ndf_factors_reduced = df_factors.iloc[:, :3] # select the first three factors\nX_model_fa_reduced = sm.add_constant(df_factors_reduced)\nmodel_factors_reduced = sm.OLS(y, X_model_fa_reduced).fit()\nprint(\"\\nRegression on Factor Scores (three factors only):\")\nprint(model_factors_reduced.summary())\n\n# Verify collinearity statistics for reduced FA scores\ncoeffs_table_fa_reduced = compute_coefficients_table(\n    model=model_factors_reduced, X_encoded=X_model_fa_reduced, y=y, vif_table=None\n)\nprint(\"\\nCoefficients Table (Reduced Factor Analysis Model):\")\nprint(coeffs_table_fa_reduced)\n\n# Verify condition indices for reduced FA scores\nX_cond_fa_reduced = copy.deepcopy(df_factors_reduced)\ncondition_index_df_fa_reduced = condition_index(X_cond_fa_reduced)\nprint(\"\\nCondition Index (Reduced Factor Analysis Model):\")\nprint(condition_index_df_fa_reduced)\n\n\nRegression on Factor Scores (three factors only):\n                            OLS Regression Results                            
\n==============================================================================\nDep. Variable:               ln_sales   R-squared:                       0.350\nModel:                            OLS   Adj. R-squared:                  0.337\nMethod:                 Least Squares   F-statistic:                     27.43\nDate:                Sat, 11 Apr 2026   Prob (F-statistic):           2.99e-14\nTime:                        22:39:02   Log-Likelihood:                -231.87\nNo. Observations:                 157   AIC:                             471.7\nDf Residuals:                     153   BIC:                             484.0\nDf Model:                           3                                         \nCovariance Type:            nonrobust                                         \n==============================================================================\n                 coef    std err          t      P>|t|      [0.025      0.975]\n------------------------------------------------------------------------------\nconst          3.2959      0.086     38.474      0.000       3.127       3.465\nFactor1       -0.1366      0.086     -1.594      0.113      -0.306       0.033\nFactor2       -0.7022      0.086     -8.197      0.000      -0.871      -0.533\nFactor3        0.3035      0.086      3.543      0.001       0.134       0.473\n==============================================================================\nOmnibus:                       43.992   Durbin-Watson:                   1.418\nProb(Omnibus):                  0.000   Jarque-Bera (JB):              134.618\nSkew:                          -1.068   Prob(JB):                     5.86e-30\nKurtosis:                       7.002   Cond. No.                         
1.00\n==============================================================================\n\nNotes:\n[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n\nCoefficients Table (Reduced Factor Analysis Model):\n  Variable  Zero-Order r  Partial r  Semipartial r  Tolerance  VIF\n0  Factor1     -0.103920  -0.127811      -0.103920        1.0  1.0\n1  Factor2     -0.534367  -0.552381      -0.534367        1.0  1.0\n2  Factor3      0.231004   0.275385       0.231004        1.0  1.0\n\nCondition Index (Reduced Factor Analysis Model):\n   Index  Eigenvalue  Condition Index\n0      0     1.00641              1.0\n1      1     1.00641              1.0\n2      2     1.00641              1.0\n\n\nThe collinearity statistics for the reduced factor set continue to show that they are uncorrelated, with VIFs of 1.0 and condition indices close to 1.\n\n\n47.6.7.2 Comparison of Model Performance of the Reduced FA Model and the Full OLS Model\nWhen reducing the number of factors from 10 to 3, the R-squared and Adjusted R-squared values for the Factor Analysis model decrease significantly (from ~0.48 to ~0.35). This indicates a trade-off: while reducing dimensionality successfully addresses multicollinearity, retaining too few factors can lead to information loss and reduced predictive accuracy. Lower MSE and RMSE values still suggest better predictive performance for the full OLS model in this specific comparison, as it retains more information.",
           "crumbs": [
             "Data-Driven Modeling and Optimization",
             "47  Addressing Multicollinearity: Principle Component Analysis (PCA) and Factor Analysis (FA)"
      @@ -5250,7 +5250,7 @@
           "href": "100_ddmo_regression.html#assessing-confounding-effects-in-multiple-regression",
           "title": "48  Regression",
           "section": "48.5 Assessing Confounding Effects in Multiple Regression",
      -    "text": "48.5 Assessing Confounding Effects in Multiple Regression\nConfounding is a bias introduced by the imbalanced distribution of extraneous risk factors among comparison groups (Wang 2007). spotoptim provides tools for assessing confounding effects in multiple regression models.\n\nExample 48.1 (Assessing Confounding Effects in Multiple Regression with spotoptim) Consider the following data generation function generate_data and the fit_ols_model function to fit an ordinary least squares (OLS) regression model.\n\nimport numpy as np\nimport pandas as pd\nimport statsmodels.formula.api as smf\n\ndef generate_data(n_samples=100, b0=0, b1=-1, b2=0, b3=10, b12=0, b13=0, b23=0, b123=0, noise_std=1) -> pd.DataFrame:\n    \"\"\"\n    Generate data for the linear formula y ~ b0 + b1*x1 + b2*x2 + b3*x3 + b12*x1*x2 + b13*x1*x3 + b23*x2*x3 + b123*x1*x2*x3.\n\n    Args:\n        n_samples (int): Number of samples to generate.\n        b0 (float): Coefficient for the intercept.\n        b1 (float): Coefficient for x1.\n        b2 (float): Coefficient for x2.\n        b3 (float): Coefficient for x3.\n        b12 (float): Coefficient for the interaction term x1*x2.\n        b13 (float): Coefficient for the interaction term x1*x3.\n        b23 (float): Coefficient for the interaction term x2*x3.\n        b123 (float): Coefficient for the interaction term x1*x2*x3.\n        noise_std (float): Standard deviation of the Gaussian noise added to y.\n\n    Returns:\n        pd.DataFrame: A DataFrame containing the generated data with columns ['x1', 'x2', 'x3', 'y'].\n    \"\"\"\n    np.random.seed(42)  # For reproducibility\n    x1 = np.random.uniform(0, 1, n_samples)\n    x2 = np.random.uniform(0, 1, n_samples)\n    x3 = np.random.uniform(0, 1, n_samples)\n    \n    y = (b0 + b1*x1 + b2*x2 + b3*x3 + b12*x1*x2 + b13*x1*x3 + b23*x2*x3 + b123*x1*x2*x3 +\n         np.random.normal(0, noise_std, n_samples))\n    \n    data = pd.DataFrame({'y': y, 'x1': x1, 'x2': x2, 'x3': 
x3})\n    return data\n\ndef fit_ols_model(formula, data) -> dict:\n    \"\"\"\n    Fit an OLS model using the given formula and data, and print the results.\n\n    Args:\n        formula (str): The formula for the OLS model.\n        data (pd.DataFrame): The data frame containing the variables.\n\n    Returns:\n        dict: A dictionary containing the p-values, estimates, confidence intervals, and AIC value.\n    \"\"\"\n    mod_0 = smf.ols(formula=formula, data=data).fit()\n    p = mod_0.pvalues.iloc[1]\n    estimate = mod_0.params.iloc[1]\n    conf_int = mod_0.conf_int().iloc[1]\n    aic_value = mod_0.aic\n\n    print(f\"p-values: {p}\")\n    print(f\"estimate: {estimate}\")\n    print(f\"conf_int: {conf_int}\")\n    print(f\"aic: {aic_value}\")\n\nThese functions can be used to generate data and fit an OLS model. Here we use the model \\[\ny = f(x_1, x_2, x_3) + \\epsilon =  x_1 + 10 x_3 + \\epsilon.\n\\] We set up the basic model \\(y_0 = f_0(x_1)\\) and analyze how the model fit changes when adding \\(x_2\\) and \\(x_3\\) to the model. If the \\(p\\)-values are decreasing by adding a variable, this indicates that the variable is relevant for the model. 
Similiarly, if the \\(p\\)-values are increasing by removing a variable, this indicates that the variable is not relevant for the model.\n\ndata = generate_data(b0=0, b1=1, b2=0, b3=10, b12=0, b13=0, b23=0, b123=0, noise_std=1)\nfit_ols_model(\"y ~ x1\", data)\nfit_ols_model(\"y ~ x1 + x2\", data)\nfit_ols_model(\"y ~ x1 + x3\", data)\nfit_ols_model(\"y ~ x1 + x2 + x3\", data)\n\np-values: 0.34343741859526244\nestimate: 1.025306391110114\nconf_int: 0   -1.111963\n1    3.162575\nName: x1, dtype: float64\naic: 517.6397392012537\np-values: 0.36375118507784565\nestimate: 0.9810502049698092\nconf_int: 0   -1.152698\n1    3.114798\nName: x1, dtype: float64\naic: 518.1426513151566\np-values: 4.946760674421827e-05\nestimate: 1.407792346942117\nconf_int: 0    0.750106\n1    2.065479\nName: x1, dtype: float64\naic: 282.73524524532\np-values: 4.849840959643549e-05\nestimate: 1.4159292625696243\nconf_int: 0    0.755494\n1    2.076364\nName: x1, dtype: float64\naic: 284.34665447613634\n\n\nThe function fit_all_lm() simplifies this procedure. 
It can be used to fit all possible linear models with the given data and print the results in a systematic way for various combinations of variables.\n\nfrom spotoptim.utils.stats import fit_all_lm, plot_coeff_vs_pvals, plot_coeff_vs_pvals_by_included\nres = fit_all_lm(\"y ~ x1\", [\"x2\", \"x3\"], data)\nprint(res[\"estimate\"])\n\nThe basic model is: y ~ x1\nThe following features will be used for fitting the basic model: Index(['x3', 'x2', 'x1', 'y'], dtype='str')\np-values: 0.34343741859526244\nestimate: 1.025306391110114\nconf_int: 0   -1.111963\n1    3.162575\nName: x1, dtype: float64\naic: 517.6397392012537\nCombinations: [('x2',), ('x3',), ('x2', 'x3')]\n  variables  estimate  conf_low  conf_high         p         aic    n\n0     basic  1.025306 -1.111963   3.162575  0.343437  517.639739  100\n1        x2  0.981050 -1.152698   3.114798  0.363751  518.142651  100\n2        x3  1.407792  0.750106   2.065479  0.000049  282.735245  100\n3    x2, x3  1.415929  0.755494   2.076364  0.000048  284.346654  100\n\n\nInterpreting the results, we can see that the \\(p\\)-values decrease when adding \\(x_3\\) (as well as both \\(x_2\\) and $x_3) to the model, indicating that \\(x_3\\) is relevant for the model. Adding only \\(x_2\\) does not significantly improve the model fit.\nIn addition to the textural output, the function plot_coeff_vs_pvals_by_included() can be used to visualize the coefficients and p-values of the fitted models.\n\nplot_coeff_vs_pvals_by_included(res)\n\n\n\n\n\n\n\nFigure 48.1: Coefficients vs. p-values for different models. The right plot indicates that x3 should be included in the model, whereas the left plot shows that x2 is not relevant.\n\n\n\n\n\nFigure 48.1 shows the coefficients and p-values for different models. Because \\(y\\) depends on \\(x1\\) and \\(x3\\), the p-value much smaller if \\(x3\\) is included in the model as can be seen in the right plot in Figure 48.1. 
The left plot shows that including \\(x2\\) in the model does not significantly improve the model fit.",
      +    "text": "48.5 Assessing Confounding Effects in Multiple Regression\nConfounding is a bias introduced by the imbalanced distribution of extraneous risk factors among comparison groups (Wang 2007). spotoptim provides tools for assessing confounding effects in multiple regression models.\n\nExample 48.1 (Assessing Confounding Effects in Multiple Regression with spotoptim) Consider the following data generation function generate_data and the fit_ols_model function to fit an ordinary least squares (OLS) regression model.\n\nimport numpy as np\nimport pandas as pd\nimport statsmodels.formula.api as smf\n\ndef generate_data(n_samples=100, b0=0, b1=-1, b2=0, b3=10, b12=0, b13=0, b23=0, b123=0, noise_std=1) -> pd.DataFrame:\n    \"\"\"\n    Generate data for the linear formula y ~ b0 + b1*x1 + b2*x2 + b3*x3 + b12*x1*x2 + b13*x1*x3 + b23*x2*x3 + b123*x1*x2*x3.\n\n    Args:\n        n_samples (int): Number of samples to generate.\n        b0 (float): Coefficient for the intercept.\n        b1 (float): Coefficient for x1.\n        b2 (float): Coefficient for x2.\n        b3 (float): Coefficient for x3.\n        b12 (float): Coefficient for the interaction term x1*x2.\n        b13 (float): Coefficient for the interaction term x1*x3.\n        b23 (float): Coefficient for the interaction term x2*x3.\n        b123 (float): Coefficient for the interaction term x1*x2*x3.\n        noise_std (float): Standard deviation of the Gaussian noise added to y.\n\n    Returns:\n        pd.DataFrame: A DataFrame containing the generated data with columns ['x1', 'x2', 'x3', 'y'].\n    \"\"\"\n    np.random.seed(42)  # For reproducibility\n    x1 = np.random.uniform(0, 1, n_samples)\n    x2 = np.random.uniform(0, 1, n_samples)\n    x3 = np.random.uniform(0, 1, n_samples)\n    \n    y = (b0 + b1*x1 + b2*x2 + b3*x3 + b12*x1*x2 + b13*x1*x3 + b23*x2*x3 + b123*x1*x2*x3 +\n         np.random.normal(0, noise_std, n_samples))\n    \n    data = pd.DataFrame({'y': y, 'x1': x1, 'x2': x2, 'x3': 
x3})\n    return data\n\ndef fit_ols_model(formula, data) -> dict:\n    \"\"\"\n    Fit an OLS model using the given formula and data, and print the results.\n\n    Args:\n        formula (str): The formula for the OLS model.\n        data (pd.DataFrame): The data frame containing the variables.\n\n    Returns:\n        dict: A dictionary containing the p-values, estimates, confidence intervals, and AIC value.\n    \"\"\"\n    mod_0 = smf.ols(formula=formula, data=data).fit()\n    p = mod_0.pvalues.iloc[1]\n    estimate = mod_0.params.iloc[1]\n    conf_int = mod_0.conf_int().iloc[1]\n    aic_value = mod_0.aic\n\n    print(f\"p-values: {p}\")\n    print(f\"estimate: {estimate}\")\n    print(f\"conf_int: {conf_int}\")\n    print(f\"aic: {aic_value}\")\n\nThese functions can be used to generate data and fit an OLS model. Here we use the model \\[\ny = f(x_1, x_2, x_3) + \\epsilon =  x_1 + 10 x_3 + \\epsilon.\n\\] We set up the basic model \\(y_0 = f_0(x_1)\\) and analyze how the model fit changes when adding \\(x_2\\) and \\(x_3\\) to the model. If the \\(p\\)-values are decreasing by adding a variable, this indicates that the variable is relevant for the model. 
Similiarly, if the \\(p\\)-values are increasing by removing a variable, this indicates that the variable is not relevant for the model.\n\ndata = generate_data(b0=0, b1=1, b2=0, b3=10, b12=0, b13=0, b23=0, b123=0, noise_std=1)\nfit_ols_model(\"y ~ x1\", data)\nfit_ols_model(\"y ~ x1 + x2\", data)\nfit_ols_model(\"y ~ x1 + x3\", data)\nfit_ols_model(\"y ~ x1 + x2 + x3\", data)\n\np-values: 0.34343741859526244\nestimate: 1.025306391110114\nconf_int: 0   -1.111963\n1    3.162575\nName: x1, dtype: float64\naic: 517.6397392012537\np-values: 0.36375118507784565\nestimate: 0.9810502049698092\nconf_int: 0   -1.152698\n1    3.114798\nName: x1, dtype: float64\naic: 518.1426513151566\np-values: 4.946760674421827e-05\nestimate: 1.407792346942117\nconf_int: 0    0.750106\n1    2.065479\nName: x1, dtype: float64\naic: 282.73524524532\np-values: 4.849840959643549e-05\nestimate: 1.4159292625696243\nconf_int: 0    0.755494\n1    2.076364\nName: x1, dtype: float64\naic: 284.34665447613634\n\n\nThe function fit_all_lm() simplifies this procedure. 
It can be used to fit all possible linear models with the given data and print the results in a systematic way for various combinations of variables.\n\nfrom spotoptim.utils.stats import fit_all_lm, plot_coeff_vs_pvals, plot_coeff_vs_pvals_by_included\nres = fit_all_lm(\"y ~ x1\", [\"x2\", \"x3\"], data)\nprint(res[\"estimate\"])\n\nThe basic model is: y ~ x1\nThe following features will be used for fitting the basic model: Index(['x2', 'x1', 'y', 'x3'], dtype='str')\np-values: 0.34343741859526244\nestimate: 1.025306391110114\nconf_int: 0   -1.111963\n1    3.162575\nName: x1, dtype: float64\naic: 517.6397392012537\nCombinations: [('x2',), ('x3',), ('x2', 'x3')]\n  variables  estimate  conf_low  conf_high         p         aic    n\n0     basic  1.025306 -1.111963   3.162575  0.343437  517.639739  100\n1        x2  0.981050 -1.152698   3.114798  0.363751  518.142651  100\n2        x3  1.407792  0.750106   2.065479  0.000049  282.735245  100\n3    x2, x3  1.415929  0.755494   2.076364  0.000048  284.346654  100\n\n\nInterpreting the results, we can see that the \\(p\\)-values decrease when adding \\(x_3\\) (as well as both \\(x_2\\) and $x_3) to the model, indicating that \\(x_3\\) is relevant for the model. Adding only \\(x_2\\) does not significantly improve the model fit.\nIn addition to the textural output, the function plot_coeff_vs_pvals_by_included() can be used to visualize the coefficients and p-values of the fitted models.\n\nplot_coeff_vs_pvals_by_included(res)\n\n\n\n\n\n\n\nFigure 48.1: Coefficients vs. p-values for different models. The right plot indicates that x3 should be included in the model, whereas the left plot shows that x2 is not relevant.\n\n\n\n\n\nFigure 48.1 shows the coefficients and p-values for different models. Because \\(y\\) depends on \\(x1\\) and \\(x3\\), the p-value much smaller if \\(x3\\) is included in the model as can be seen in the right plot in Figure 48.1. 
The left plot shows that including \\(x2\\) in the model does not significantly improve the model fit.",
           "crumbs": [
             "Data-Driven Modeling and Optimization",
             "48  Regression"
      diff --git a/docs/spot_step_by_step.html b/docs/spot_step_by_step.html
      index 3ca1fc22..06bd0bcf 100644
      --- a/docs/spot_step_by_step.html
      +++ b/docs/spot_step_by_step.html
      @@ -907,7 +907,7 @@ 

      34 Setup and Tes \] where typically (a = 1) and (b = 100). The generalized form for n dimensions is: \[ f(X) = \sum_{i=1}^{N-1} \left[100 \cdot (x_{i+1} - x_i^2)^2 + (1 - x_i)^2\right] \] The documentation can be found here: DOC

      -
      +
      import numpy as np
       import time
       from spotoptim import SpotOptim
      @@ -916,14 +916,14 @@ 

      34 Setup and Tes warnings.filterwarnings('ignore')

      Set random seed for reproducibility:

      -
      +
      np.random.seed(42)

      Based on the standard Rosenbrock function, we define two variants:

      1. the noisy Rosenbrock function:
      -
      +
      def rosenbrock_noisy(X, noise_std=0.1):
           """
           Rosenbrock with Gaussian noise for testing noisy optimization.
      @@ -934,7 +934,7 @@ 

      34 Setup and Tes return base_values + noise

      and 2. the Rosenbrock function with occasional failures:

      -
      +
      def rosenbrock_with_failures(X, failure_prob=0.15):
           """
           Rosenbrock that occasionally returns NaN to simulate evaluation failures.
      @@ -1001,7 +1001,7 @@ 

      +
      # Create optimizer
       opt = SpotOptim(
           fun=rosenbrock,
      @@ -1029,7 +1029,7 @@ 

      +
      X0 = opt.get_initial_design(X0=None)
       
       print(f"Generated {len(X0)} initial design points using Latin Hypercube Sampling")
      @@ -1050,7 +1050,7 @@ 

      +
      X0_original = opt.inverse_transform_X(X0)
       print(f"\nFirst 3 points (original scale):")
       print(X0_original[:3])
      @@ -1072,7 +1072,7 @@ 

      +
      import matplotlib.pyplot as plt
       plt.figure(figsize=(6, 6))
       plt.scatter(X0_original[:, 0], X0_original[:, 1], c='blue', label='Initial Design Points')
      @@ -1103,7 +1103,7 @@ 

      +
      X0_curated = opt.curate_initial_design(X0)
       print(f"Curated design shape: {X0_curated.shape}")
       print(f"Unique points: {len(np.unique(X0_curated, axis=0))}")
      @@ -1128,7 +1128,7 @@ 

      +
      X0_original = opt.inverse_transform_X(X0_curated)
       y0 = opt.evaluate_function(X0_curated)
       
      @@ -1169,7 +1169,7 @@ 

      +
      n_before = len(y0)
       X0_clean, y0_clean, n_evaluated = opt.rm_initial_design_NA_values(X0_curated, y0)
       
      @@ -1202,7 +1202,7 @@ 

      +
      try:
           opt.check_size_initial_design(y0_clean, n_evaluated)
           print(f"x Validation passed: {len(y0_clean)} valid points available")
      @@ -1230,7 +1230,7 @@ 

      <
      • Initialize storage (as done in optimize())
      -
      +
      # Initialize storage and statistics using the new init_storage() method
       opt.init_storage(X0_clean, y0_clean)
       
      @@ -1258,7 +1258,7 @@ 

      <
    12. Store in mean_X, mean_y, and var_y
    -
    +
    print(f"\nStatistics updated:")
     if opt.repeats_initial > 1 or opt.repeats_surrogate > 1:
         print(f"  - mean_X: {opt.mean_X.shape}")
    @@ -1293,7 +1293,7 @@ 

    +
    opt.get_best_xy_initial_design()
     
     print(f"Best point found: {opt.best_x_}")
    @@ -1316,7 +1316,7 @@ 

    35.11 Illustration of the Initial Design Phase Results

    To visualize the results, we generate a contour plot with contour lines of the objective function and mark the best point. The other evaluated points are shown as well:

    -
    +
    # Create a grid of points for contour plotting
     x = np.linspace(-2, 2, 400)
     y = np.linspace(-2, 2, 400)
    @@ -1379,7 +1379,7 @@ 

  • Fit surrogate model using surrogate.fit(X, y)
  • Surrogate learns the function landscape and uncertainty
  • -
    +
    opt.fit_scheduler()
     
     print(f"Surrogate fitted with {len(opt.y_)} points")
    @@ -1409,7 +1409,7 @@ 

    +
    opt.plot_surrogate()
    @@ -1431,7 +1431,7 @@

    Select the most promising points based on the criteria.
  • Update the surrogate model with the selected points.
  • -
    +
    X_ocba = opt.apply_ocba()
     if X_ocba is not None:
         print(f"OCBA selected {X_ocba.shape[0]} points for re-evaluation")
    @@ -1456,7 +1456,7 @@ 

    +
    X_test = np.array([[0.5, 0.5], [1.0, 1.0], [-0.5, 0.5]])
     mu, sigma = opt._predict_with_uncertainty(X_test)
     
    @@ -1489,7 +1489,7 @@ 

    +
    x_next = opt.suggest_next_infill_point()
     print(f"Next point suggested: {x_next}")
    @@ -1497,7 +1497,7 @@

    +
    mu_next, sigma_next = opt._predict_with_uncertainty(x_next.reshape(1, -1))
     print(f"\nPrediction at suggested point:")
     print(f"  μ(x_next) = {mu_next[0]:.4f}")
    @@ -1522,7 +1522,7 @@ 

    36.4.1 Method: _acquisition_function()

    Compute acquisition function value to guide search. The acquisition function is used as an objective function by the optimizer on the surrogate, e.g., differential evolution. It determines where to sample next.

    -
    +
    # Evaluate acquisition at test points
     print(f"Acquisition type: {opt.acquisition} (Expected Improvement)")
     print(f"\nAcquisition values at test points:")
    @@ -1570,7 +1570,7 @@ 

    +
    x_next_repeated = opt.update_repeats_infill_points(x_next)
     print(f"Shape before repeating: {x_next.shape}")
     print(f"Shape after repeating: {x_next_repeated.shape}")
    @@ -1586,7 +1586,7 @@ 

    36.6 Append OCBA Points to Infill Points

    Combines OCBA selected points with the next suggested point for evaluation.

    -
    +
    if X_ocba is not None:
         x_next_repeated = np.append(X_ocba, x_next_repeated, axis=0)
    @@ -1596,7 +1596,7 @@

    36.7.1 Method: evaluate_function() (again)

    Evaluate the objective function at the suggested points.

    -
    +
    x_next_2d = x_next.reshape(1, -1)
     y_next = opt.evaluate_function(x_next_2d)
     
    @@ -1626,7 +1626,7 @@ 

    +
    x_clean, y_clean = opt._handle_NA_new_points(x_next_2d, y_next)
     
     if x_clean is not None:
    @@ -1656,7 +1656,7 @@ 

  • Calculate success rate as ratio of valid evaluations to total evaluated
  • -
    +
    opt.update_success_rate(y0_clean)
     print(f"\nSuccess rate updated: {opt.success_rate:.2%} (valid evaluations / total evaluations)")
    @@ -1671,7 +1671,7 @@

    36.10.1 Internal updates

    Add new evaluations to storage.

    -
    +
    opt.update_storage(x_next_repeated, y_next)

    @@ -1693,7 +1693,7 @@

    +
    opt.update_stats()
     print(f"Basic statistics:")
     print(f"  min_y: {opt.min_y:.6f}")
    @@ -1723,7 +1723,7 @@ 

    36.12.1 Method: _update_best_main_loop()

    Update the best solution if improvement found.

    -
    +
    best_before = opt.best_y_
     opt._update_best_main_loop(x_clean, y_clean, start_time=time.time())
     
    @@ -1756,7 +1756,7 @@ 

    37 Complete Optimization Example

    Now let’s run a complete optimization to see all steps in action:

    -
    +
    # Create fresh optimizer
     opt_complete = SpotOptim(
         fun=rosenbrock,
    @@ -1807,7 +1807,7 @@ 

    38 Noisy Functio

    When dealing with noisy objective functions, SpotOptim can evaluate each point multiple times and track statistics.

    38.1 Configuration for Noisy Functions

    -
    +
    NOISE_STD = 10.0
     print("\n" + "="*70)
     print("NOISY FUNCTION OPTIMIZATION")
    @@ -1851,7 +1851,7 @@ 

    38.2.1 Modified Initial Design

    With repeats_initial > 1:

    -
    +
    result_noisy = opt_noisy.optimize()
     
     print(f"\nInitial design with repeats:")
    @@ -1905,7 +1905,7 @@ 

    38.2.2 Update Statistics Method

    -
    +
    print(f"After optimization:")
     print(f"  Total evaluations: {len(opt_noisy.y_)}")
     print(f"  Unique points: {len(opt_noisy.mean_X)}")
    @@ -1935,7 +1935,7 @@ 

    39 Optimal Compu

    OCBA intelligently allocates additional evaluations to distinguish between competing solutions.

    39.1 OCBA Configuration

    -
    +
    print("\n" + "="*70)
     print("OPTIMAL COMPUTING BUDGET ALLOCATION (OCBA)")
     print("="*70)
    @@ -1972,7 +1972,7 @@ 

    39.2 OCBA Method

    39.2.1 Method: apply_ocba()

    -
    +
    result_ocba = opt_ocba.optimize()
     
     print(f"\nOCBA applied during optimization")
    @@ -2018,7 +2018,7 @@ 

    40 Handling Func

    SpotOptim robustly handles functions that occasionally fail (return NaN/inf).

    40.1 Example with Failures

    -
    +
    opt_failures = SpotOptim(
         fun=rosenbrock_with_failures,
         bounds=[(-2, 2), (-2, 2)],
    @@ -2060,7 +2060,7 @@ 

    <

    40.2 Failure Handling in Initial Design

    40.2.1 Method: rm_initial_design_NA_values()

    -
    +
    print("Initial design failure handling:")
     print("  1. Identify NaN/inf values")
     print("  2. Remove invalid points entirely")
    @@ -2092,7 +2092,7 @@ 

    40.3 Failure Handling in Sequential Phase

    40.3.1 Method: _handle_NA_new_points()

    -
    +
    print("Sequential phase failure handling:")
     print("  1. Apply penalty to NaN/inf values")
     print("     - Penalty = max(history) + 3×std(history)")
    @@ -2129,7 +2129,7 @@ 

    40.4.1 Method: apply_penalty_NA()

    Let’s demonstrate penalty calculation:

    -
    +
    # Simulate historical values
     y_history_sim = np.array([10.0, 15.0, 8.0, 12.0, 20.0, 9.0])
     y_new_sim = np.array([7.0, np.nan, 11.0, np.inf])
    @@ -2258,7 +2258,7 @@ 

    42 Termination Conditions

    42.1 Method: determine_termination()

    -
    +
    print("\n" + "="*70)
     print("TERMINATION CONDITIONS")
     print("="*70)
    @@ -2301,7 +2301,7 @@ 

    43 Performance Comparison on Noisy Functions

    Let’s compare standard, noisy, and noisy+OCBA optimization:

    -
    +
    print("\n" + "="*70)
     print("PERFORMANCE COMPARISON")
     print("="*70)
    diff --git a/docs/spot_step_by_step_files/figure-pdf/cell-17-output-1.pdf b/docs/spot_step_by_step_files/figure-pdf/cell-17-output-1.pdf
    index 0db496d5..9019fa85 100644
    Binary files a/docs/spot_step_by_step_files/figure-pdf/cell-17-output-1.pdf and b/docs/spot_step_by_step_files/figure-pdf/cell-17-output-1.pdf differ
    diff --git a/docs/spot_step_by_step_files/figure-pdf/cell-19-output-1.pdf b/docs/spot_step_by_step_files/figure-pdf/cell-19-output-1.pdf
    index efae6b7d..9a8c3db7 100644
    Binary files a/docs/spot_step_by_step_files/figure-pdf/cell-19-output-1.pdf and b/docs/spot_step_by_step_files/figure-pdf/cell-19-output-1.pdf differ
    diff --git a/docs/spot_step_by_step_files/figure-pdf/cell-9-output-1.pdf b/docs/spot_step_by_step_files/figure-pdf/cell-9-output-1.pdf
    index 91b8edaa..9880aa1b 100644
    Binary files a/docs/spot_step_by_step_files/figure-pdf/cell-9-output-1.pdf and b/docs/spot_step_by_step_files/figure-pdf/cell-9-output-1.pdf differ
    diff --git a/docs/spotoptim_examples.html b/docs/spotoptim_examples.html
    index e0167645..548ae6ab 100644
    --- a/docs/spotoptim_examples.html
    +++ b/docs/spotoptim_examples.html
    @@ -744,7 +744,7 @@ 

    3 

    3.1 Setup: Import Required Packages

    First, we import all necessary packages for the examples.

    -
    +
    # Core imports
     import numpy as np
     import time
    @@ -775,7 +775,7 @@ 

    +
    # Define a simple quadratic function
     def sphere(X):
         """Sphere function: f(x) = sum(x^2)"""
    @@ -802,37 +802,37 @@ 

    print(f"Message: {result.message}")

    TensorBoard logging disabled
    -Initial best: f(x) = 1.463558
    -Iter 1 | Best: 1.092223 | Rate: 1.00 | Evals: 30.0%
    -Iter 2 | Best: 0.222011 | Rate: 1.00 | Evals: 35.0%
    -Iter 3 | Best: 0.015843 | Rate: 1.00 | Evals: 40.0%
    -Iter 4 | Best: 0.003055 | Rate: 1.00 | Evals: 45.0%
    -Iter 5 | Best: 0.000562 | Rate: 1.00 | Evals: 50.0%
    -Iter 6 | Best: 0.000161 | Rate: 1.00 | Evals: 55.0%
    -Iter 7 | Best: 0.000019 | Rate: 1.00 | Evals: 60.0%
    -Iter 8 | Best: 0.000008 | Rate: 1.00 | Evals: 65.0%
    -Iter 9 | Best: 0.000006 | Rate: 1.00 | Evals: 70.0%
    -Iter 10 | Best: 0.000006 | Rate: 1.00 | Evals: 75.0%
    -Iter 11 | Best: 0.000005 | Rate: 1.00 | Evals: 80.0%
    -Iter 12 | Best: 0.000005 | Rate: 1.00 | Evals: 85.0%
    -Iter 13 | Best: 0.000005 | Rate: 1.00 | Evals: 90.0%
    -Iter 14 | Best: 0.000005 | Curr: 0.000005 | Rate: 0.93 | Evals: 95.0%
    -Iter 15 | Best: 0.000005 | Rate: 0.93 | Evals: 100.0%
    +Initial best: f(x) = 2.901470
    +Iter 1 | Best: 1.599343 | Rate: 1.00 | Evals: 30.0%
    +Iter 2 | Best: 1.599343 | Curr: 2.054661 | Rate: 0.50 | Evals: 35.0%
    +Iter 3 | Best: 0.349058 | Rate: 0.67 | Evals: 40.0%
    +Iter 4 | Best: 0.178050 | Rate: 0.75 | Evals: 45.0%
    +Iter 5 | Best: 0.089616 | Rate: 0.80 | Evals: 50.0%
    +Iter 6 | Best: 0.000354 | Rate: 0.83 | Evals: 55.0%
    +Iter 7 | Best: 0.000047 | Rate: 0.86 | Evals: 60.0%
    +Iter 8 | Best: 0.000002 | Rate: 0.88 | Evals: 65.0%
    +Iter 9 | Best: 0.000002 | Curr: 0.000002 | Rate: 0.78 | Evals: 70.0%
    +Iter 10 | Best: 0.000002 | Curr: 0.000002 | Rate: 0.70 | Evals: 75.0%
    +Iter 11 | Best: 0.000002 | Curr: 0.000002 | Rate: 0.64 | Evals: 80.0%
    +Iter 12 | Best: 0.000002 | Curr: 0.000002 | Rate: 0.58 | Evals: 85.0%
    +Iter 13 | Best: 0.000001 | Rate: 0.62 | Evals: 90.0%
    +Iter 14 | Best: 0.000001 | Rate: 0.64 | Evals: 95.0%
    +Iter 15 | Best: 0.000001 | Rate: 0.67 | Evals: 100.0%
     
    -Best point found: [ 0.00110384 -0.00190381]
    -Best value: 0.000005
    +Best point found: [0.00066472 0.00053723]
    +Best value: 0.000001
     Total evaluations: 20
     Sequential iterations: 15
     Success: True
     Message: Optimization terminated: maximum evaluations (20) reached
    -         Current function value: 0.000005
    +         Current function value: 0.000001
              Iterations: 15
              Function evaluations: 20

    3.2.1 Changing the Optimizer

    -
    +
    # Create optimizer
     opt = SpotOptim(
         fun=sphere,
    @@ -854,30 +854,30 @@ 

    print(f"Message: {result.message}")

    TensorBoard logging disabled
    -Initial best: f(x) = 1.847955
    -Iter 1 | Best: 1.847955 | Curr: 2.775960 | Rate: 0.00 | Evals: 30.0%
    -Iter 2 | Best: 1.847955 | Curr: 3.161398 | Rate: 0.00 | Evals: 35.0%
    -Iter 3 | Best: 0.720748 | Rate: 0.33 | Evals: 40.0%
    -Iter 4 | Best: 0.300748 | Rate: 0.50 | Evals: 45.0%
    -Iter 5 | Best: 0.198922 | Rate: 0.60 | Evals: 50.0%
    -Iter 6 | Best: 0.066854 | Rate: 0.67 | Evals: 55.0%
    -Iter 7 | Best: 0.004573 | Rate: 0.71 | Evals: 60.0%
    -Iter 8 | Best: 0.004573 | Curr: 0.019856 | Rate: 0.62 | Evals: 65.0%
    -Iter 9 | Best: 0.004573 | Curr: 0.013083 | Rate: 0.56 | Evals: 70.0%
    -Iter 10 | Best: 0.004573 | Curr: 0.007611 | Rate: 0.50 | Evals: 75.0%
    -Iter 11 | Best: 0.004573 | Curr: 0.007171 | Rate: 0.45 | Evals: 80.0%
    -Iter 12 | Best: 0.004573 | Curr: 0.005684 | Rate: 0.42 | Evals: 85.0%
    -Iter 13 | Best: 0.004573 | Curr: 0.005130 | Rate: 0.38 | Evals: 90.0%
    -Iter 14 | Best: 0.004573 | Curr: 0.004954 | Rate: 0.36 | Evals: 95.0%
    -Iter 15 | Best: 0.004573 | Curr: 0.005546 | Rate: 0.33 | Evals: 100.0%
    +Initial best: f(x) = 3.988087
    +Iter 1 | Best: 3.988087 | Curr: 6.117423 | Rate: 0.00 | Evals: 30.0%
    +Iter 2 | Best: 3.988087 | Curr: 6.932585 | Rate: 0.00 | Evals: 35.0%
    +Iter 3 | Best: 2.237966 | Rate: 0.33 | Evals: 40.0%
    +Iter 4 | Best: 0.801531 | Rate: 0.50 | Evals: 45.0%
    +Iter 5 | Best: 0.090012 | Rate: 0.60 | Evals: 50.0%
    +Iter 6 | Best: 0.090012 | Curr: 0.138100 | Rate: 0.50 | Evals: 55.0%
    +Iter 7 | Best: 0.063386 | Rate: 0.57 | Evals: 60.0%
    +Iter 8 | Best: 0.037775 | Rate: 0.62 | Evals: 65.0%
    +Iter 9 | Best: 0.028391 | Rate: 0.67 | Evals: 70.0%
    +Iter 10 | Best: 0.026155 | Rate: 0.70 | Evals: 75.0%
    +Iter 11 | Best: 0.023670 | Rate: 0.73 | Evals: 80.0%
    +Iter 12 | Best: 0.023670 | Curr: 0.024301 | Rate: 0.67 | Evals: 85.0%
    +Iter 13 | Best: 0.023222 | Rate: 0.69 | Evals: 90.0%
    +Iter 14 | Best: 0.022894 | Rate: 0.71 | Evals: 95.0%
    +Iter 15 | Best: 0.022894 | Curr: 0.023553 | Rate: 0.67 | Evals: 100.0%
     
    -Best point found: [-0.06745712 -0.0046995 ]
    -Best value: 0.004573
    +Best point found: [0.15108317 0.00822385]
    +Best value: 0.022894
     Total evaluations: 20
     Sequential iterations: 15
     Success: True
     Message: Optimization terminated: maximum evaluations (20) reached
    -         Current function value: 0.004573
    +         Current function value: 0.022894
              Iterations: 15
              Function evaluations: 20
    @@ -893,7 +893,7 @@

    Used by: optimize() method at the start
    Calls: generate_initial_design() if X0 is None

    This method handles three scenarios: 1. Generate LHS design when X0=None 2. Include starting point x0 if provided 3. Transform user-provided X0

    -
    +
    # Example 1: Generate default LHS design
     opt = SpotOptim(
         fun=sphere,
    @@ -938,7 +938,7 @@ 

    Purpose: Remove duplicates and ensure sufficient unique points
    Used by: optimize() after get_initial_design()
    Handles: Duplicate removal, point generation, repetition for noisy functions

    -
    +
    # Example 1: Remove duplicates
     opt = SpotOptim(
         fun=sphere,
    @@ -977,7 +977,7 @@ 

    +
    opt = SpotOptim(
         fun=sphere,
         bounds=[(-5, 5), (-5, 5)],
    @@ -1009,7 +1009,7 @@ 

    +
    opt = SpotOptim(
         fun=sphere,
         bounds=[(-5, 5), (-5, 5)],
    @@ -1043,7 +1043,7 @@ 

    +
    opt = SpotOptim(
         fun=sphere,
         bounds=[(-5, 5), (-5, 5)],
    @@ -1078,7 +1078,7 @@ 

    Purpose: Fit surrogate model to data
    Used by: optimize() in main loop
    Calls: fit_selection_dispatcher() if max_surrogate_points exceeded

    -
    +
    opt = SpotOptim(
         fun=sphere,
         bounds=[(-5, 5), (-5, 5)],
    @@ -1106,7 +1106,7 @@ 

    +
    opt = SpotOptim(
         fun=sphere,
         bounds=[(-5, 5), (-5, 5)],
    @@ -1141,7 +1141,7 @@ 

    < Used by: suggest_next_infill_point() for optimization
    Calls: _predict_with_uncertainty()
    Supports: Expected Improvement (EI), Probability of Improvement (PI), Mean prediction

    -
    +
    opt = SpotOptim(
         fun=sphere,
         bounds=[(-5, 5), (-5, 5)],
    @@ -1176,7 +1176,7 @@ 

    +
    opt = SpotOptim(
         fun=sphere,
         bounds=[(-5, 5), (-5, 5)],
    @@ -1211,7 +1211,7 @@ 

    Purpose: Apply OCBA for noisy functions to determine which points to re-evaluate
    Used by: optimize() in main loop when noise=True and ocba_delta > 0
    Returns: Points to re-evaluate or None

    -
    +
    # Example with OCBA enabled
     opt = SpotOptim(
         fun=sphere,
    @@ -1257,7 +1257,7 @@ 

    Purpose: Replace NaN and infinite values with penalty plus random noise
    Used by: _handle_NA_new_points() and indirectly by optimize()
    Algorithm: penalty = max(finite_y) + 3 * std(finite_y) + noise

    -
    +
    opt = SpotOptim(
         fun=sphere,
         bounds=[(-5, 5)],
    @@ -1289,7 +1289,7 @@ 

    Purpose: Remove rows where y contains NaN or inf values
    Used by: optimize() after function evaluations
    Returns: Cleaned arrays

    -
    +
    opt = SpotOptim(
         fun=sphere,
         bounds=[(-5, 5)],
    @@ -1320,7 +1320,7 @@ 

    < Used by: optimize() after evaluating new points
    Calls: apply_penalty_NA() and remove_nan()
    Returns: None, None if all evaluations invalid (skip iteration)

    -
    +
    opt = SpotOptim(
         fun=sphere,
         bounds=[(-5, 5), (-5, 5)],
    @@ -1367,7 +1367,7 @@ 

    Purpose: Update best solution found during main optimization loop
    Used by: optimize() after each iteration
    Updates: self.best_x_ and self.best_y_ if improvement found

    -
    +
    opt = SpotOptim(
         fun=sphere,
         bounds=[(-5, 5), (-5, 5)],
    @@ -1415,7 +1415,7 @@ 

    Purpose: Determine termination reason for optimization
    Used by: optimize() at the end
    Checks: Max iterations, time limit, or successful completion

    -
    +
    opt = SpotOptim(
         fun=sphere,
         bounds=[(-5, 5), (-5, 5)],
    @@ -1463,7 +1463,7 @@ 

    3.9.1 8.1 select_new()

    Purpose: Select rows from A that are not in X (avoid duplicate evaluations)
    Used by: suggest_next_infill_point() to ensure new points are different from evaluated points

    -
    +
    opt = SpotOptim(
         fun=sphere,
         bounds=[(-5, 5)],
    @@ -1501,7 +1501,7 @@ 

    3.9.2 8.2 repair_non_numeric()

    Purpose: Round non-numeric values (int, factor) to integers
    Used by: Various methods to ensure integer/factor variables have valid values

    -
    +
    opt = SpotOptim(
         fun=sphere,
         bounds=[(-5, 5), (-5, 5)],
    @@ -1532,7 +1532,7 @@ 

    <

    Purpose: Map internal integer values to original factor strings
    Used by: optimize() when preparing results for user
    Handles: Factor (categorical) variables

    -
    +
    opt = SpotOptim(
         fun=sphere,
         bounds=[(-5, 5), ('red', 'green', 'blue')],
    @@ -1564,7 +1564,7 @@ 

    <

    3.10 9. Integration Examples

    3.10.1 9.1 Optimization with Custom Initial Design

    -
    +
    opt = SpotOptim(
         fun=sphere,
         bounds=[(-5, 5), (-5, 5)],
    @@ -1603,7 +1603,7 @@ 

    3.10.2 9.2 Optimization with Starting Point

    -
    +
    opt = SpotOptim(
         fun=sphere,
         bounds=[(-5, 5), (-5, 5)],
    @@ -1643,7 +1643,7 @@ 

    3.10.3 9.3 Optimization with Integer Variables

    -
    +
    opt = SpotOptim(
         fun=sphere,
         bounds=[(-5, 5), (-5, 5)],
    @@ -1775,7 +1775,7 @@ 

    3.10.5 9.5 Optimization with NaN Handling

    -
    +
    # Function that sometimes returns NaN
     def sometimes_nan(X):
         X = np.atleast_2d(X)
    diff --git a/docs/spotoptim_explained.html b/docs/spotoptim_explained.html
    index 0505b330..f3b69fe8 100644
    --- a/docs/spotoptim_explained.html
    +++ b/docs/spotoptim_explained.html
    @@ -1389,7 +1389,7 @@ 

    34.5 Using the Surrogate Model

    SpotOptim allows to use the surrogate model to predict the objective function values for new points. The default surrogate model is sklearn’s Gaussian Process Regressor. Here we compare the default surrogate model with a simple linear model, which is provided as MLP_Surrogate in the spotoptim.surrogate module.

    -
    +
    import numpy as np
     import pandas as pd
     from spotoptim.surrogate import Kriging, MLPSurrogate
    @@ -1457,7 +1457,7 @@ 

    34.6 Comparing the SpotOptim With the Default Surrogate Model and MLP Surrogate

    Here we run SpotOptim using the default Kriging surrogate and the MLPSurrogate on standard test functions to compare their optimization performance.

    -
    +
    from spotoptim import SpotOptim
     from spotoptim.surrogate import MLPSurrogate
     from spotoptim.function.so import rosenbrock, ackley, robot_arm_hard
    @@ -1562,7 +1562,7 @@ 

    34.7.1 Comparison: Default (GP) vs Multi-Surrogate (GP + MLP) on Robot Arm

    The following example demonstrates how to set up a hybrid optimization strategy combining: 1. GaussianProcessRegressor (default): Good for valid uncertainty estimation and exploitation. 2. MLPSurrogate: Good for global trend modeling and scalability.

    We compare the performance on the 10D Robot Arm problem.

    -
    +
    from spotoptim import SpotOptim
     from spotoptim.surrogate import MLPSurrogate
     from spotoptim.function.so import robot_arm_hard
    diff --git a/docs/spotoptim_intro.html b/docs/spotoptim_intro.html
    index e0db5b88..cf8946ce 100644
    --- a/docs/spotoptim_intro.html
    +++ b/docs/spotoptim_intro.html
    @@ -726,7 +726,7 @@ 

    2 Examples

    Let us consider the problem of minimizing the Rosenbrock function. This function (and its respective derivatives) is implemented in rosen (and rosen_der, rosen_hess) in the scipy.optimize module.

    The function is usually evaluated on the hypercube \(x_i \in [-5, 10]\), for all \(i = 1, \ldots, d\), although it may be restricted to the hypercube \(x_i \in [-2.048, 2.048]\), for all \(i = 1, \ldots, d\), see https://www.sfu.ca/~ssurjano/rosen.html.

    A simple application of the Nelder-Mead method is:

    -
    +
    from scipy.optimize import minimize, rosen, rosen_der
     x0 = [1.3, 0.7, 0.8, 1.9, 1.2]
     res = minimize(rosen, x0, method='Nelder-Mead', tol=1e-6, 
    @@ -737,7 +737,7 @@ 

    2 Examples

    Now using the BFGS algorithm, using the first derivative and a few options:

    -
    +
    res = minimize(rosen, x0, method='BFGS', jac=rosen_der,
                    options={'gtol': 1e-6, 'disp': True})
     res.x
    @@ -752,7 +752,7 @@ 

    2 Examples

    Now, let’s see how to solve the same problem using SpotOptim, which uses a Surrogate-Model Based Optimization (SMBO) approach. Unlike minimize, SpotOptim requires bounds as it samples the search space globally.

    -
    +
    import numpy as np
     from spotoptim import SpotOptim
     from scipy.optimize import rosen
    @@ -783,7 +783,7 @@ 

    2 Examples

    Function evaluations: 50
    -
    +
    print(f"Best solution found: {res.x}")
     print(f"Best objective value: {res.fun}")
     print(f"Total evaluations: {res.nfev}")
    @@ -798,7 +798,7 @@

    2 Examples

    2.1 Complex Constrained Optimization: The Robot Arm

    For a more challenging example, let’s consider the robot_arm_hard problem. This is a 10-dimensional problem where a simulated robot arm must reach a target while avoiding obstacles in a maze-like configuration. It features “hard” constraints implemented as severe penalties, creating a rugged landscape that is difficult for local optimizers.

    First, let’s try solving it with Scipy’s default local optimizer. Note that we need to wrap the function to return a scalar float, as robot_arm_hard returns an array.

    -
    +
    import numpy as np
     from scipy.optimize import minimize
     from spotoptim.function.so import robot_arm_hard
    @@ -827,7 +827,7 @@ 

    +
    from spotoptim import SpotOptim
     
     # Define bounds: 10 angles normalized to [0, 1]
    diff --git a/docs/spotoptim_parallel.html b/docs/spotoptim_parallel.html
    index 55c0a2b7..e4f17cc2 100644
    --- a/docs/spotoptim_parallel.html
    +++ b/docs/spotoptim_parallel.html
    @@ -866,12 +866,12 @@ 

    +Speedup: 1.65x

    diff --git a/docs/spotoptim_parallel_files/figure-html/benchmark_script-output-2.png b/docs/spotoptim_parallel_files/figure-html/benchmark_script-output-2.png index cc58b59b..951250a5 100644 Binary files a/docs/spotoptim_parallel_files/figure-html/benchmark_script-output-2.png and b/docs/spotoptim_parallel_files/figure-html/benchmark_script-output-2.png differ diff --git a/docs/spotoptim_parallel_files/figure-html/benchmark_script-output-4.png b/docs/spotoptim_parallel_files/figure-html/benchmark_script-output-4.png deleted file mode 100644 index 38176405..00000000 Binary files a/docs/spotoptim_parallel_files/figure-html/benchmark_script-output-4.png and /dev/null differ diff --git a/docs/spotoptim_parallel_files/figure-pdf/benchmark_script-output-2.pdf b/docs/spotoptim_parallel_files/figure-pdf/benchmark_script-output-2.pdf index b9beb92e..6cad19eb 100644 Binary files a/docs/spotoptim_parallel_files/figure-pdf/benchmark_script-output-2.pdf and b/docs/spotoptim_parallel_files/figure-pdf/benchmark_script-output-2.pdf differ diff --git a/docs/spotoptim_restart.html b/docs/spotoptim_restart.html index 749aecf5..1eec0a63 100644 --- a/docs/spotoptim_restart.html +++ b/docs/spotoptim_restart.html @@ -728,7 +728,7 @@

    8.3 Example: Triggering Restarts

    In this example, we set restart_after_n to a very small value (5) to intentionally force restarts and demonstrate the mechanism. We use a multimodal function where getting stuck is possible.

    -
    +
    import numpy as np
     from spotoptim import SpotOptim
     
    @@ -805,7 +805,7 @@ 

    8.3.1 Analyzing Restart Results

    You can access the results of each individual restart run via the restarts_results_ attribute.

    -
    +
    print(f"Total global evaluations: {result.nfev}")
     print(f"Number of restarts performed: {len(optimizer.restarts_results_) - 1}")
     print(f"Best value found globally: {result.fun:.6f}")
    diff --git a/docs/success_rate.html b/docs/success_rate.html
    index 34ee9db3..d67a28a7 100644
    --- a/docs/success_rate.html
    +++ b/docs/success_rate.html
    @@ -802,18 +802,18 @@ 

    print(f"Total evaluations: {optimizer.counter}")

    TensorBoard logging disabled
    -Initial best: f(x) = 3.194459
    -Iter 1 | Best: 2.930449 | Rate: 1.00 | Evals: 55.0%
    -Iter 2 | Best: 2.114603 | Rate: 1.00 | Evals: 60.0%
    -Iter 3 | Best: 0.261093 | Rate: 1.00 | Evals: 65.0%
    -Iter 4 | Best: 0.114542 | Rate: 1.00 | Evals: 70.0%
    -Iter 5 | Best: 0.021557 | Rate: 1.00 | Evals: 75.0%
    -Iter 6 | Best: 0.000849 | Rate: 1.00 | Evals: 80.0%
    -Iter 7 | Best: 0.000589 | Rate: 1.00 | Evals: 85.0%
    -Iter 8 | Best: 0.000034 | Rate: 1.00 | Evals: 90.0%
    -Iter 9 | Best: 0.000000 | Rate: 1.00 | Evals: 95.0%
    -Iter 10 | Best: 0.000000 | Rate: 1.00 | Evals: 100.0%
    -Final success rate: 100.00%
    +Initial best: f(x) = 12.204669
    +Iter 1 | Best: 12.204669 | Curr: 12.210079 | Rate: 0.00 | Evals: 55.0%
    +Iter 2 | Best: 11.418586 | Rate: 0.50 | Evals: 60.0%
    +Iter 3 | Best: 6.409408 | Rate: 0.67 | Evals: 65.0%
    +Iter 4 | Best: 2.374566 | Rate: 0.75 | Evals: 70.0%
    +Iter 5 | Best: 0.672796 | Rate: 0.80 | Evals: 75.0%
    +Iter 6 | Best: 0.352679 | Rate: 0.83 | Evals: 80.0%
    +Iter 7 | Best: 0.002272 | Rate: 0.86 | Evals: 85.0%
    +Iter 8 | Best: 0.001173 | Rate: 0.88 | Evals: 90.0%
    +Iter 9 | Best: 0.000027 | Rate: 0.89 | Evals: 95.0%
    +Iter 10 | Best: 0.000027 | Curr: 0.000039 | Rate: 0.80 | Evals: 100.0%
    +Final success rate: 80.00%
     Total evaluations: 20
    diff --git a/docs/surrogate_selection.html b/docs/surrogate_selection.html index 2505ff48..5d59aae6 100644 --- a/docs/surrogate_selection.html +++ b/docs/surrogate_selection.html @@ -815,7 +815,7 @@

  • XGBoost (if available)
  • -
    +
    import os
     os.environ["OMP_NUM_THREADS"] = "1"  # Prevent OpenMP conflict on macOS
     
    @@ -893,7 +893,7 @@ 

    +
    # Plot most important hyperparameters
     optimizer_default.plot_important_hyperparameter_contour(max_imp=3)
     plt.suptitle('Default GP Matern nu=2.5: Most Important Parameters', y=1.02)
    @@ -1025,7 +1025,7 @@ 

    +
    optimizer_rbf.plot_important_hyperparameter_contour(max_imp=3)
     plt.suptitle('GP RBF Kernel: Most Important Parameters', y=1.02)
     plt.show()
    @@ -1140,7 +1140,7 @@

    +
    optimizer_matern15.plot_important_hyperparameter_contour(max_imp=3)
     plt.suptitle('GP Matern nu=1.5 Kernel: Most Important Parameters', y=1.02)
     plt.show()
    @@ -1256,7 +1256,7 @@

    +
    optimizer_rq.plot_important_hyperparameter_contour(max_imp=3)
     plt.suptitle('GP Rational Quadratic Kernel: Most Important Parameters', y=1.02)
     plt.show()
    @@ -1351,7 +1351,7 @@

    @@ -1367,7 +1367,7 @@

    +
    optimizer_kriging.plot_important_hyperparameter_contour(max_imp=3)
     plt.suptitle('SpotOptim Kriging: Most Important Parameters', y=1.02)
     plt.show()
    @@ -1479,7 +1479,7 @@

    +
    optimizer_rf.plot_important_hyperparameter_contour(max_imp=3)
     plt.suptitle('Random Forest: Most Important Parameters', y=1.02)
     plt.show()
    @@ -1592,7 +1592,7 @@

    @@ -1714,7 +1714,7 @@

    -
    +
    optimizer_svr.plot_important_hyperparameter_contour(max_imp=3)
     plt.suptitle('Support Vector Regression: Most Important Parameters', y=1.02)
     plt.show()
    @@ -1824,7 +1824,7 @@

    +
    optimizer_gb.plot_important_hyperparameter_contour(max_imp=3)
     plt.suptitle('Gradient Boosting: Most Important Parameters', y=1.02)
     plt.show()
    @@ -1897,15 +1897,15 @@

    diff --git a/docs/surrogate_selection_files/figure-html/surrogate-selection-comparison-viz-output-1.png b/docs/surrogate_selection_files/figure-html/surrogate-selection-comparison-viz-output-1.png index 902c46ef..90171f37 100644 Binary files a/docs/surrogate_selection_files/figure-html/surrogate-selection-comparison-viz-output-1.png and b/docs/surrogate_selection_files/figure-html/surrogate-selection-comparison-viz-output-1.png differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-10-output-2.pdf b/docs/surrogate_selection_files/figure-pdf/cell-10-output-2.pdf index 15737892..a67ea304 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/cell-10-output-2.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-10-output-2.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-10-output-4.pdf b/docs/surrogate_selection_files/figure-pdf/cell-10-output-4.pdf index ed7371d8..1d697e3c 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/cell-10-output-4.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-10-output-4.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-10-output-6.pdf b/docs/surrogate_selection_files/figure-pdf/cell-10-output-6.pdf index 93456e6e..f592ce0d 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/cell-10-output-6.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-10-output-6.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-13-output-2.pdf b/docs/surrogate_selection_files/figure-pdf/cell-13-output-2.pdf index 05d1a7b0..fc50203e 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/cell-13-output-2.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-13-output-2.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-13-output-4.pdf b/docs/surrogate_selection_files/figure-pdf/cell-13-output-4.pdf index 1d3c5800..4ec4881b 100644 Binary files 
a/docs/surrogate_selection_files/figure-pdf/cell-13-output-4.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-13-output-4.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-13-output-6.pdf b/docs/surrogate_selection_files/figure-pdf/cell-13-output-6.pdf index 9ceb07c7..d5a60dce 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/cell-13-output-6.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-13-output-6.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-16-output-2.pdf b/docs/surrogate_selection_files/figure-pdf/cell-16-output-2.pdf index 1136bb0e..d3710d35 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/cell-16-output-2.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-16-output-2.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-16-output-4.pdf b/docs/surrogate_selection_files/figure-pdf/cell-16-output-4.pdf index 4ad3f033..a5b17eb1 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/cell-16-output-4.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-16-output-4.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-16-output-6.pdf b/docs/surrogate_selection_files/figure-pdf/cell-16-output-6.pdf index 7eb51a40..22d51c47 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/cell-16-output-6.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-16-output-6.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-19-output-2.pdf b/docs/surrogate_selection_files/figure-pdf/cell-19-output-2.pdf index dd279852..22af68c3 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/cell-19-output-2.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-19-output-2.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-19-output-4.pdf b/docs/surrogate_selection_files/figure-pdf/cell-19-output-4.pdf index 2561ff19..3ff65d9b 100644 Binary files 
a/docs/surrogate_selection_files/figure-pdf/cell-19-output-4.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-19-output-4.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-19-output-6.pdf b/docs/surrogate_selection_files/figure-pdf/cell-19-output-6.pdf index c32f82ac..76372aaf 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/cell-19-output-6.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-19-output-6.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-22-output-2.pdf b/docs/surrogate_selection_files/figure-pdf/cell-22-output-2.pdf index 4a6a38b4..3663e7be 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/cell-22-output-2.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-22-output-2.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-22-output-4.pdf b/docs/surrogate_selection_files/figure-pdf/cell-22-output-4.pdf index 11ff7b02..346f3381 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/cell-22-output-4.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-22-output-4.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-22-output-6.pdf b/docs/surrogate_selection_files/figure-pdf/cell-22-output-6.pdf index 9f49c598..4529985b 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/cell-22-output-6.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-22-output-6.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-26-output-2.pdf b/docs/surrogate_selection_files/figure-pdf/cell-26-output-2.pdf index 2e7b94ad..3cfc2aad 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/cell-26-output-2.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-26-output-2.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-26-output-4.pdf b/docs/surrogate_selection_files/figure-pdf/cell-26-output-4.pdf index 8be741fd..ddd6c0c9 100644 Binary files 
a/docs/surrogate_selection_files/figure-pdf/cell-26-output-4.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-26-output-4.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-26-output-6.pdf b/docs/surrogate_selection_files/figure-pdf/cell-26-output-6.pdf index f9647705..999ea473 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/cell-26-output-6.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-26-output-6.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-29-output-2.pdf b/docs/surrogate_selection_files/figure-pdf/cell-29-output-2.pdf index 4cdcf325..204c0506 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/cell-29-output-2.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-29-output-2.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-29-output-4.pdf b/docs/surrogate_selection_files/figure-pdf/cell-29-output-4.pdf index 283bffbd..7c8a69cc 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/cell-29-output-4.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-29-output-4.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-29-output-6.pdf b/docs/surrogate_selection_files/figure-pdf/cell-29-output-6.pdf index 6943853b..854f47cb 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/cell-29-output-6.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-29-output-6.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-7-output-2.pdf b/docs/surrogate_selection_files/figure-pdf/cell-7-output-2.pdf index 913e596c..d4477391 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/cell-7-output-2.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-7-output-2.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-7-output-4.pdf b/docs/surrogate_selection_files/figure-pdf/cell-7-output-4.pdf index 876b6748..481dd92b 100644 Binary files 
a/docs/surrogate_selection_files/figure-pdf/cell-7-output-4.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-7-output-4.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/cell-7-output-6.pdf b/docs/surrogate_selection_files/figure-pdf/cell-7-output-6.pdf index de8549ae..cf593be9 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/cell-7-output-6.pdf and b/docs/surrogate_selection_files/figure-pdf/cell-7-output-6.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-comparison-viz-output-1.pdf b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-comparison-viz-output-1.pdf index 192b7c7e..2b0e707c 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-comparison-viz-output-1.pdf and b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-comparison-viz-output-1.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-convergence-output-1.pdf b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-convergence-output-1.pdf index 16316b6c..017028e5 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-convergence-output-1.pdf and b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-convergence-output-1.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-default-viz-output-1.pdf b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-default-viz-output-1.pdf index 50c9bbf5..6c447700 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-default-viz-output-1.pdf and b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-default-viz-output-1.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-gb-viz-output-1.pdf b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-gb-viz-output-1.pdf index ffa9c193..82d3e4d4 100644 Binary files 
a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-gb-viz-output-1.pdf and b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-gb-viz-output-1.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-kriging-viz-output-1.pdf b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-kriging-viz-output-1.pdf index 3157c77e..b6c34ec7 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-kriging-viz-output-1.pdf and b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-kriging-viz-output-1.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-matern15-viz-output-1.pdf b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-matern15-viz-output-1.pdf index 1a30a30f..0cc44fe8 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-matern15-viz-output-1.pdf and b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-matern15-viz-output-1.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-rbf-viz-output-1.pdf b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-rbf-viz-output-1.pdf index 588ab30a..b18d8411 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-rbf-viz-output-1.pdf and b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-rbf-viz-output-1.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-rf-viz-output-1.pdf b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-rf-viz-output-1.pdf index 8e898bc8..987287a1 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-rf-viz-output-1.pdf and b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-rf-viz-output-1.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-rq-viz-output-1.pdf 
b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-rq-viz-output-1.pdf index cbd2bda3..f7ff07c5 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-rq-viz-output-1.pdf and b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-rq-viz-output-1.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-svr-viz-output-1.pdf b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-svr-viz-output-1.pdf index 67a59336..d00887ab 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-svr-viz-output-1.pdf and b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-svr-viz-output-1.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-2.pdf b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-2.pdf index 8dc2652a..32ae33ef 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-2.pdf and b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-2.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-3.pdf b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-3.pdf index 068a89f2..f0c0e6ff 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-3.pdf and b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-3.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-5.pdf b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-5.pdf index 8ae480cc..85fd4251 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-5.pdf and b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-5.pdf differ diff --git 
a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-7.pdf b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-7.pdf index 54514ae5..ba834901 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-7.pdf and b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-7.pdf differ diff --git a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-9.pdf b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-9.pdf index 8595a276..c6e11e7f 100644 Binary files a/docs/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-9.pdf and b/docs/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-9.pdf differ diff --git a/docs/tensorboard.html b/docs/tensorboard.html index 537dd2f9..8dcceaf9 100644 --- a/docs/tensorboard.html +++ b/docs/tensorboard.html @@ -743,9 +743,9 @@

    print(f"Best value: {result.fun:.6f}") print(f"Logs saved to: runs/{optimizer.tensorboard_path}")

    -
    Removed old TensorBoard logs: runs/spotoptim_20260404_093122
    +
    Removed old TensorBoard logs: runs/spotoptim_20260411_212846
     Cleaned 1 old TensorBoard log directory
    -TensorBoard logging enabled: runs/spotoptim_20260404_094042
    +TensorBoard logging enabled: runs/spotoptim_20260411_213759
     Initial best: f(x) = 3.925721
     Iter 1 | Best: 0.020506 | Rate: 1.00 | Evals: 55.0%
     Iter 2 | Best: 0.000751 | Rate: 1.00 | Evals: 60.0%
    @@ -757,9 +757,9 @@ 

    result = optimizer.optimize() print(f"Best value: {result.fun:.6f}")

    -
    Removed old TensorBoard logs: runs/spotoptim_20260404_094042
    +
    Removed old TensorBoard logs: runs/spotoptim_20260411_213759
     Cleaned 1 old TensorBoard log directory
    -TensorBoard logging enabled: runs/spotoptim_20260404_094050
    +TensorBoard logging enabled: runs/spotoptim_20260411_213807
     Initial best: f(x) = 3.925721
     Iter 1 | Best: 0.020506 | Rate: 1.00 | Evals: 55.0%
     Iter 2 | Best: 0.000751 | Rate: 1.00 | Evals: 60.0%
    @@ -879,7 +879,7 @@ 

    @@ -1041,9 +1041,9 @@

    print(f"Best value: {result.fun:.6f}") print(f"Logs saved to: runs/{optimizer.tensorboard_path}")

    -
    Removed old TensorBoard logs: runs/spotoptim_20260404_094059
    +
    Removed old TensorBoard logs: runs/spotoptim_20260411_213816
     Cleaned 1 old TensorBoard log directory
    -TensorBoard logging enabled: runs/spotoptim_20260404_094059
    +TensorBoard logging enabled: runs/spotoptim_20260411_213816
     Initial best: f(x) = 3.925721
     Iter 1 | Best: 0.020506 | Rate: 1.00 | Evals: 55.0%
     Iter 2 | Best: 0.000751 | Rate: 1.00 | Evals: 60.0%
    @@ -1055,9 +1055,9 @@ 

    +Logs saved to: runs/runs/spotoptim_20260411_213816

    diff --git a/docs/torch_objective.html b/docs/torch_objective.html index 48c6d343..a82ba6e0 100644 --- a/docs/torch_objective.html +++ b/docs/torch_objective.html @@ -734,7 +734,7 @@

    25.3.1 1. Define the Model

    You can use any standard PyTorch model. For this example, we’ll use the built-in MLP class from spotoptim.nn.mlp, but you could define your own nn.Module.

    -
    +
    import torch
     import torch.nn as nn
     from spotoptim.nn.mlp import MLP
    @@ -743,7 +743,7 @@

    25.3.2 2. Prepare the Data

    Load your data (numpy arrays or tensors) and wrap it in SpotDataFromArray.

    -
    +
    from sklearn.datasets import fetch_california_housing
     from sklearn.model_selection import train_test_split
     from sklearn.preprocessing import StandardScaler
    @@ -773,7 +773,7 @@ 

    25.3.3 3. Define Hyperparameters

    Use ParameterSet to define the search space. You can tune floats, integers, and categorical factors.

    -
    +
    from spotoptim.hyperparameters import ParameterSet
     
     params = ParameterSet()
    @@ -846,7 +846,7 @@ 

    25.3.4 4. Create Experiment Control

    Bundles the configuration together.

    -
    +
    import torch
     from spotoptim.core.experiment import ExperimentControl
     
    @@ -866,7 +866,7 @@ 

    25.3.5 5. Initialize TorchObjective

    This wraps the experiment into a callable function that SpotOptim can use.

    -
    +
    from spotoptim.function.torch_objective import TorchObjective
     
     objective = TorchObjective(experiment)
    @@ -875,7 +875,7 @@

    25.3.6 6. Run Optimization

    Now use SpotOptim to find the best hyperparameters.

    -
    +
    from spotoptim import SpotOptim
     import numpy as np
     
    @@ -939,7 +939,7 @@ 

    <
  • Optimization method: You can even tune the optimizer itself (e.g., “Adam” vs “SGD”) if you add it as a factor hyperparameter and your model class supports a get_optimizer method (like MLP does).
  • Example adding optimizer tuning:

    -
    +
    # Add optimizer choice
     params.add_factor("optimizer", ["Adam", "SGD", "RMSprop"], default="Adam")
    diff --git a/docs/transformations.html b/docs/transformations.html index ffa42724..a971e644 100644 --- a/docs/transformations.html +++ b/docs/transformations.html @@ -791,7 +791,7 @@

    11.3 Quick Start

    11.3.1 Basic Log-Scale Transformation

    -
    +
    from spotoptim import SpotOptim
     import numpy as np
     
    @@ -953,7 +953,7 @@ 

    11.5 Detailed Examples

    11.5.1 Example 1: Neural Network Hyperparameter Tuning

    -
    +
    import torch
     import torch.nn as nn
     from spotoptim import SpotOptim
    @@ -1090,7 +1090,7 @@ 

    11.5.2 Example 2: Physics-Informed Neural Networks (PINNs)

    -
    +
    from spotoptim import SpotOptim
     import numpy as np
     
    @@ -1144,7 +1144,7 @@ 

    11.5.3 Example 3: Mixing Transformations

    -
    +
    from spotoptim import SpotOptim
     import numpy as np
     
    @@ -1193,7 +1193,7 @@ 

    11.6.1 Design Table (Before Optimization)

    -
    +
    from spotoptim import SpotOptim
     import numpy as np
     
    @@ -1231,7 +1231,7 @@ 

    11.6.2 Results Table (After Optimization)

    -
    +
    result = optimizer.optimize()
     
     # Display results with transformations
    @@ -1239,10 +1239,10 @@ 

    |    name |   type |   default |   lower |   upper |   tuned |   transform |
     |---------|--------|-----------|---------|---------|---------|-------------|
    -|      lr |  float |    0.5005 |   0.001 |       1 |  0.0085 |       log10 |
    -|   alpha |  float |     5.005 |    0.01 |      10 |  0.1281 |         log |
    -| neurons |  float |       505 |      10 |    1000 | 31.1461 |        sqrt |
    -|    bias |  float |         0 |      -5 |       5 |  0.0908 |           - |
    +|      lr |  float |    0.5005 |   0.001 |       1 |  0.0041 |       log10 |
    +|   alpha |  float |     5.005 |    0.01 |      10 |  0.1308 |         log |
    +| neurons |  float |       505 |      10 |    1000 | 21.3053 |        sqrt |
    +|    bias |  float |         0 |      -5 |       5 | -1.2417 |           - |

    Output shows the “transform” column with transformation types, helping you understand which parameters were optimized in which scale.

    diff --git a/experiments/batch_001/exp_001_exp.pkl b/experiments/batch_001/exp_001_exp.pkl index d87858c0..fa433544 100644 Binary files a/experiments/batch_001/exp_001_exp.pkl and b/experiments/batch_001/exp_001_exp.pkl differ diff --git a/experiments_compare_de_tricands_bfgs/exp_bfgs_exp.pkl b/experiments_compare_de_tricands_bfgs/exp_bfgs_exp.pkl index c64b9ef6..4fc8fbbc 100644 Binary files a/experiments_compare_de_tricands_bfgs/exp_bfgs_exp.pkl and b/experiments_compare_de_tricands_bfgs/exp_bfgs_exp.pkl differ diff --git a/experiments_compare_de_tricands_bfgs/exp_bfgs_res.pkl b/experiments_compare_de_tricands_bfgs/exp_bfgs_res.pkl index 1f0a1103..a6fa89f2 100644 Binary files a/experiments_compare_de_tricands_bfgs/exp_bfgs_res.pkl and b/experiments_compare_de_tricands_bfgs/exp_bfgs_res.pkl differ diff --git a/experiments_compare_de_tricands_bfgs/exp_de_exp.pkl b/experiments_compare_de_tricands_bfgs/exp_de_exp.pkl index 73107484..e2693fa7 100644 Binary files a/experiments_compare_de_tricands_bfgs/exp_de_exp.pkl and b/experiments_compare_de_tricands_bfgs/exp_de_exp.pkl differ diff --git a/experiments_compare_de_tricands_bfgs/exp_de_res.pkl b/experiments_compare_de_tricands_bfgs/exp_de_res.pkl index 1018d272..cdb2aab9 100644 Binary files a/experiments_compare_de_tricands_bfgs/exp_de_res.pkl and b/experiments_compare_de_tricands_bfgs/exp_de_res.pkl differ diff --git a/experiments_compare_de_tricands_bfgs/exp_tricands_exp.pkl b/experiments_compare_de_tricands_bfgs/exp_tricands_exp.pkl index 0f4f1463..adc64ac2 100644 Binary files a/experiments_compare_de_tricands_bfgs/exp_tricands_exp.pkl and b/experiments_compare_de_tricands_bfgs/exp_tricands_exp.pkl differ diff --git a/experiments_compare_de_tricands_bfgs/exp_tricands_res.pkl b/experiments_compare_de_tricands_bfgs/exp_tricands_res.pkl index 8360d70a..5608d013 100644 Binary files a/experiments_compare_de_tricands_bfgs/exp_tricands_res.pkl and b/experiments_compare_de_tricands_bfgs/exp_tricands_res.pkl differ diff 
--git a/kriging_forrester_files/figure-pdf/cell-4-output-1.pdf b/kriging_forrester_files/figure-pdf/cell-4-output-1.pdf index 2f8e8b3b..91a12e75 100644 Binary files a/kriging_forrester_files/figure-pdf/cell-4-output-1.pdf and b/kriging_forrester_files/figure-pdf/cell-4-output-1.pdf differ diff --git a/kriging_surrogate_files/figure-pdf/cell-11-output-1.pdf b/kriging_surrogate_files/figure-pdf/cell-11-output-1.pdf index 4cc7033c..1978fe4f 100644 Binary files a/kriging_surrogate_files/figure-pdf/cell-11-output-1.pdf and b/kriging_surrogate_files/figure-pdf/cell-11-output-1.pdf differ diff --git a/kriging_surrogate_files/figure-pdf/cell-4-output-1.pdf b/kriging_surrogate_files/figure-pdf/cell-4-output-1.pdf index e759fae3..a73c5c70 100644 Binary files a/kriging_surrogate_files/figure-pdf/cell-4-output-1.pdf and b/kriging_surrogate_files/figure-pdf/cell-4-output-1.pdf differ diff --git a/mixed_vars_res.pkl b/mixed_vars_res.pkl index 4e3d48e5..188c4192 100644 Binary files a/mixed_vars_res.pkl and b/mixed_vars_res.pkl differ diff --git a/multiobjective_files/figure-pdf/fig-dtlz2-plot-output-1.pdf b/multiobjective_files/figure-pdf/fig-dtlz2-plot-output-1.pdf index 68918a79..2e32c043 100644 Binary files a/multiobjective_files/figure-pdf/fig-dtlz2-plot-output-1.pdf and b/multiobjective_files/figure-pdf/fig-dtlz2-plot-output-1.pdf differ diff --git a/multiobjective_files/figure-pdf/fig-fonseca-fleming-plot-output-1.pdf b/multiobjective_files/figure-pdf/fig-fonseca-fleming-plot-output-1.pdf index b4866f11..78fa63cc 100644 Binary files a/multiobjective_files/figure-pdf/fig-fonseca-fleming-plot-output-1.pdf and b/multiobjective_files/figure-pdf/fig-fonseca-fleming-plot-output-1.pdf differ diff --git a/multiobjective_files/figure-pdf/fig-mo-desirability-function-plot-output-1.pdf b/multiobjective_files/figure-pdf/fig-mo-desirability-function-plot-output-1.pdf index 92464a0c..8b53f157 100644 Binary files 
a/multiobjective_files/figure-pdf/fig-mo-desirability-function-plot-output-1.pdf and b/multiobjective_files/figure-pdf/fig-mo-desirability-function-plot-output-1.pdf differ diff --git a/multiobjective_files/figure-pdf/fig-mo-desirability-function-plot-output-2.pdf b/multiobjective_files/figure-pdf/fig-mo-desirability-function-plot-output-2.pdf index 87105f34..de8e43cd 100644 Binary files a/multiobjective_files/figure-pdf/fig-mo-desirability-function-plot-output-2.pdf and b/multiobjective_files/figure-pdf/fig-mo-desirability-function-plot-output-2.pdf differ diff --git a/multiobjective_files/figure-pdf/fig-mo-pareto-front-orig-plot-output-1.pdf b/multiobjective_files/figure-pdf/fig-mo-pareto-front-orig-plot-output-1.pdf index af18444a..1fe338d6 100644 Binary files a/multiobjective_files/figure-pdf/fig-mo-pareto-front-orig-plot-output-1.pdf and b/multiobjective_files/figure-pdf/fig-mo-pareto-front-orig-plot-output-1.pdf differ diff --git a/multiobjective_files/figure-pdf/fig-mo-pareto-optx-plot-output-1.pdf b/multiobjective_files/figure-pdf/fig-mo-pareto-optx-plot-output-1.pdf index 08319553..56d8af3a 100644 Binary files a/multiobjective_files/figure-pdf/fig-mo-pareto-optx-plot-output-1.pdf and b/multiobjective_files/figure-pdf/fig-mo-pareto-optx-plot-output-1.pdf differ diff --git a/multiobjective_files/figure-pdf/fig-mo-xy-contour-plot-output-1.pdf b/multiobjective_files/figure-pdf/fig-mo-xy-contour-plot-output-1.pdf index 86c7bee1..46c91bda 100644 Binary files a/multiobjective_files/figure-pdf/fig-mo-xy-contour-plot-output-1.pdf and b/multiobjective_files/figure-pdf/fig-mo-xy-contour-plot-output-1.pdf differ diff --git a/multiobjective_files/figure-pdf/fig-mo-xy-surface-plot-output-1.pdf b/multiobjective_files/figure-pdf/fig-mo-xy-surface-plot-output-1.pdf index 0e288b15..72a82e07 100644 Binary files a/multiobjective_files/figure-pdf/fig-mo-xy-surface-plot-output-1.pdf and b/multiobjective_files/figure-pdf/fig-mo-xy-surface-plot-output-1.pdf differ diff --git 
a/multiobjective_files/figure-pdf/fig-schaffer-n1-plot-output-1.pdf b/multiobjective_files/figure-pdf/fig-schaffer-n1-plot-output-1.pdf index 5970d881..53e4fd22 100644 Binary files a/multiobjective_files/figure-pdf/fig-schaffer-n1-plot-output-1.pdf and b/multiobjective_files/figure-pdf/fig-schaffer-n1-plot-output-1.pdf differ diff --git a/multiobjective_files/figure-pdf/fig-zdt1-plot-output-1.pdf b/multiobjective_files/figure-pdf/fig-zdt1-plot-output-1.pdf index 08ce9833..e3900e7a 100644 Binary files a/multiobjective_files/figure-pdf/fig-zdt1-plot-output-1.pdf and b/multiobjective_files/figure-pdf/fig-zdt1-plot-output-1.pdf differ diff --git a/multiobjective_files/figure-pdf/fig-zdt1-scalarization-diff-output-1.pdf b/multiobjective_files/figure-pdf/fig-zdt1-scalarization-diff-output-1.pdf index cac475ee..3d00ad99 100644 Binary files a/multiobjective_files/figure-pdf/fig-zdt1-scalarization-diff-output-1.pdf and b/multiobjective_files/figure-pdf/fig-zdt1-scalarization-diff-output-1.pdf differ diff --git a/multiobjective_files/figure-pdf/fig-zdt2-plot-output-1.pdf b/multiobjective_files/figure-pdf/fig-zdt2-plot-output-1.pdf index 6fc50af9..63945bb4 100644 Binary files a/multiobjective_files/figure-pdf/fig-zdt2-plot-output-1.pdf and b/multiobjective_files/figure-pdf/fig-zdt2-plot-output-1.pdf differ diff --git a/multiobjective_files/figure-pdf/fig-zdt3-plot-output-1.pdf b/multiobjective_files/figure-pdf/fig-zdt3-plot-output-1.pdf index 42980f9f..1820c474 100644 Binary files a/multiobjective_files/figure-pdf/fig-zdt3-plot-output-1.pdf and b/multiobjective_files/figure-pdf/fig-zdt3-plot-output-1.pdf differ diff --git a/multiobjective_files/figure-pdf/fig-zdt4-plot-output-1.pdf b/multiobjective_files/figure-pdf/fig-zdt4-plot-output-1.pdf index 70df6e6a..20729fcd 100644 Binary files a/multiobjective_files/figure-pdf/fig-zdt4-plot-output-1.pdf and b/multiobjective_files/figure-pdf/fig-zdt4-plot-output-1.pdf differ diff --git 
a/multiobjective_files/figure-pdf/fig-zdt6-plot-output-1.pdf b/multiobjective_files/figure-pdf/fig-zdt6-plot-output-1.pdf index 58b11ea2..17b396d4 100644 Binary files a/multiobjective_files/figure-pdf/fig-zdt6-plot-output-1.pdf and b/multiobjective_files/figure-pdf/fig-zdt6-plot-output-1.pdf differ diff --git a/multiobjective_files/figure-pdf/mo-mm-desirability-function-plot-output-1.pdf b/multiobjective_files/figure-pdf/mo-mm-desirability-function-plot-output-1.pdf index 65e1195a..0277e8d1 100644 Binary files a/multiobjective_files/figure-pdf/mo-mm-desirability-function-plot-output-1.pdf and b/multiobjective_files/figure-pdf/mo-mm-desirability-function-plot-output-1.pdf differ diff --git a/my_result_res.pkl b/my_result_res.pkl index 0c2dc649..bd5ee75c 100644 Binary files a/my_result_res.pkl and b/my_result_res.pkl differ diff --git a/noisy_opt_res.pkl b/noisy_opt_res.pkl index ea49b9a9..7103ec3c 100644 Binary files a/noisy_opt_res.pkl and b/noisy_opt_res.pkl differ diff --git a/pinns_1_files/figure-pdf/cell-10-output-1.pdf b/pinns_1_files/figure-pdf/cell-10-output-1.pdf index 214eb0cd..a7ae6832 100644 Binary files a/pinns_1_files/figure-pdf/cell-10-output-1.pdf and b/pinns_1_files/figure-pdf/cell-10-output-1.pdf differ diff --git a/pinns_1_files/figure-pdf/cell-10-output-2.pdf b/pinns_1_files/figure-pdf/cell-10-output-2.pdf index 1c6017d7..47a7528d 100644 Binary files a/pinns_1_files/figure-pdf/cell-10-output-2.pdf and b/pinns_1_files/figure-pdf/cell-10-output-2.pdf differ diff --git a/pinns_1_files/figure-pdf/cell-10-output-3.pdf b/pinns_1_files/figure-pdf/cell-10-output-3.pdf index 28607d7b..2b643fa8 100644 Binary files a/pinns_1_files/figure-pdf/cell-10-output-3.pdf and b/pinns_1_files/figure-pdf/cell-10-output-3.pdf differ diff --git a/pinns_1_files/figure-pdf/cell-10-output-4.pdf b/pinns_1_files/figure-pdf/cell-10-output-4.pdf index da2a113b..10383d12 100644 Binary files a/pinns_1_files/figure-pdf/cell-10-output-4.pdf and 
b/pinns_1_files/figure-pdf/cell-10-output-4.pdf differ diff --git a/pinns_1_files/figure-pdf/cell-11-output-1.pdf b/pinns_1_files/figure-pdf/cell-11-output-1.pdf index ffbd83d0..b6d67f8b 100644 Binary files a/pinns_1_files/figure-pdf/cell-11-output-1.pdf and b/pinns_1_files/figure-pdf/cell-11-output-1.pdf differ diff --git a/pinns_1_files/figure-pdf/cell-12-output-1.pdf b/pinns_1_files/figure-pdf/cell-12-output-1.pdf index 13707962..402fa16c 100644 Binary files a/pinns_1_files/figure-pdf/cell-12-output-1.pdf and b/pinns_1_files/figure-pdf/cell-12-output-1.pdf differ diff --git a/pinns_1_files/figure-pdf/cell-6-output-1.pdf b/pinns_1_files/figure-pdf/cell-6-output-1.pdf index 8c6504bc..5e7b6bb0 100644 Binary files a/pinns_1_files/figure-pdf/cell-6-output-1.pdf and b/pinns_1_files/figure-pdf/cell-6-output-1.pdf differ diff --git a/pinns_1_files/figure-pdf/cell-9-output-1.pdf b/pinns_1_files/figure-pdf/cell-9-output-1.pdf index 5df0a819..fa3b34aa 100644 Binary files a/pinns_1_files/figure-pdf/cell-9-output-1.pdf and b/pinns_1_files/figure-pdf/cell-9-output-1.pdf differ diff --git a/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-baseline-comparison-pinn2-output-2.pdf b/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-baseline-comparison-pinn2-output-2.pdf index 70929842..b3881c16 100644 Binary files a/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-baseline-comparison-pinn2-output-2.pdf and b/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-baseline-comparison-pinn2-output-2.pdf differ diff --git a/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-final-model-training-pinn2-output-2.pdf b/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-final-model-training-pinn2-output-2.pdf index 8d1827dd..e1843923 100644 Binary files a/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-final-model-training-pinn2-output-2.pdf and b/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-final-model-training-pinn2-output-2.pdf differ diff --git 
a/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-final-model-visualization-pinn2-output-1.pdf b/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-final-model-visualization-pinn2-output-1.pdf index f947544f..0bfd2539 100644 Binary files a/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-final-model-visualization-pinn2-output-1.pdf and b/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-final-model-visualization-pinn2-output-1.pdf differ diff --git a/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-optimization-history-pinn2-output-1.pdf b/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-optimization-history-pinn2-output-1.pdf index 877dfcd8..994d693f 100644 Binary files a/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-optimization-history-pinn2-output-1.pdf and b/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-optimization-history-pinn2-output-1.pdf differ diff --git a/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-parameter-scatter-pinn2-output-1.pdf b/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-parameter-scatter-pinn2-output-1.pdf index 7da472e7..125fa169 100644 Binary files a/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-parameter-scatter-pinn2-output-1.pdf and b/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-parameter-scatter-pinn2-output-1.pdf differ diff --git a/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-sensitivity-analysis-pinn2-output-1.pdf b/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-sensitivity-analysis-pinn2-output-1.pdf index 8e3cb98a..cf7eaf8c 100644 Binary files a/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-sensitivity-analysis-pinn2-output-1.pdf and b/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-sensitivity-analysis-pinn2-output-1.pdf differ diff --git a/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-2.pdf b/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-2.pdf index 3f095736..bc2404b8 
100644 Binary files a/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-2.pdf and b/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-2.pdf differ diff --git a/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-4.pdf b/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-4.pdf index bee5fbd3..f53adadd 100644 Binary files a/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-4.pdf and b/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-4.pdf differ diff --git a/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-6.pdf b/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-6.pdf index 4431b5cf..b596ae29 100644 Binary files a/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-6.pdf and b/pinns_2_hyperparameter_tuning_files/figure-pdf/pinn-surrogate-visualization-pinn2-output-6.pdf differ diff --git a/plot_surrogate_files/figure-pdf/basic-plot-surrogate-example-output-1.pdf b/plot_surrogate_files/figure-pdf/basic-plot-surrogate-example-output-1.pdf index 47fc2171..794e37d7 100644 Binary files a/plot_surrogate_files/figure-pdf/basic-plot-surrogate-example-output-1.pdf and b/plot_surrogate_files/figure-pdf/basic-plot-surrogate-example-output-1.pdf differ diff --git a/plot_surrogate_files/figure-pdf/custom-plot-surrogate-example-output-1.pdf b/plot_surrogate_files/figure-pdf/custom-plot-surrogate-example-output-1.pdf index 43f8a61b..a92b886d 100644 Binary files a/plot_surrogate_files/figure-pdf/custom-plot-surrogate-example-output-1.pdf and b/plot_surrogate_files/figure-pdf/custom-plot-surrogate-example-output-1.pdf differ diff --git a/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-1.pdf 
b/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-1.pdf index f4b9862d..7b192c68 100644 Binary files a/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-1.pdf and b/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-1.pdf differ diff --git a/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-2.pdf b/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-2.pdf index d3f78bef..1d6bd39d 100644 Binary files a/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-2.pdf and b/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-2.pdf differ diff --git a/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-3.pdf b/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-3.pdf index e6c96a5f..cbe9fa7c 100644 Binary files a/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-3.pdf and b/plot_surrogate_files/figure-pdf/plot-surrogate-3d-example-all-output-3.pdf differ diff --git a/plot_surrogate_files/figure-pdf/plot-surrogate-4d-example-output-1.pdf b/plot_surrogate_files/figure-pdf/plot-surrogate-4d-example-output-1.pdf index 3ca24986..bdcb6a92 100644 Binary files a/plot_surrogate_files/figure-pdf/plot-surrogate-4d-example-output-1.pdf and b/plot_surrogate_files/figure-pdf/plot-surrogate-4d-example-output-1.pdf differ diff --git a/plot_surrogate_files/figure-pdf/plot-surrogate-4d-example-output-2.pdf b/plot_surrogate_files/figure-pdf/plot-surrogate-4d-example-output-2.pdf index f8e0c7b2..af774d7d 100644 Binary files a/plot_surrogate_files/figure-pdf/plot-surrogate-4d-example-output-2.pdf and b/plot_surrogate_files/figure-pdf/plot-surrogate-4d-example-output-2.pdf differ diff --git a/plot_surrogate_files/figure-pdf/plot-surrogate-kriging-example-output-1.pdf b/plot_surrogate_files/figure-pdf/plot-surrogate-kriging-example-output-1.pdf index 10b30c0d..0e9bf3bd 100644 Binary files 
a/plot_surrogate_files/figure-pdf/plot-surrogate-kriging-example-output-1.pdf and b/plot_surrogate_files/figure-pdf/plot-surrogate-kriging-example-output-1.pdf differ diff --git a/plot_surrogate_files/figure-pdf/plot-surrogate-rosenbrock-example-output-1.pdf b/plot_surrogate_files/figure-pdf/plot-surrogate-rosenbrock-example-output-1.pdf index 26ffeea6..1ba6d643 100644 Binary files a/plot_surrogate_files/figure-pdf/plot-surrogate-rosenbrock-example-output-1.pdf and b/plot_surrogate_files/figure-pdf/plot-surrogate-rosenbrock-example-output-1.pdf differ diff --git a/remote_job_001_exp.pkl b/remote_job_001_exp.pkl index fbf54216..198fdd6b 100644 Binary files a/remote_job_001_exp.pkl and b/remote_job_001_exp.pkl differ diff --git a/remote_job_001_res.pkl b/remote_job_001_res.pkl index f68ff22b..26cebb0a 100644 Binary files a/remote_job_001_res.pkl and b/remote_job_001_res.pkl differ diff --git a/save_load_files/figure-pdf/analyze-results-locally-complete-output-2.pdf b/save_load_files/figure-pdf/analyze-results-locally-complete-output-2.pdf index c179c148..76fb74cb 100644 Binary files a/save_load_files/figure-pdf/analyze-results-locally-complete-output-2.pdf and b/save_load_files/figure-pdf/analyze-results-locally-complete-output-2.pdf differ diff --git a/save_load_files/figure-pdf/fig-plot-progress-1-output-1.pdf b/save_load_files/figure-pdf/fig-plot-progress-1-output-1.pdf index 87d71ade..e459aec1 100644 Binary files a/save_load_files/figure-pdf/fig-plot-progress-1-output-1.pdf and b/save_load_files/figure-pdf/fig-plot-progress-1-output-1.pdf differ diff --git a/sphere_opt_res.pkl b/sphere_opt_res.pkl index bad76c5b..bb2f6990 100644 Binary files a/sphere_opt_res.pkl and b/sphere_opt_res.pkl differ diff --git a/spot_step_by_step_files/figure-pdf/cell-17-output-1.pdf b/spot_step_by_step_files/figure-pdf/cell-17-output-1.pdf index 0db496d5..9019fa85 100644 Binary files a/spot_step_by_step_files/figure-pdf/cell-17-output-1.pdf and 
b/spot_step_by_step_files/figure-pdf/cell-17-output-1.pdf differ diff --git a/spot_step_by_step_files/figure-pdf/cell-19-output-1.pdf b/spot_step_by_step_files/figure-pdf/cell-19-output-1.pdf index efae6b7d..9a8c3db7 100644 Binary files a/spot_step_by_step_files/figure-pdf/cell-19-output-1.pdf and b/spot_step_by_step_files/figure-pdf/cell-19-output-1.pdf differ diff --git a/spot_step_by_step_files/figure-pdf/cell-9-output-1.pdf b/spot_step_by_step_files/figure-pdf/cell-9-output-1.pdf index 91b8edaa..9880aa1b 100644 Binary files a/spot_step_by_step_files/figure-pdf/cell-9-output-1.pdf and b/spot_step_by_step_files/figure-pdf/cell-9-output-1.pdf differ diff --git a/spotoptim_parallel_files/figure-pdf/benchmark_script-output-2.pdf b/spotoptim_parallel_files/figure-pdf/benchmark_script-output-2.pdf index b9beb92e..6cad19eb 100644 Binary files a/spotoptim_parallel_files/figure-pdf/benchmark_script-output-2.pdf and b/spotoptim_parallel_files/figure-pdf/benchmark_script-output-2.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-10-output-2.pdf b/surrogate_selection_files/figure-pdf/cell-10-output-2.pdf index 15737892..a67ea304 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-10-output-2.pdf and b/surrogate_selection_files/figure-pdf/cell-10-output-2.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-10-output-4.pdf b/surrogate_selection_files/figure-pdf/cell-10-output-4.pdf index ed7371d8..1d697e3c 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-10-output-4.pdf and b/surrogate_selection_files/figure-pdf/cell-10-output-4.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-10-output-6.pdf b/surrogate_selection_files/figure-pdf/cell-10-output-6.pdf index 93456e6e..f592ce0d 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-10-output-6.pdf and b/surrogate_selection_files/figure-pdf/cell-10-output-6.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-13-output-2.pdf 
b/surrogate_selection_files/figure-pdf/cell-13-output-2.pdf index 05d1a7b0..fc50203e 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-13-output-2.pdf and b/surrogate_selection_files/figure-pdf/cell-13-output-2.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-13-output-4.pdf b/surrogate_selection_files/figure-pdf/cell-13-output-4.pdf index 1d3c5800..4ec4881b 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-13-output-4.pdf and b/surrogate_selection_files/figure-pdf/cell-13-output-4.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-13-output-6.pdf b/surrogate_selection_files/figure-pdf/cell-13-output-6.pdf index 9ceb07c7..d5a60dce 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-13-output-6.pdf and b/surrogate_selection_files/figure-pdf/cell-13-output-6.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-16-output-2.pdf b/surrogate_selection_files/figure-pdf/cell-16-output-2.pdf index 1136bb0e..d3710d35 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-16-output-2.pdf and b/surrogate_selection_files/figure-pdf/cell-16-output-2.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-16-output-4.pdf b/surrogate_selection_files/figure-pdf/cell-16-output-4.pdf index 4ad3f033..a5b17eb1 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-16-output-4.pdf and b/surrogate_selection_files/figure-pdf/cell-16-output-4.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-16-output-6.pdf b/surrogate_selection_files/figure-pdf/cell-16-output-6.pdf index 7eb51a40..22d51c47 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-16-output-6.pdf and b/surrogate_selection_files/figure-pdf/cell-16-output-6.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-19-output-2.pdf b/surrogate_selection_files/figure-pdf/cell-19-output-2.pdf index dd279852..22af68c3 100644 Binary files 
a/surrogate_selection_files/figure-pdf/cell-19-output-2.pdf and b/surrogate_selection_files/figure-pdf/cell-19-output-2.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-19-output-4.pdf b/surrogate_selection_files/figure-pdf/cell-19-output-4.pdf index 2561ff19..3ff65d9b 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-19-output-4.pdf and b/surrogate_selection_files/figure-pdf/cell-19-output-4.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-19-output-6.pdf b/surrogate_selection_files/figure-pdf/cell-19-output-6.pdf index c32f82ac..76372aaf 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-19-output-6.pdf and b/surrogate_selection_files/figure-pdf/cell-19-output-6.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-22-output-2.pdf b/surrogate_selection_files/figure-pdf/cell-22-output-2.pdf index 4a6a38b4..3663e7be 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-22-output-2.pdf and b/surrogate_selection_files/figure-pdf/cell-22-output-2.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-22-output-4.pdf b/surrogate_selection_files/figure-pdf/cell-22-output-4.pdf index 11ff7b02..346f3381 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-22-output-4.pdf and b/surrogate_selection_files/figure-pdf/cell-22-output-4.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-22-output-6.pdf b/surrogate_selection_files/figure-pdf/cell-22-output-6.pdf index 9f49c598..4529985b 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-22-output-6.pdf and b/surrogate_selection_files/figure-pdf/cell-22-output-6.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-26-output-2.pdf b/surrogate_selection_files/figure-pdf/cell-26-output-2.pdf index 2e7b94ad..3cfc2aad 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-26-output-2.pdf and b/surrogate_selection_files/figure-pdf/cell-26-output-2.pdf differ diff --git 
a/surrogate_selection_files/figure-pdf/cell-26-output-4.pdf b/surrogate_selection_files/figure-pdf/cell-26-output-4.pdf index 8be741fd..ddd6c0c9 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-26-output-4.pdf and b/surrogate_selection_files/figure-pdf/cell-26-output-4.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-26-output-6.pdf b/surrogate_selection_files/figure-pdf/cell-26-output-6.pdf index f9647705..999ea473 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-26-output-6.pdf and b/surrogate_selection_files/figure-pdf/cell-26-output-6.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-29-output-2.pdf b/surrogate_selection_files/figure-pdf/cell-29-output-2.pdf index 4cdcf325..204c0506 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-29-output-2.pdf and b/surrogate_selection_files/figure-pdf/cell-29-output-2.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-29-output-4.pdf b/surrogate_selection_files/figure-pdf/cell-29-output-4.pdf index 283bffbd..7c8a69cc 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-29-output-4.pdf and b/surrogate_selection_files/figure-pdf/cell-29-output-4.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-29-output-6.pdf b/surrogate_selection_files/figure-pdf/cell-29-output-6.pdf index 6943853b..854f47cb 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-29-output-6.pdf and b/surrogate_selection_files/figure-pdf/cell-29-output-6.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-7-output-2.pdf b/surrogate_selection_files/figure-pdf/cell-7-output-2.pdf index 913e596c..d4477391 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-7-output-2.pdf and b/surrogate_selection_files/figure-pdf/cell-7-output-2.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-7-output-4.pdf b/surrogate_selection_files/figure-pdf/cell-7-output-4.pdf index 876b6748..481dd92b 100644 Binary 
files a/surrogate_selection_files/figure-pdf/cell-7-output-4.pdf and b/surrogate_selection_files/figure-pdf/cell-7-output-4.pdf differ diff --git a/surrogate_selection_files/figure-pdf/cell-7-output-6.pdf b/surrogate_selection_files/figure-pdf/cell-7-output-6.pdf index de8549ae..cf593be9 100644 Binary files a/surrogate_selection_files/figure-pdf/cell-7-output-6.pdf and b/surrogate_selection_files/figure-pdf/cell-7-output-6.pdf differ diff --git a/surrogate_selection_files/figure-pdf/surrogate-selection-comparison-viz-output-1.pdf b/surrogate_selection_files/figure-pdf/surrogate-selection-comparison-viz-output-1.pdf index 192b7c7e..2b0e707c 100644 Binary files a/surrogate_selection_files/figure-pdf/surrogate-selection-comparison-viz-output-1.pdf and b/surrogate_selection_files/figure-pdf/surrogate-selection-comparison-viz-output-1.pdf differ diff --git a/surrogate_selection_files/figure-pdf/surrogate-selection-convergence-output-1.pdf b/surrogate_selection_files/figure-pdf/surrogate-selection-convergence-output-1.pdf index 16316b6c..017028e5 100644 Binary files a/surrogate_selection_files/figure-pdf/surrogate-selection-convergence-output-1.pdf and b/surrogate_selection_files/figure-pdf/surrogate-selection-convergence-output-1.pdf differ diff --git a/surrogate_selection_files/figure-pdf/surrogate-selection-default-viz-output-1.pdf b/surrogate_selection_files/figure-pdf/surrogate-selection-default-viz-output-1.pdf index 50c9bbf5..6c447700 100644 Binary files a/surrogate_selection_files/figure-pdf/surrogate-selection-default-viz-output-1.pdf and b/surrogate_selection_files/figure-pdf/surrogate-selection-default-viz-output-1.pdf differ diff --git a/surrogate_selection_files/figure-pdf/surrogate-selection-gb-viz-output-1.pdf b/surrogate_selection_files/figure-pdf/surrogate-selection-gb-viz-output-1.pdf index ffa9c193..82d3e4d4 100644 Binary files a/surrogate_selection_files/figure-pdf/surrogate-selection-gb-viz-output-1.pdf and 
b/surrogate_selection_files/figure-pdf/surrogate-selection-gb-viz-output-1.pdf differ diff --git a/surrogate_selection_files/figure-pdf/surrogate-selection-kriging-viz-output-1.pdf b/surrogate_selection_files/figure-pdf/surrogate-selection-kriging-viz-output-1.pdf index 3157c77e..b6c34ec7 100644 Binary files a/surrogate_selection_files/figure-pdf/surrogate-selection-kriging-viz-output-1.pdf and b/surrogate_selection_files/figure-pdf/surrogate-selection-kriging-viz-output-1.pdf differ diff --git a/surrogate_selection_files/figure-pdf/surrogate-selection-matern15-viz-output-1.pdf b/surrogate_selection_files/figure-pdf/surrogate-selection-matern15-viz-output-1.pdf index 1a30a30f..0cc44fe8 100644 Binary files a/surrogate_selection_files/figure-pdf/surrogate-selection-matern15-viz-output-1.pdf and b/surrogate_selection_files/figure-pdf/surrogate-selection-matern15-viz-output-1.pdf differ diff --git a/surrogate_selection_files/figure-pdf/surrogate-selection-rbf-viz-output-1.pdf b/surrogate_selection_files/figure-pdf/surrogate-selection-rbf-viz-output-1.pdf index 588ab30a..b18d8411 100644 Binary files a/surrogate_selection_files/figure-pdf/surrogate-selection-rbf-viz-output-1.pdf and b/surrogate_selection_files/figure-pdf/surrogate-selection-rbf-viz-output-1.pdf differ diff --git a/surrogate_selection_files/figure-pdf/surrogate-selection-rf-viz-output-1.pdf b/surrogate_selection_files/figure-pdf/surrogate-selection-rf-viz-output-1.pdf index 8e898bc8..987287a1 100644 Binary files a/surrogate_selection_files/figure-pdf/surrogate-selection-rf-viz-output-1.pdf and b/surrogate_selection_files/figure-pdf/surrogate-selection-rf-viz-output-1.pdf differ diff --git a/surrogate_selection_files/figure-pdf/surrogate-selection-rq-viz-output-1.pdf b/surrogate_selection_files/figure-pdf/surrogate-selection-rq-viz-output-1.pdf index cbd2bda3..f7ff07c5 100644 Binary files a/surrogate_selection_files/figure-pdf/surrogate-selection-rq-viz-output-1.pdf and 
b/surrogate_selection_files/figure-pdf/surrogate-selection-rq-viz-output-1.pdf differ diff --git a/surrogate_selection_files/figure-pdf/surrogate-selection-svr-viz-output-1.pdf b/surrogate_selection_files/figure-pdf/surrogate-selection-svr-viz-output-1.pdf index 67a59336..d00887ab 100644 Binary files a/surrogate_selection_files/figure-pdf/surrogate-selection-svr-viz-output-1.pdf and b/surrogate_selection_files/figure-pdf/surrogate-selection-svr-viz-output-1.pdf differ diff --git a/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-2.pdf b/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-2.pdf index 8dc2652a..32ae33ef 100644 Binary files a/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-2.pdf and b/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-2.pdf differ diff --git a/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-3.pdf b/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-3.pdf index 068a89f2..f0c0e6ff 100644 Binary files a/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-3.pdf and b/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-3.pdf differ diff --git a/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-5.pdf b/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-5.pdf index 8ae480cc..85fd4251 100644 Binary files a/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-5.pdf and b/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-5.pdf differ diff --git a/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-7.pdf b/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-7.pdf index 54514ae5..ba834901 100644 Binary files a/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-7.pdf and 
b/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-7.pdf differ diff --git a/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-9.pdf b/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-9.pdf index 8595a276..c6e11e7f 100644 Binary files a/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-9.pdf and b/surrogate_selection_files/figure-pdf/surrogate-selection-xgboost-output-9.pdf differ diff --git a/uv.lock b/uv.lock index 325dece7..df3acee4 100644 --- a/uv.lock +++ b/uv.lock @@ -2212,7 +2212,7 @@ wheels = [ [[package]] name = "spotoptim" -version = "0.10.3" +version = "0.11.2" source = { editable = "../spotoptim" } dependencies = [ { name = "black" }, @@ -2233,6 +2233,7 @@ dependencies = [ { name = "tensorboard" }, { name = "torch", version = "2.11.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, { name = "torch", version = "2.11.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, + { name = "ty" }, { name = "xgboost" }, ] @@ -2268,6 +2269,7 @@ requires-dist = [ { name = "tabulate", specifier = ">=0.9.0" }, { name = "tensorboard", specifier = ">=2.20.0" }, { name = "torch", specifier = ">=2.9.1", index = "https://download.pytorch.org/whl/cpu" }, + { name = "ty", specifier = ">=0.0.29" }, { name = "xgboost", specifier = ">=3.1.1" }, ] provides-extras = ["dev", "docs"] @@ -2502,6 +2504,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359, upload-time = "2024-04-19T11:11:46.763Z" }, ] +[[package]] +name = "ty" +version = "0.0.29" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/47/d5/853561de49fae38c519e905b2d8da9c531219608f1fccc47a0fc2c896980/ty-0.0.29.tar.gz", hash = "sha256:e7936cca2f691eeda631876c92809688dbbab68687c3473f526cd83b6a9228d8", size = 5469221, upload-time = "2026-04-05T15:01:21.328Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/b7/911f9962115acfa24e3b2ec9d4992dd994c38e8769e1b1d7680bb4d28a51/ty-0.0.29-py3-none-linux_armv6l.whl", hash = "sha256:b8a40955f7660d3eaceb0d964affc81b790c0765e7052921a5f861ff8a471c30", size = 10568206, upload-time = "2026-04-05T15:01:19.165Z" }, + { url = "https://files.pythonhosted.org/packages/fe/c3/fcae2167d4c77a97269f92f11d1b43b03617f81de1283d5d05b43432110c/ty-0.0.29-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6b6849adae15b00bbe2d3c5b078967dcb62eba37d38936b8eeb4c81a82d2e3b8", size = 10442530, upload-time = "2026-04-05T15:01:28.471Z" }, + { url = "https://files.pythonhosted.org/packages/97/33/5a6bfa240cfcb9c36046ae2459fa9ea23238d20130d8656ff5ac4d6c012a/ty-0.0.29-py3-none-macosx_11_0_arm64.whl", hash = "sha256:dcdd9b17209788152f7b7ea815eda07989152325052fe690013537cc7904ce49", size = 9915735, upload-time = "2026-04-05T15:01:10.365Z" }, + { url = "https://files.pythonhosted.org/packages/b3/1e/318f45fae232118e81a6306c30f50de42c509c412128d5bd231eab699ffb/ty-0.0.29-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d8ed4789bae78ffaf94462c0d25589a734cab0366b86f2bbcb1bb90e1a7a169", size = 10419748, upload-time = "2026-04-05T15:01:32.375Z" }, + { url = "https://files.pythonhosted.org/packages/a9/a8/5687872e2ab5a0f7dd4fd8456eac31e9381ad4dc74961f6f29965ad4dd91/ty-0.0.29-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:91ec374b8565e0ad0900011c24641ebbef2da51adbd4fb69ff3280c8a7eceb02", size = 10394738, upload-time = "2026-04-05T15:01:06.473Z" }, + { url = 
"https://files.pythonhosted.org/packages/de/68/015d118097eeb95e6a44c4abce4c0a28b7b9dfb3085b7f0ee48e4f099633/ty-0.0.29-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:298a8d5faa2502d3810bbbb47a030b9455495b9921594206043c785dd61548cf", size = 10910613, upload-time = "2026-04-05T15:01:17.17Z" }, + { url = "https://files.pythonhosted.org/packages/1c/01/47ce3c6c53e0670eadbe80756b167bf80ed6681d1ba57cfde2e8065a13d1/ty-0.0.29-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c8fba1a3524c6109d1e020d92301c79d41bf442fa8d335b9fa366239339cb70", size = 11475750, upload-time = "2026-04-05T15:01:30.461Z" }, + { url = "https://files.pythonhosted.org/packages/c4/cf/e361845b1081c9264ad5b7c963231bab03f2666865a9f2a115c4233f2137/ty-0.0.29-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4c48adf88a70d264128c39ee922ed14a947817fced1e93c08c1a89c9244edcde", size = 11190055, upload-time = "2026-04-05T15:01:12.369Z" }, + { url = "https://files.pythonhosted.org/packages/79/12/0fb0857e9a62cb11586e9a712103877bbf717f5fb570d16634408cfdefee/ty-0.0.29-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ce0a7a0e96bc7b42518cd3a1a6a6298ef64ff40ca4614355c1aa807059b5c6f", size = 11020539, upload-time = "2026-04-05T15:01:37.022Z" }, + { url = "https://files.pythonhosted.org/packages/20/36/5a26753802083f80cd125db6c4348ad42b3c982ec36e718e0bf4c18f75e5/ty-0.0.29-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a6ac86a05b4a3731d45365ab97780acc7b8146fa62fccb3cbe94fe6546c67a97", size = 10396399, upload-time = "2026-04-05T15:01:26.167Z" }, + { url = "https://files.pythonhosted.org/packages/00/e6/b4e75b5752239ab3ab400f19faef4dbef81d05aab5d3419fda0c062a3765/ty-0.0.29-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:6bbbf53141af0f3150bf288d716263f1a3550054e4b3551ca866d38192ba9891", size = 10421461, upload-time = "2026-04-05T15:01:08.367Z" }, + { url = 
"https://files.pythonhosted.org/packages/c0/21/1084b5b609f9abed62070ec0b31c283a403832a6310c8bbc208bd45ee1e6/ty-0.0.29-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1c9e06b770c1d0ff5efc51e34312390db31d53fcf3088163f413030b42b74f84", size = 10599187, upload-time = "2026-04-05T15:01:23.52Z" }, + { url = "https://files.pythonhosted.org/packages/ab/a1/ce19a2ca717bbcc1ee11378aba52ef70b6ce5b87245162a729d9fdc2360f/ty-0.0.29-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:0307fe37e3f000ef1a4ae230bbaf511508a78d24a5e51b40902a21b09d5e6037", size = 11121198, upload-time = "2026-04-05T15:01:15.22Z" }, + { url = "https://files.pythonhosted.org/packages/6b/6b/f1430b279af704321566ce7ec2725d3d8258c2f815ebd93e474c64cd4543/ty-0.0.29-py3-none-win32.whl", hash = "sha256:7a2a898217960a825f8bc0087e1fdbaf379606175e98f9807187221d53a4a8ed", size = 9995331, upload-time = "2026-04-05T15:01:01.32Z" }, + { url = "https://files.pythonhosted.org/packages/d2/ef/3ef01c17785ff9a69378465c7d0faccd48a07b163554db0995e5d65a5a23/ty-0.0.29-py3-none-win_amd64.whl", hash = "sha256:fc1294200226b91615acbf34e0a9ad81caf98c081e9c6a912a31b0a7b603bc3f", size = 11023644, upload-time = "2026-04-05T15:01:04.432Z" }, + { url = "https://files.pythonhosted.org/packages/2c/55/87280a994d6a2d2647c65e12abbc997ed49835794366153c04c4d9304d76/ty-0.0.29-py3-none-win_arm64.whl", hash = "sha256:f9794bbd1bb3ce13f78c191d0c89ae4c63f52c12b6daa0c6fe220b90d019d12c", size = 10428165, upload-time = "2026-04-05T15:01:34.665Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0"