|
1 | 1 | import copy |
2 | 2 | from math import erf |
3 | 3 | import matplotlib.pyplot as plt |
4 | | -from numpy import max, min, var |
| 4 | +from numpy import min, var |
5 | 5 | from numpy import sqrt |
6 | 6 | from numpy import exp |
7 | 7 | from numpy import array |
@@ -436,13 +436,11 @@ def optimize_model(self) -> Union[List[float], Tuple[float]]: |
436 | 436 | def update_log(self) -> None: |
437 | 437 | """ |
438 | 438 | Update the log with the current values of negLnLike, theta, p, and Lambda. |
439 | | -
|
440 | 439 | This method appends the current values of negLnLike, theta, p (if optim_p is True), |
441 | 440 | and Lambda (if noise is True) |
442 | 441 | to their respective lists in the log dictionary. |
443 | 442 | It also updates the log_length attribute with the current length |
444 | 443 | of the negLnLike list in the log. |
445 | | -
|
446 | 444 | If spot_writer is not None, this method also writes the current values of |
447 | 445 | negLnLike, theta, p (if optim_p is True), |
448 | 446 | and Lambda (if noise is True) to the spot_writer object. |
@@ -505,7 +503,8 @@ def fit(self, nat_X: np.ndarray, nat_y: np.ndarray) -> object: |
505 | 503 | Fits the hyperparameters (`theta`, `p`, `Lambda`) of the Kriging model. |
506 | 504 | The function computes the following internal values: |
507 | 505 | 1. `theta`, `p`, and `Lambda` values via optimization of the function `fun_likelihood()`. |
508 | | - 2. Correlation matrix `Psi` via `rebuildPsi()`. |
| 506 | + 2. Correlation matrix `Psi` via `buildPsi()`. |
| 507 | + 3. U matrix via `buildU()`. |
509 | 508 |
|
510 | 509 | Args: |
511 | 510 | self (object): The Kriging object. |
@@ -591,25 +590,33 @@ def initialize_variables(self, nat_X: np.ndarray, nat_y: np.ndarray) -> None: |
591 | 590 | S.nat_y: [1 2] |
592 | 591 |
|
593 | 592 | """ |
| 593 | + # Validate input dimensions |
| 594 | + if nat_X.ndim != 2 or nat_y.ndim != 1: |
| 595 | + raise ValueError("nat_X must be a 2D array and nat_y must be a 1D array.") |
| 596 | + if nat_X.shape[0] != nat_y.shape[0]: |
| 597 | + raise ValueError("The number of samples in nat_X and nat_y must be equal.") |
| 598 | + |
| 599 | + # Initialize instance variables |
594 | 600 | self.nat_X = copy.deepcopy(nat_X) |
595 | 601 | self.nat_y = copy.deepcopy(nat_y) |
596 | | - self.n = self.nat_X.shape[0] |
597 | | - self.k = self.nat_X.shape[1] |
| 602 | + self.n, self.k = self.nat_X.shape |
598 | 603 |
|
599 | | - self.min_X = min(self.nat_X, axis=0) |
600 | | - self.max_X = max(self.nat_X, axis=0) |
| 604 | + # Calculate and store min and max of X |
| 605 | + self.min_X = np.min(self.nat_X, axis=0) |
| 606 | + self.max_X = np.max(self.nat_X, axis=0) |
601 | 607 |
|
602 | | - Z = aggregate_mean_var(X=self.nat_X, y=self.nat_y) |
603 | | - # aggregated y values: |
604 | | - mu = Z[1] |
605 | | - self.aggregated_mean_y = np.copy(mu) |
| 608 | + # Calculate the aggregated mean of y |
| 609 | + _, aggregated_mean_y, _ = aggregate_mean_var(X=self.nat_X, y=self.nat_y) |
| 610 | + self.aggregated_mean_y = np.copy(aggregated_mean_y) |
| 611 | + |
| 612 | + # Logging the initialized variables |
606 | 613 | logger.debug("In initialize_variables(): self.nat_X: %s", self.nat_X) |
607 | 614 | logger.debug("In initialize_variables(): self.nat_y: %s", self.nat_y) |
608 | 615 | logger.debug("In initialize_variables(): self.aggregated_mean_y: %s", self.aggregated_mean_y) |
609 | 616 | logger.debug("In initialize_variables(): self.min_X: %s", self.min_X) |
610 | 617 | logger.debug("In initialize_variables(): self.max_X: %s", self.max_X) |
611 | | - logger.debug("In initialize_variables(): self.n: %s", self.n) |
612 | | - logger.debug("In initialize_variables(): self.k: %s", self.k) |
| 618 | + logger.debug("In initialize_variables(): self.n: %d", self.n) |
| 619 | + logger.debug("In initialize_variables(): self.k: %d", self.k) |
613 | 620 |
|
614 | 621 | def set_variable_types(self) -> None: |
615 | 622 | """ |
@@ -645,16 +652,18 @@ def set_variable_types(self) -> None: |
645 | 652 | """ |
646 | 653 | logger.debug("In set_variable_types(): self.k: %s", self.k) |
647 | 654 | logger.debug("In set_variable_types(): self.var_type: %s", self.var_type) |
648 | | - # assume all variable types are "num" if "num" is |
649 | | - # specified once: |
| 655 | + |
| 656 | + # Ensure var_type has appropriate length by defaulting to 'num' |
650 | 657 | if len(self.var_type) < self.k: |
651 | | - self.var_type = self.var_type * self.k |
| 658 | + self.var_type = ['num'] * self.k # Corrected to fill with 'num' instead of duplicating |
652 | 659 | logger.warning("In set_variable_types(): All variable types forced to 'num'.") |
653 | 660 | logger.debug("In set_variable_types(): self.var_type: %s", self.var_type) |
654 | | - self.num_mask = np.array(list(map(lambda x: x == "num", self.var_type))) |
655 | | - self.factor_mask = np.array(list(map(lambda x: x == "factor", self.var_type))) |
656 | | - self.int_mask = np.array(list(map(lambda x: x == "int", self.var_type))) |
657 | | - self.ordered_mask = np.array(list(map(lambda x: x == "int" or x == "num" or x == "float", self.var_type))) |
| 661 | + # Create masks for each type using numpy vectorized operations |
| 662 | + var_type_array = np.array(self.var_type) |
| 663 | + self.num_mask = (var_type_array == "num") |
| 664 | + self.factor_mask = (var_type_array == "factor") |
| 665 | + self.int_mask = (var_type_array == "int") |
| 666 | + self.ordered_mask = np.isin(var_type_array, ["int", "num", "float"]) |
658 | 667 | logger.debug("In set_variable_types(): self.num_mask: %s", self.num_mask) |
659 | 668 | logger.debug("In set_variable_types(): self.factor_mask: %s", self.factor_mask) |
660 | 669 | logger.debug("In set_variable_types(): self.int_mask: %s", self.int_mask) |
|
0 commit comments