Skip to content
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ Imports:
checkmate,
matrixStats (>= 0.52),
parallel,
posterior (>= 1.5.0),
posterior (>= 1.7.0),
stats
Suggests:
bayesplot (>= 1.7.0),
Expand Down
73 changes: 6 additions & 67 deletions R/gpdfit.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,71 +29,10 @@
#' for the generalized Pareto distribution. *Technometrics* **51**, 316-325.
#'
gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) {
# See section 4 of Zhang and Stephens (2009)
if (sort_x) {
x <- sort.int(x)
}
N <- length(x)
prior <- 3
M <- min_grid_pts + floor(sqrt(N))
jj <- seq_len(M)
xstar <- x[floor(N / 4 + 0.5)] # first quartile of sample
theta <- 1 / x[N] + (1 - sqrt(M / (jj - 0.5))) / prior / xstar
l_theta <- N * lx(theta, x) # profile log-lik
w_theta <- exp(l_theta - matrixStats::logSumExp(l_theta)) # normalize
theta_hat <- sum(theta * w_theta)
k <- mean.default(log1p(-theta_hat * x))
sigma <- -k / theta_hat

if (wip) {
k <- adjust_k_wip(k, n = N)
}

if (is.na(k)) {
k <- Inf
}

nlist(k, sigma)
}


# internal ----------------------------------------------------------------

lx <- function(a,x) {
a <- -a
k <- vapply(a, FUN = function(a_i) mean(log1p(a_i * x)), FUN.VALUE = numeric(1))
log(a / k) - k - 1
}

#' Adjust k based on weakly informative prior, Gaussian centered on 0.5. This
#' will stabilize estimates for very small Monte Carlo sample sizes and low neff
#' cases.
#'
#' @noRd
#' @param k Scalar khat estimate.
#' @param n Integer number of tail samples used to fit GPD.
#' @return Scalar adjusted khat estimate.
#'
adjust_k_wip <- function(k, n) {
a <- 10
n_plus_a <- n + a
k * n / n_plus_a + a * 0.5 / n_plus_a
}


#' Inverse CDF of generalized Pareto distribution
#' (assuming location parameter is 0)
#'
#' @noRd
#' @param p Vector of probabilities.
#' @param k Scalar shape parameter.
#' @param sigma Scalar scale parameter.
#' @return Vector of quantiles.
#'
qgpd <- function(p, k, sigma) {
if (is.nan(sigma) || sigma <= 0) {
return(rep(NaN, length(p)))
}

sigma * expm1(-k * log1p(-p)) / k
posterior::gpdfit(
x = x,
wip = wip,
min_grid_pts = min_grid_pts,
sort_x = sort_x
)
}
4 changes: 2 additions & 2 deletions R/psis.R
Original file line number Diff line number Diff line change
Expand Up @@ -254,12 +254,12 @@ psis_smooth_tail <- function(x, cutoff) {
exp_cutoff <- exp(cutoff)

# save time not sorting since x already sorted
fit <- gpdfit(exp(x) - exp_cutoff, sort_x = FALSE)
fit <- posterior::gpdfit(exp(x) - exp_cutoff, sort_x = FALSE)
k <- fit$k
sigma <- fit$sigma
if (is.finite(k)) {
p <- (seq_len(len) - 0.5) / len
qq <- qgpd(p, k, sigma) + exp_cutoff
qq <- posterior::qgeneralized_pareto(p, 0, sigma, k) + exp_cutoff
tail <- log(qq)
} else {
tail <- x
Expand Down
4 changes: 2 additions & 2 deletions R/psislw.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,11 @@ psislw <- function(lw, wcp = 0.2, wtrunc = 3/4,
# body and gPd smoothed tail
tail_ord <- order(x_tail)
exp_cutoff <- exp(cutoff)
fit <- gpdfit(exp(x_tail) - exp_cutoff, wip=FALSE, min_grid_pts = 80)
fit <- posterior::gpdfit(exp(x_tail) - exp_cutoff, wip=FALSE, min_grid_pts = 80)
k <- fit$k
sigma <- fit$sigma
prb <- (seq_len(tail_len) - 0.5) / tail_len
qq <- qgpd(prb, k, sigma) + exp_cutoff
qq <- posterior::qgeneralized_pareto(prb, 0, sigma, k) + exp_cutoff
smoothed_tail <- rep.int(0, tail_len)
smoothed_tail[tail_ord] <- log(qq)
x_new <- x
Expand Down
8 changes: 0 additions & 8 deletions tests/testthat/test_gpdfit.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,3 @@ test_that("gpdfit returns correct result", {
expect_snapshot_value(gpdfit_val_wip_default_grid, style = "serialize")
})

test_that("qgpd returns the correct result ", {
probs <- seq(from = 0, to = 1, by = 0.25)
q1 <- qgpd(probs, k = 1, sigma = 1)
expect_equal(q1, c(0, 1 / 3, 1, 3, Inf))

q2 <- qgpd(probs, k = 1, sigma = 0)
expect_true(all(is.nan(q2)))
})
Loading