R/gamVineCopSelect.R
gamVineCopSelect.Rd
This function select the copula family and estimates the parameter(s) of a
Generalized Additive model
(GAM) Vine model, where GAMs for individual edges are specified either for
the copula parameter or Kendall's tau.
It solves the maximum penalized likelihood estimation for the copula families
supported in this package by reformulating each Newton-Raphson iteration as
a generalized ridge regression, which is solved using
the mgcv
package.
gamVineCopSelect(
data,
Matrix,
lin.covs = NULL,
smooth.covs = NULL,
simplified = FALSE,
familyset = NA,
rotations = TRUE,
familycrit = "AIC",
level = 0.05,
trunclevel = NA,
tau = TRUE,
method = "FS",
tol.rel = 0.001,
n.iters = 10,
parallel = FALSE,
verbose = FALSE,
select.once = TRUE,
...
)
A matrix or data frame containing the data in [0,1]^d.
Lower triangular d x d
matrix that defines the R-vine
tree structure.
A matrix or data frame containing the parametric (i.e.,
linear) covariates (default: lin.covs = NULL
).
A matrix or data frame containing the non-parametric
(i.e., smooth) covariates (default: smooth.covs = NULL
).
If TRUE
, then a simplified vine is fitted (which is
possible only if there are exogenous covariates). If FALSE
(default),
then a non-simplified vine is fitted.
An integer vector of pair-copula families to select from
(the independence copula MUST NOT be specified in this vector unless one
wants to fit an independence vine!). The vector has to include at least one
pair-copula family that allows for positive and one that allows for negative
dependence. Not listed copula families might be included to better handle
limit cases. If familyset = NA
(default), selection among all
possible families is performed. Coding of pair-copula families:
1
Gaussian, 2
Student t,
3
Clayton, 4
Gumbel, 13
Survival Clayton,
14
Survival Gumbel, 23
Rotated (90 degrees) Clayton,
24
Rotated (90 degrees) Gumbel,
33
Rotated (270 degrees) Clayton and
34
Rotated (270 degrees) Gumbel.
If TRUE
, all rotations of the families in familyset
are included.
Character indicating the criterion for bivariate copula
selection. Possible choices: familycrit = 'AIC'
(default) or
'BIC'
, as in BiCopSelect
from the
VineCopula
package.
Numerical; Passed to gamBiCopSelect
, it is the
significance level of the test for removing individual
predictors (default: level = 0.05
) for each conditional pair-copula.
Integer; level of truncation.
TRUE
(default) for a calibration function specified for
Kendall's tau or FALSE
for a calibration function specified
for the Copula parameter.
'NR'
for Newton-Raphson
and 'FS'
for Fisher-scoring (default).
Relative tolerance for 'FS'
/'NR'
algorithm.
Maximal number of iterations for
'FS'
/'NR'
algorithm.
TRUE
(default) for parallel selection of copula
family at each edge or FALSE
for the sequential version.
for the Copula parameter.
TRUE
if informations should be printed during the
estimation and FALSE
(default) for a silent version.
from mgcv
.
if TRUE
the GAM structure is only selected once,
for the family that appears first in familyset
.
gamVineCopSelect
returns a gamVine-class
object.
require(mgcv)
set.seed(0)
## Simulation parameters
# Sample size
n <- 1e3
# Copula families
familyset <- c(1:2, 301:304, 401:404)
# Define a 4-dimensional R-vine tree structure matrix
d <- 4
Matrix <- c(2, 3, 4, 1, 0, 3, 4, 1, 0, 0, 4, 1, 0, 0, 0, 1)
Matrix <- matrix(Matrix, d, d)
nnames <- paste("X", 1:d, sep = "")
## A function factory
eta0 <- 1
calib.surf <- list(
calib.quad <- function(t, Ti = 0, Tf = 1, b = 8) {
Tm <- (Tf - Ti) / 2
a <- -(b / 3) * (Tf^2 - 3 * Tf * Tm + 3 * Tm^2)
return(a + b * (t - Tm)^2)
},
calib.sin <- function(t, Ti = 0, Tf = 1, b = 1, f = 1) {
a <- b * (1 - 2 * Tf * pi / (f * Tf * pi +
cos(2 * f * pi * (Tf - Ti))
- cos(2 * f * pi * Ti)))
return((a + b) / 2 + (b - a) * sin(2 * f * pi * (t - Ti)) / 2)
},
calib.exp <- function(t, Ti = 0, Tf = 1, b = 2, s = Tf / 8) {
Tm <- (Tf - Ti) / 2
a <- (b * s * sqrt(2 * pi) / Tf) * (pnorm(0, Tm, s) - pnorm(Tf, Tm, s))
return(a + b * exp(-(t - Tm)^2 / (2 * s^2)))
}
)
## Create the model
# Define gam-vine model list
count <- 1
model <- vector(mode = "list", length = d * (d - 1) / 2)
sel <- seq(d, d^2 - d, by = d)
# First tree
for (i in 1:(d - 1)) {
# Select a copula family
family <- sample(familyset, 1)
model[[count]]$family <- family
# Use the canonical link and a randomly generated parameter
if (is.element(family, c(1, 2))) {
model[[count]]$par <- tanh(rnorm(1) / 2)
if (family == 2) {
model[[count]]$par2 <- 2 + exp(rnorm(1))
}
} else {
if (is.element(family, c(401:404))) {
rr <- rnorm(1)
model[[count]]$par <- sign(rr) * (1 + abs(rr))
} else {
model[[count]]$par <- rnorm(1)
}
model[[count]]$par2 <- 0
}
count <- count + 1
}
# A dummy dataset
data <- data.frame(u1 = runif(1e2), u2 = runif(1e2), matrix(runif(1e2 * d), 1e2, d))
# Trees 2 to (d-1)
for (j in 2:(d - 1)) {
for (i in 1:(d - j)) {
# Select a copula family
family <- sample(familyset, 1)
# Select the conditiong set and create a model formula
cond <- nnames[sort(Matrix[(d - j + 2):d, i])]
tmpform <- paste("~", paste(paste("s(", cond, ", k=10, bs='cr')",
sep = ""
), collapse = " + "))
l <- length(cond)
temp <- sample(3, l, replace = TRUE)
# Spline approximation of the true function
m <- 1e2
x <- matrix(seq(0, 1, length.out = m), nrow = m, ncol = 1)
if (l != 1) {
tmp.fct <- paste("function(x){eta0+",
paste(sapply(1:l, function(x)
paste("calib.surf[[", temp[x], "]](x[", x, "])",
sep = ""
)), collapse = "+"), "}",
sep = ""
)
tmp.fct <- eval(parse(text = tmp.fct))
x <- eval(parse(text = paste0("expand.grid(",
paste0(rep("x", l), collapse = ","), ")",
collapse = ""
)))
y <- apply(x, 1, tmp.fct)
} else {
tmp.fct <- function(x) eta0 + calib.surf[[temp]](x)
colnames(x) <- cond
y <- tmp.fct(x)
}
# Estimate the gam model
form <- as.formula(paste0("y", tmpform))
dd <- data.frame(y, x)
names(dd) <- c("y", cond)
b <- gam(form, data = dd)
# plot(x[,1],(y-fitted(b))/y)
# Create a dummy gamBiCop object
tmp <- gamBiCopFit(data = data, formula = form, family = 1, n.iters = 1)$res
# Update the copula family and the model coefficients
attr(tmp, "model")$coefficients <- coefficients(b)
attr(tmp, "model")$smooth <- b$smooth
attr(tmp, "family") <- family
if (family == 2) {
attr(tmp, "par2") <- 2 + exp(rnorm(1))
}
model[[count]] <- tmp
count <- count + 1
}
}
# Create the gamVineCopula object
GVC <- gamVine(Matrix = Matrix, model = model, names = nnames)
print(GVC)
#> GAM-Vine matrix:
#> [,1] [,2] [,3] [,4]
#> [1,] 2 0 0 0
#> [2,] 3 3 0 0
#> [3,] 4 4 4 0
#> [4,] 1 1 1 1
#>
#> Where
#> 1 <-> X1
#> 2 <-> X2
#> 3 <-> X3
#> 4 <-> X4
#>
#> Tree 1:
#> X2,X1: Gumbel type 3 (survival and 90 degrees rotated)
#> X3,X1: Gaussian
#> X4,X1: Gumbel type 1 (standard and 90 degrees rotated)
#>
#> Tree 2:
#> X2,X4|X1 : Gumbel type 1 (standard and 90 degrees rotated) copula with tau(z) = (exp(z)-1)/(exp(z)+1) where
#> z ~ s(X1, k = 10, bs = "cr")
#> X3,X4|X1 : Gumbel type 2 (standard and 270 degrees rotated) copula with tau(z) = (exp(z)-1)/(exp(z)+1) where
#> z ~ s(X1, k = 10, bs = "cr")
#>
#> Tree 3:
#> X2,X3|X4,X1 : Gumbel type 4 (survival and 270 degrees rotated) copula with tau(z) = (exp(z)-1)/(exp(z)+1) where
#> z ~ s(X1, k = 10, bs = "cr") + s(X4, k = 10, bs = "cr")
if (FALSE) { # \dontrun{
## Simulate and fit the model
sim <- gamVineSimulate(n, GVC)
fitGVC <- gamVineSeqFit(sim, GVC, verbose = TRUE)
fitGVC2 <- gamVineCopSelect(sim, Matrix, verbose = TRUE)
## Plot the results
par(mfrow = c(3, 4))
plot(GVC, ylim = c(-2.5, 2.5))
plot(fitGVC, ylim = c(-2.5, 2.5))
plot(fitGVC2, ylim = c(-2.5, 2.5))
} # }