## ----setup, include=FALSE---------------------------------------------------------------------------------------------
knitr::opts_chunk$set(
  warning = FALSE,
  message = FALSE,
  tidy = FALSE,
  fig.width  = 8,
  fig.height = 5,
  dpi        = 150,
  fig.retina = 2,
  fig.align  = "center",
  out.width  = "100%",
  out.extra  = 'style="height:auto;"'
)

options(width = 120) # width console output


## First, install the packages, if you have not done this already:
#if (!require("restriktor")) install.packages("restriktor")

## Then, load the packages:
#library(restriktor) # for the goric function

# If you want to use restriktor from github:
#if (!require("devtools")) install.packages("devtools")
#library(devtools) 
#install_github("LeonardV/restriktor") #, force = TRUE
library(restriktor) # for goric function

# NB default iter is 1000
# als testen dan ws iter = iters gebruiken, met:
# iters = 30 

## ----warning=F, message=F, eval=FALSE---------------------------------------------------------------------------------
# # If ANOVA model:
# 
# # In practise you may want to increase the number of iter, say 2000, and use parallel computing
# # future::plan(multisession, workers = 8) # windows machines
# # future::plan(multicore, workers = 8) # unix machines
# benchmarks_means <- benchmark(goric_object, model_type = "means", iter = 400)
# benchmarks_means
# plot(benchmarks_means)
# # Use 'pop_es' to specify own null population(s).	
# #
# ## If ANOVA or other model:
# benchmarks_asymp <- benchmark(goric_object, iter = 400)
# # 'model_type = "asymp"', the default
# benchmarks_asymp
# plot(benchmarks_asymp)
# # Use 'pop_est' to specify own null population(s).
# #
# # print() options:
# # output_type = c("rgw", "gw", "rlw", "ld", "all")
# # hypo_rate_threshold = <number>
# #
# # plot() options:
# # output_type = c("rgw", "gw", "rlw", "ld")
# # percentiles = NULL
# # x_lim = c(<min>, <max>)
# # log_scale = FALSE/TRUE

## ----echo=FALSE, results = FALSE, message = FALSE, warning = FALSE----------------------------------------------------
n.coef <- 3 # Number of dummy variables (model without intercept)

mu <- rep(0, n.coef)
intercept <- 0

f <- .25 #c(0, .1, .25, .4) # According to Cohen 1992
#f2 = r2 / (1-r2) -> r2 = f2 - f2*r2 -> (1-f2)*r2 = f2
#r2 <- f^2 / (1-f^2)
# NB correspondeert niet met: 0, .02, .15, .35
# Voor nu zo laten, lijken wel goede voorbeelden

samplesize <- 3*40 # c(40, 40, 40) - geeft fout in restr, data ms dan gek? # 40 - geeft nu ineens 40 in totaal 
b.ratios <- c(3,2,1)

sample <- NULL

# Determine true / population beta coefficients in data generating process
D_ <- matrix(rep(1:n.coef), nrow = samplesize)
D <- as.factor(D_)
sample$D <- D

sample <- model.matrix(~sample$D-1)
colnames(sample) <- paste0("D_", seq_len(ncol(sample)))
sample <- as.data.frame(sample)

# Determine mean values (betas)
var.e <- 1
if (sum(abs(b.ratios)) == 0) { # all zeros (thus under H0)
  means_pop <- b.ratios
} else {
  fun <- function (d) {
        means_pop = b.ratios*d
        (1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) - f
      }
      d <- uniroot(fun, lower=0, upper=100)$root
      # Construct means_pop
      means_pop <- b.ratios*d
      # Check
      #means_pop #[1] 0.9185587 0.6123724 0.3061862
      #means_pop[1] - means_pop[2]; means_pop[2] - means_pop[3]
      # Check: ES =
      #(1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) # .25
    }

set.seed(123) # set seed: to obtain same results when you re-run it
epsilon <- rnorm(samplesize, sd=sqrt(var.e))
sample$y <- as.matrix(sample[, 1:n.coef]) %*% matrix(means_pop, nrow = n.coef) + epsilon

# Obtain fit
fit <- lm(y ~ 0 + D, data=sample)
#fit


## ----message = TRUE, warning = FALSE----------------------------------------------------------------------------------
# H1 vs complement (default) - H1 is true
H1 <- "D1 > D2 > D3" # mu1 > mu2 > mu3

# Apply GORIC #
set.seed(123) # set seed: to obtain same results when you re-run it
results_1c <- goric(fit, hypotheses = list(H1))
results_1c

## ----message = F, warning = FALSE-------------------------------------------------------------------------------------
# Benchmarks based on null
set.seed(123) # set seed: to obtain same results when you re-run it
benchmarks_1c <- benchmark(results_1c, iter = 200)
#benchmarks_1c # use in R file
print(benchmarks_1c, color = FALSE) # use in Rmd file, since Rmd cannot deal with colored text
#plot(benchmarks_1c) 
plot(benchmarks_1c, log_scale = T)

## ----include = FALSE--------------------------------------------------------------------------------------------------
hypo_rate <- print(benchmarks_1c, hypo_rate_threshold = 1)
hyporate <- hypo_rate$hypothesis_rate[[2]]

## ----message=F, warning=F, eval = F-----------------------------------------------------------------------------------
# hypo_rate_2 <- print(benchmarks_1c, hypo_rate_threshold = 2)

## ----include = F------------------------------------------------------------------------------------------------------
hypo_rate_2 <- print(benchmarks_1c, hypo_rate_threshold = 2)

## ---------------------------------------------------------------------------------------------------------------------
hypo_rate_2$hypothesis_rate[[2]]

## ----message = TRUE, warning = FALSE----------------------------------------------------------------------------------
plot(benchmarks_1c, log_scale = TRUE)  

## ----message = F, warning = FALSE-------------------------------------------------------------------------------------
# H1, H2, and unconstrained (default) - subset/overlap, that is, H1 is true
H1 <- "D1 > D2 > D3"               # mu1 > mu2 > mu3
H2 <- "D1 > D2" # H2: D1 > D2, D3  # mu1 > mu2,  mu3

# Apply GORIC #
set.seed(123) 
results_12u <- goric(fit, hypotheses = list(H1, H2))

## ----message = F, warning = FALSE-------------------------------------------------------------------------------------
results_12u

## ----message = F, warning = FALSE-------------------------------------------------------------------------------------
round(results_12u$ratio.gw, 3)

## ----message = F, warning = FALSE-------------------------------------------------------------------------------------
# Benchmarks
set.seed(123) 
benchmarks_12u <- benchmark(results_12u, model_type = "means", iter = 400)
#benchmarks_12u # R file
print(benchmarks_12u, color = FALSE) # Rmd file
#
# Plots of benchmarks
#plot(benchmarks_12u, output_type = "rgw") 
#plot(benchmarks_12u, output_type = "rgw", log_scale = TRUE)
#
#plot_out <- plot(benchmarks_12u) # save all plots in object plot_out
#plot(plot_out$grobs$`H1 vs. H2`) # call separate plot
#
## Plots of benchmarks with log10 transformation of x-axis
#plot_out_log <- plot(benchmarks_12u, output_type = "rgw", log_scale = TRUE) # save all plots in object plot_out -->
#plot(plot_out_log$grobs$`H1 vs. H2`) # call separate plot

## ----message = F, warning = FALSE, eval = FALSE-----------------------------------------------------------------------
# #plot(benchmarks_12u, output_type = "rgw", log_scale = TRUE)

## ----echo=FALSE, message=FALSE, warning=FALSE, fig.width=12, fig.height=10, dpi=200, out.width="100%"-----------------
bp <- plot(benchmarks_12u, output_type = "rgw", log_scale = TRUE)

bp$plots <- lapply(bp$plots, function(g) {
  g +
    #@ggplot2::labs(title = NULL) +
    ggplot2::theme_bw(base_size = 13) +
    ggplot2::theme(
      legend.position = "bottom",
      legend.box      = "vertical",
      legend.title    = ggplot2::element_text(size = 12),
      legend.text     = ggplot2::element_text(size = 11),
      axis.title      = ggplot2::element_text(size = 13),
      axis.text       = ggplot2::element_text(size = 11)
    )
})

bp


## ----echo = F, message = FALSE, warning = FALSE-----------------------------------------------------------------------
b.ratios <- c(2.5,2.5,1)

sample <- NULL

# Determine true / population beta coefficients in data generating process
D_ <- matrix(rep(1:n.coef), nrow = samplesize)
D <- as.factor(D_)
sample$D <- D

sample <- model.matrix(~sample$D-1)
colnames(sample) <- paste0("D_", seq_len(ncol(sample)))
sample <- as.data.frame(sample)

# Determine mean values (betas)
var.e <- 1
if (sum(abs(b.ratios)) == 0) { # all zeros (thus under H0)
  means_pop <- b.ratios
} else {
  fun <- function (d) {
        means_pop = b.ratios*d
        (1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) - f
      }
      d <- uniroot(fun, lower=0, upper=100)$root
      # Construct means_pop
      means_pop <- b.ratios*d
      # Check
      #means_pop #[1] 0.8838835 0.8838835 0.3535534
      #means_pop[1] - means_pop[2]; means_pop[2] - means_pop[3]
      # Check: ES =
      #(1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) # .25
    }

set.seed(124) # set seed: to obtain same results when you re-run it
epsilon <- rnorm(samplesize, sd=sqrt(var.e))
sample$y <- as.matrix(sample[, 1:n.coef]) %*% matrix(means_pop, nrow = n.coef) + epsilon

# Obtain fit
fit_border <- lm(y ~ 0 + D, data=sample)
#fit_border

## ----message = TRUE, warning = FALSE----------------------------------------------------------------------------------
# H1 vs complement (default) - border (nl., mu1 = mu2 > mu3) is true
H1 <- "D1 > D2 > D3" # mu1 > mu2 > mu3

# Apply GORIC #
set.seed(123) 
results_1c_border <- goric(fit_border, hypotheses = list(H1))
results_1c_border

## ----message = F, warning = FALSE-------------------------------------------------------------------------------------
#Default null (when using `model_type = "means"`)
# Loglik benchmarks based on default null / no effect sizes, that is,
# setting all three means equal in the population
set.seed(123) 
benchmarks_1c_border <- benchmark(results_1c_border, model_type = "means", iter = 400)
# loglik diff
#print(benchmarks_1c_border, output_type = "ld") # in R file
print(benchmarks_1c_border, output_type = "ld", color = FALSE) # in Rmd file
plot(benchmarks_1c_border, output_type = "ld")
# ratio loglik weights
#print(benchmarks_1c_border, output_type = "rlw") # in R file
print(benchmarks_1c_border, output_type = "rlw", color = FALSE) # in Rmd file
#plot(benchmarks_1c_border, output_type = "rlw", x_lim = c(0, 2.5))
plot(benchmarks_1c_border, output_type = "rlw", log_scale = TRUE)

## ----message = F, warning = FALSE-------------------------------------------------------------------------------------
# Specifying multiple null populations, that is,
# using all possibilities of setting inequalities to equalities.
# Here, we will use the default `model_type` (i.e., "asymp") which takes population parameter values (instead of population effect sizes) 
est <- coef(fit_border)
pop_est <- matrix(c(
                  mean(est[1:3]), mean(est[1:3]), mean(est[1:3]),
                  mean(est[1:2]), mean(est[1:2]), est[3],
                  mean(est[1:2]), est[2], mean(est[1:2]),
                  est[1], mean(est[2:3]), mean(est[2:3]),
                  
                  est[1], est[2], est[3]
                  ),
                  byrow = TRUE, ncol = length(est))
rownames(pop_est) <- c("PE_123eq", "PE_12eq", "PE_13eq", "PE_23eq", "Observed")
#
set.seed(123) 
benchmarks_1c_border_allpos <- benchmark(results_1c_border, pop_est = pop_est, iter = 200)
#
# loglik difference
#print(benchmarks_1c_border_allpos, output_type = "ld") # R file
print(benchmarks_1c_border_allpos, output_type = "ld", color = FALSE) # Rmd file
plot(benchmarks_1c_border_allpos, output_type = "ld")
#plot(benchmarks_1c_border_allpos, output_type = "ld", x_lim = c(-.25,.25))
#
# ratio of loglik weights
#print(benchmarks_1c_border_allpos, output_type = "rlw") # R file
print(benchmarks_1c_border_allpos, output_type = "rlw", color = FALSE) # Rmd file
#plot(benchmarks_1c_border_allpos, output_type = "rlw")
#plot(benchmarks_1c_border_allpos, output_type = "rlw", x_lim = c(0.950, 1.025))
plot(benchmarks_1c_border_allpos, output_type = "rlw", log_scale = TRUE)

## ----echo = F, message = FALSE, warning = FALSE-----------------------------------------------------------------------
# Now, group size is 200 (instead of 40)

samplesize_orig <- samplesize
samplesize <- 3*200

b.ratios <- c(2.5,2.5,1)

sample <- NULL

# Determine true / population beta coefficients in data generating process
D_ <- matrix(rep(1:n.coef), nrow = samplesize)
D <- as.factor(D_)
sample$D <- D

sample <- model.matrix(~sample$D-1)
colnames(sample) <- paste0("D_", seq_len(ncol(sample)))
sample <- as.data.frame(sample)

# Determine mean values (betas)
var.e <- 1
if (sum(abs(b.ratios)) == 0) { # all zeros (thus under H0)
  means_pop <- b.ratios
} else {
  fun <- function (d) {
        means_pop = b.ratios*d
        (1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) - f
      }
      d <- uniroot(fun, lower=0, upper=100)$root
      # Construct means_pop
      means_pop <- b.ratios*d
      # Check
      #means_pop #[1] 0.8838835 0.8838835 0.3535534
      #means_pop[1] - means_pop[2]; means_pop[2] - means_pop[3]
      # Check: ES =
      #(1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) # .25
    }

set.seed(123) # set seed: to obtain same results when you re-run it
epsilon <- rnorm(samplesize, sd=sqrt(var.e))
sample$y <- as.matrix(sample[, 1:n.coef]) %*% matrix(means_pop, nrow = n.coef) + epsilon

# Obtain fit
fit_border_200 <- lm(y ~ 0 + D, data=sample)
#fit_border_200

# Return to original value
samplesize <- samplesize_orig

## ----message = TRUE, warning = FALSE----------------------------------------------------------------------------------
# Now, group size is 200 (instead of 40)

# H1 vs complement (default) - border (nl., mu1 = mu2 > mu3) is true
H1 <- "D1 > D2 > D3" # mu1 > mu2 > mu3

# Apply GORIC #
set.seed(123) 
results_1c_border_200 <- goric(fit_border_200, hypotheses = list(H1))
results_1c_border_200

## ----message = F, warning = FALSE-------------------------------------------------------------------------------------
# Specifying multiple null populations, that is,
# using all possibilities of setting inequalities to equalities.
# Here, we will use the default `model_type` (i.e., "asymp") which takes population parameter values (instead of population effect sizes) 
est <- coef(fit_border_200)
pop_est <- matrix(c(
                  mean(est[1:3]), mean(est[1:3]), mean(est[1:3]),
                  mean(est[1:2]), mean(est[1:2]), est[3],
                  mean(est[1:2]), est[2], mean(est[1:2]),
                  est[1], mean(est[2:3]), mean(est[2:3]),
                  
                  est[1], est[2], est[3]
                  ),
                  byrow = TRUE, ncol = length(est))
rownames(pop_est) <- c("PE_123eq", "PE_12eq", "PE_13eq", "PE_23eq", "Observed")
#
set.seed(123) 
benchmarks_1c_border_allpos_200 <- benchmark(results_1c_border_200, pop_est = pop_est, iter = 200)
#
# loglik difference
#print(benchmarks_1c_border_allpos_200, output_type = "ld") # R file
print(benchmarks_1c_border_allpos_200, output_type = "ld", color = FALSE) # Rmd file
plot(benchmarks_1c_border_allpos_200, output_type = "ld") # x_lim = c(<min>, <max>)
#
# ratio of loglik weights
#print(benchmarks_1c_border_allpos_200, output_type = "rlw") # R file
print(benchmarks_1c_border_allpos_200, output_type = "rlw", color = FALSE) # Rmd file
#plot(benchmarks_1c_border_allpos_200, output_type = "rlw") # x_lim = c(<min>, <max>)
plot(benchmarks_1c_border_allpos_200, output_type = "rlw", log_scale = T)

## ----message = F, warning = FALSE-------------------------------------------------------------------------------------
# Specifying multiple null populations, that is,
# using all possibilities of setting inequalities to equalities.
# Here, we will use the default `model_type` (i.e., "asymp") which takes population parameter values (instead of population effect sizes) 
est <- coef(fit)
pop_est <- matrix(c(
                  mean(est[1:3]), mean(est[1:3]), mean(est[1:3]),
                  mean(est[1:2]), mean(est[1:2]), est[3],
                  mean(est[1:2]), est[2], mean(est[1:2]),
                  est[1], mean(est[2:3]), mean(est[2:3]),

                  est[1], est[2], est[3]
                  ),
                  byrow = TRUE, ncol = length(est))
rownames(pop_est) <- c("PE_123eq", "PE_12eq", "PE_13eq", "PE_23eq", "Observed")
#
set.seed(123) 
benchmarks_1c_allpos <- benchmark(results_1c, pop_est = pop_est, iter = 200)
#
# loglik difference
#print(benchmarks_1c_allpos, output_type = "ld") # R file
print(benchmarks_1c_allpos, output_type = "ld", color = FALSE) # Rmd file
plot(benchmarks_1c_border_allpos_200, output_type = "ld") # x_lim = c(<min>, <max>)
#
# ratio of loglik weights
#print(benchmarks_1c_allpos, output_type = "rlw") # R file
print(benchmarks_1c_allpos, output_type = "rlw", color = FALSE) # Rmd file
#plot(benchmarks_1c_allpos, output_type = "rlw") # x_lim = c(<min>, <max>)
plot(benchmarks_1c_allpos, output_type = "rlw", log_scale = T)

## ----echo = F, message = FALSE, warning = FALSE-----------------------------------------------------------------------
# Now, total sample size is 200 (instead of 40)

samplesize_orig <- samplesize
samplesize <- 3*200

b.ratios <- c(3,2,1)

sample <- NULL

# Determine true / population beta coefficients in data generating process
D_ <- matrix(rep(1:n.coef), nrow = samplesize)
D <- as.factor(D_)
sample$D <- D

sample <- model.matrix(~sample$D-1)
colnames(sample) <- paste0("D_", seq_len(ncol(sample)))
sample <- as.data.frame(sample)

# Determine mean values (betas)
var.e <- 1
if (sum(abs(b.ratios)) == 0) { # all zeros (thus under H0)
  means_pop <- b.ratios
} else {
  fun <- function (d) {
        means_pop = b.ratios*d
        (1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) - f
      }
      d <- uniroot(fun, lower=0, upper=100)$root
      # Construct means_pop
      means_pop <- b.ratios*d
      # Check
      #means_pop #[1] 0.9185587 0.6123724 0.3061862
      #means_pop[1] - means_pop[2]; means_pop[2] - means_pop[3]
      # Check: ES =
      #(1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) # .25
    }

set.seed(123) # set seed: to obtain same results when you re-run it
epsilon <- rnorm(samplesize, sd=sqrt(var.e))
sample$y <- as.matrix(sample[, 1:n.coef]) %*% matrix(means_pop, nrow = n.coef) + epsilon

# Obtain fit
fit_200 <- lm(y ~ 0 + D, data=sample)
#fit_200

# Return to original value
samplesize <- samplesize_orig

## ----message = TRUE, warning = FALSE----------------------------------------------------------------------------------
# Now, group size is 200 (instead of 40)

# H1 vs complement (default)
H1 <- "D1 > D2 > D3" # mu1 > mu2 > mu3

# Apply GORIC #
set.seed(123) 
results_1c_200 <- goric(fit_200, hypotheses = list(H1))
results_1c_200

## ----message = F, warning = FALSE-------------------------------------------------------------------------------------
# Specifying multiple null populations, that is,
# using all possibilities of setting inequalities to equalities.
# Here, we will use the default `model_type` (i.e., "asymp") which takes population parameter values (instead of population effect sizes) 
est <- coef(fit_200)
pop_est <- matrix(c(
                  mean(est[1:3]), mean(est[1:3]), mean(est[1:3]),
                  mean(est[1:2]), mean(est[1:2]), est[3],
                  mean(est[1:2]), est[2], mean(est[1:2]),
                  est[1], mean(est[2:3]), mean(est[2:3]),
                  
                  est[1], est[2], est[3]
                  ),
                  byrow = TRUE, ncol = length(est))
rownames(pop_est) <- c("PE_123eq", "PE_12eq", "PE_13eq", "PE_23eq", "Observed")
#
set.seed(123) 
benchmarks_1c_allpos_200 <- benchmark(results_1c_200, pop_est = pop_est, iter = 200)
#
# loglik difference
#print(benchmarks_1c_allpos, output_type = "ld") # R file
print(benchmarks_1c_allpos_200, output_type = "ld", color = FALSE) # Rmd file
plot(benchmarks_1c_allpos_200, output_type = "ld") # x_lim = c(<min>, <max>)
#
# ratio of loglik weights
#print(benchmarks_1c_allpos_200, output_type = "rlw") # R file
print(benchmarks_1c_allpos_200, output_type = "rlw", color = FALSE) # Rmd file
#plot(benchmarks_1c_allpos_200, output_type = "rlw") # x_lim = c(<min>, <max>)
plot(benchmarks_1c_allpos_200, output_type = "rlw", log_scale = TRUE)

