## ----chunkname, echo=-1-------------------------------------------------------
data.table::setDTthreads(2)

## ----echo = FALSE, message = FALSE--------------------------------------------
library(simstudy)
library(ggplot2)
library(scales)
library(grid)
library(gridExtra)
library(survival)
library(gee)
library(data.table)

plotcolors <- c("#B84226", "#1B8445", "#1C5974")

cbbPalette <- c("#B84226","#B88F26", "#A5B435", "#1B8446",
                "#B87326","#B8A526", "#6CA723", "#1C5974") 

ggtheme <- function(panelback = "white") {
  
  ggplot2::theme(
    panel.background = element_rect(fill = panelback),
    panel.grid = element_blank(),
    axis.ticks =  element_line(colour = "black"),
    panel.spacing =unit(0.25, "lines"),  # requires package grid
    panel.border = element_rect(fill = NA, colour="gray90"), 
    plot.title = element_text(size = 8,vjust=.5,hjust=0),
    axis.text = element_text(size=8),
    axis.title = element_text(size = 8)
  )  
  
}

## ----tidy=TRUE----------------------------------------------------------------
# specifying a specific correlation matrix C
C <- matrix(c(1,.7,.2, .7, 1, .8, .2, .8, 1),nrow = 3)
C

set.seed(282726)

# generate 3 correlated variables with different location and scale for each field
dt <- genCorData(1000, mu=c(4,12,3), sigma = c(1,2,3), corMatrix=C)
dt

# estimate correlation matrix
dt[,round(cor(cbind(V1, V2, V3)),1)]

# estimate standard deviation
dt[,round(sqrt(diag(var(cbind(V1, V2, V3)))),1)]

## ----tidy=TRUE----------------------------------------------------------------
# generate 3 correlated variables with different location but same standard deviation
# and compound symmetry (cs) correlation matrix with correlation coefficient = 0.4.
# Other correlation matrix structures are "independent" ("ind") and "auto-regressive" ("ar1").

dt <- genCorData(1000, mu=c(4,12,3), sigma = 3, rho = .4, corstr = "cs", 
                cnames=c("x0","x1","x2"))
dt

# estimate correlation matrix
dt[,round(cor(cbind(x0, x1, x2)),1)]

# estimate standard deviation
dt[,round(sqrt(diag(var(cbind(x0, x1, x2)))),1)]

## ----tidy = TRUE--------------------------------------------------------------

# define and generate the original data set
def <- defData(varname = "x", dist = "normal", formula = 0, variance = 1, id = "cid")
dt <- genData(1000, def)

# add new correlate fields a0 and a1 to "dt"
dt <- addCorData(dt, idname="cid", mu=c(0,0), sigma = c(2,.2), rho = -0.2, 
                 corstr = "cs", cnames=c("a0","a1"))

dt

# estimate correlation matrix
dt[,round(cor(cbind(a0, a1)),1)]

# estimate standard deviation
dt[,round(sqrt(diag(var(cbind(a0, a1)))),1)]

## -----------------------------------------------------------------------------
l <- c(8, 10, 12) # lambda for each new variable

dx <- genCorGen(1000, nvars = 3, params1 = l, dist = "poisson", rho = .3, corstr = "cs", wide = TRUE)
dx
round(cor(as.matrix(dx[, .(V1, V2, V3)])), 2)

## -----------------------------------------------------------------------------
genCorGen(1000, nvars = 3, params1 = c(.3, .5, .7), dist = "binary", rho = .8, corstr = "cs", wide = TRUE)

## -----------------------------------------------------------------------------
dx <- genCorGen(1000, nvars = 3, params1 = l, params2 = c(1,1,1), dist = "gamma", rho = .7, corstr = "cs", wide = TRUE, cnames="a, b, c")
dx
round(cor(as.matrix(dx[, .(a, b, c)])), 2)

## -----------------------------------------------------------------------------
dx <- genCorGen(1000, nvars = 3, params1 = l, params2 = c(1,1,1), dist = "gamma", rho = .7, corstr = "cs", wide = FALSE, cnames="NewCol")
dx

## -----------------------------------------------------------------------------
def <- defData(varname = "xbase", formula = 5, variance = .2, dist = "gamma", id = "cid")
def <- defData(def, varname = "lambda", formula = ".5 + .1*xbase", dist="nonrandom", link = "log")
def <- defData(def, varname = "p", formula = "-2 + .3*xbase", dist="nonrandom", link = "logit")
def <- defData(def, varname = "gammaMu", formula = ".5 + .2*xbase", dist="nonrandom", link = "log")
def <- defData(def, varname = "gammaDis", formula = 1, dist="nonrandom")

dt <- genData(10000, def)
dt

## -----------------------------------------------------------------------------

dtX1 <- addCorGen(dtOld = dt, idvar = "cid", nvars = 3, rho = .1, corstr = "cs",
                    dist = "poisson", param1 = "lambda", cnames = "a, b, c")
dtX1

## -----------------------------------------------------------------------------
dtX2 <- addCorGen(dtOld = dt, idvar = "cid", nvars = 4, rho = .4, corstr = "ar1",
                    dist = "binary", param1 = "p")
dtX2

## -----------------------------------------------------------------------------
dtX3 <- addCorGen(dtOld = dt, idvar = "cid", nvars = 4, rho = .4, corstr = "cs",
                  dist = "gamma", param1 = "gammaMu", param2 = "gammaDis")
dtX3

## -----------------------------------------------------------------------------
def <- defData(varname = "xbase", formula = 5, variance = .4, dist = "gamma", id = "cid")
def <- defData(def, "nperiods", formula = 3, dist = "noZeroPoisson")

def2 <- defDataAdd(varname = "lambda", formula = ".5+.5*period + .1*xbase", dist="nonrandom", link = "log")

dt <- genData(1000, def)

dtLong <- addPeriods(dt, idvars = "cid", nPeriods = 3)
dtLong <- addColumns(def2, dtLong)

dtLong

### Generate the data 

dtX3 <- addCorGen(dtOld = dtLong, idvar = "cid", nvars = 3, rho = .6, corstr = "cs",
                  dist = "poisson", param1 = "lambda", cnames = "NewPois")
dtX3

## -----------------------------------------------------------------------------
geefit <- gee(NewPois ~ period + xbase, data = dtX3, id = cid, family = poisson, corstr = "exchangeable")
round(summary(geefit)$working.correlation, 2)


