## ----message=FALSE, warning=FALSE, echo = FALSE-------------------------------
library("knitr")
require("Ravages")

## -----------------------------------------------------------------------------
# Import data in a bed matrix
x <- read.bed.matrix( system.file("extdata", "LCT.EUR.b37.bed", package="Ravages") )

# Group variants within know genes by extending their positions
# 500bp upstream and downstream
x <- set.genomic.region(x, flank.width=500)

## -----------------------------------------------------------------------------
# a quick look at the result
table(x@snps$genomic.region, useNA = "ifany")

# Filter variants with maf in the entire sample lower than 1%
# And keep only genomic region with at least 10 SNPs
x1 <- filter.rare.variants(x, filter = "whole", maf.threshold = 0.01, min.nb.snps = 10)
table(x1@snps$genomic.region, useNA="ifany")

# run burden test CAST, using the 1000Genome population as "outcome"
# Null model for CAST
x1.H0.burden <- NullObject.parameters(x1@ped$population, ref.level = "CEU", 
                                      RVAT = "burden", pheno.type = "categorical")
burden(x1, NullObject = x1.H0.burden, burden = "CAST", cores = 1)

# run SKAT, using the 1000Genome population as "outcome"
# COnstruct null model for SKAT, then run test with only a few permutations
x1.H0.SKAT <- NullObject.parameters(x1@ped$population, RVAT = "SKAT", pheno.type = "categorical")
SKAT(x1, x1.H0.SKAT, params.sampling=list(perm.target = 10, perm.max = 500))

# run a similar analysis but using the RAVA-FIRST approach with WSS
# RAVA-FIRST(x1, filter = "whole", maf.threshold = 0.01, min.nb.snps = 10,
#           burden = TRUE, x1.H0.burden, SKAT = F, build = "b37")


## ----eval = F-----------------------------------------------------------------
# # Example bed matrix with 4 variants
# x.ex <- as.bed.matrix(x=matrix(0, ncol=4, nrow=10),
#                       bim=data.frame(chr=1:4, id=paste("rs", 1:4, sep=""),
#                                      dist = rep(0,4), pos=c(150,150,200,250),
#                                      A1=rep("A", 4), A2=rep("T", 4)))
# 
# # Example genes dataframe
# genes.ex <- data.frame(Chr=c(1,1,3,4), Start=c(10,110,190,220), End=c(170,180,250,260),
#                        Gene_Name=factor(letters[1:4]))
# 
# # Attribute genomic regions without splitting the variants
# # attributed to multiple genomic regions
# x.ex <- set.genomic.region(x.ex, regions = genes.ex, split = FALSE)
# x.ex@snps$genomic.region
# 
# # Split genomic regions
# x.ex.split <- bed.matrix.split.genomic.region(x.ex, split.pattern = ",")
# x.ex.split@snps$genomic.region

## -----------------------------------------------------------------------------
# Calculation of the genetic score with a maf threshold of 1%
CAST.score <- CAST(x = x1, genomic.region = x1@snps$genomic.region, maf.threshold = 0.01)
head(CAST.score)

## -----------------------------------------------------------------------------
WSS.score <- WSS(x = x1, genomic.region = x1@snps$genomic.region)
head(WSS.score)

## -----------------------------------------------------------------------------
Sum.score <- burden.weighted.matrix(x = x1, weights = rep(1, ncol(x1)))
head(Sum.score)

## ----eval = F-----------------------------------------------------------------
# # Null model
# x1.H0 <- NullObject.parameters(x1@ped$population, ref.level = "CEU",
#                                RVAT = "burden", pheno.type = "categorical")
# # WSS
# burden(x = x1, NullObject = x1.H0, burden ="WSS",
#       alpha=0.05, get.effect.size=TRUE, cores = 1)
# 
# # Sex + a simulated variable as covariates
# sex <- x1@ped$sex
# u <- runif(nrow(x1))
# covar <- cbind(sex, u)
# # Null model with the covariate "sex"
# x1.H0.covar <- NullObject.parameters(x1@ped$pop, ref.level = "CEU",
#                                      RVAT = "burden", pheno.type = "categorical",
#                                      data = covar, formula = ~ sex)
# 
# # Regression with the covariate "sex" without OR values
# # Using the score matrix WSS computed previously
# burden(NullObject = x1.H0.covar, burden=WSS.score, cores = 1)

## ----eval = F-----------------------------------------------------------------
# # Random continuous phenotype
# set.seed(1) ; pheno1 <- rnorm(nrow(x1))
# # Null model
# x1.H0.continuous <- NullObject.parameters(pheno1, RVAT = "burden",
#                                           pheno.type = "continuous")
# # Test CAST
# burden(x1, NullObject = x1.H0.continuous, burden = "CAST", cores = 1)

## -----------------------------------------------------------------------------
# *** Functionally-informed WSS analysis ***
# Attribution of variants to regions and subregions
x2 <- set.genomic.region.subregion(x, regions = genes.b37,
                                  subregions = subregions.LCT)
# Burden test
burden.subscores(x2, x1.H0.burden, cores = 1)

## ----eval = F-----------------------------------------------------------------
# # Null model
# x1.null <- NullObject.parameters(x1@ped$population, RVAT = "SKAT", pheno.type = "categorical")
# # Permutations because no covariates
# SKAT(x1, x1.null, get.moments = "permutations", debug = TRUE,
#      params.sampling = list(perm.target = 100, perm.max =5e4))
# # Theoretical on 1 core
# SKAT(x1, x1.null, get.moments = "theoretical", debug = TRUE, cores = 1)

## ----eval = F-----------------------------------------------------------------
# # Random continuous phenotype
# set.seed(1) ; pheno1 <- rnorm(nrow(x1))
# # Null Model with covariates
# x1.H0.c <- NullObject.parameters(pheno1, RVAT = "SKAT", pheno.type = "continuous",
#                                  data = covar)
# # Run SKAT
# SKAT(x1, x1.H0.c)

## ----eval = F-----------------------------------------------------------------
# # Attribution of CADD regions
# x.CADDregions <- set.CADDregions(x, build = "b37")
# # Annotation of variants with adjusted CADD scores
# x <- adjustedCADD.annotation(x, build = "b37")

## ----eval = FALSE-------------------------------------------------------------
# # Keep only CADD regions with 2 variants and variants with a MAF greater than 1%
# # and with an adjusted CADD score greater than the median
# x.median <- filter.adjustedCADD(x.CADDregions, maf.threshold = 0.01, min.nb.snps = 2)

## ----eval = FALSE-------------------------------------------------------------
# # Functionally-informed WSS analysis
# x.burden <- burden.subscores(x.median, x1.H0.burden, burden.function = WSS)
# # SKAT analysis
# x.SKAT <- SKAT(x.median, x1.H0.SKAT)

## ----eval = F-----------------------------------------------------------------
# # *** RAVA-FIRST analysis with functionally-informed WSS and SKAT
# #Burden parameters
# burden.parameters <- list(get.effect.size = T, burden.function = WSS)
# # Chromosome by chromosome
# res.bychr <- vector("list", 22)
# for(chr in 1:22){
#   x <- read.bed.matrix(paste0(path_file, prefix_vcf, chr, ".vcf.gz"))
#   res.bychr[[chr]] <- RAVA.FIRST(x, H0.burden = H0.burden, H0.SKAT = H0.SKAT,
#                                  burden.parameters = burden.parameters)
# }

## -----------------------------------------------------------------------------
# Selection of each group of individuals
CEU <- select.inds(x1, population=="CEU")
CEU
FIN <- select.inds(x1, population=="FIN")
FIN
GBR <- select.inds(x1, population=="GBR")
GBR

# Combine in one file:
CEU.FIN.GBR <- rbind(CEU, FIN, GBR)
CEU.FIN.GBR