## ----include = FALSE----------------------------------------------------------
# Chunk options applied to every chunk of the vignette.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----setup--------------------------------------------------------------------
library(MaddisonData)

## ----findData-----------------------------------------------------------------
# Look up file(s) whose name matches the mpd2023 workbook pattern and
# record whether anything was found; every later chunk is only
# evaluated when foundData is TRUE (see the eval=foundData headers).
MadXlsx <- path_package2('^mpd2023.*xlsx$')
MadXlsx
foundData <- length(MadXlsx) > 0
foundData

## ----whichMadxlsx, eval=foundData---------------------------------------------
# Choose which workbook to read. MadXlsx may hold more than one matching
# path; the file with the latest modification time is used.
MadInfo <- file.info(MadXlsx)
# Only announce "multiple files" when that is actually the case.
if (length(MadXlsx) > 1) {
  cat('Multiple files found.\n')
}
print(MadInfo)
imax <- which.max(MadInfo$mtime)
# NOTE: 'Madxlsx' (lower-case x) is the single selected path, while
# 'MadXlsx' (upper-case X) is the full vector of candidates.
Madxlsx <- MadXlsx[imax]

## ----readMaddison, eval=foundData---------------------------------------------
# Read the two worksheets used below from the selected workbook and
# show the first and last rows of each.
MaddisonData0 <- readxl::read_xlsx(path = Madxlsx, sheet = 'Full data')
head(MaddisonData0, n = 2)
tail(MaddisonData0)

# The 'Sources' sheet gets fixed column names right after import.
MaddisonSources0 <- readxl::read_xlsx(path = Madxlsx, sheet = 'Sources')
names(MaddisonSources0) <- c('ISO', 'years', 'source')
head(MaddisonSources0)
tail(MaddisonSources0)

## ----nchCode, eval=foundData--------------------------------------------------
# Distinct country codes, how many there are, and the distribution of
# their lengths in characters.
ctryCds <- unique(MaddisonData0[['countrycode']])
nCds <- length(ctryCds)
nchCode <- nchar(ctryCds)
# deparse.level = 0 keeps the printed table unlabeled, exactly as when
# the argument is an expression rather than a bare variable name.
table(nchCode, deparse.level = 0)

## ----cdCtry, eval=foundData---------------------------------------------------
# One "code + name" string per distinct (countrycode, country) pair,
# in sorted order.
cdCtry <- sort(unique(
  paste0(MaddisonData0$countrycode, MaddisonData0$country)
))
# If the number of pairs differs from the number of distinct codes,
# the code -> country mapping is not one-to-one.
if (length(cdCtry) != nCds) {
  stop('some countrycode(s) have more than one',
       ' country')
}
# Split each string apart again: characters 1-3 are taken as the code,
# everything from character 4 on as the country name.
cd_Ctry <- data.frame(
  ISO = substr(cdCtry, 1, 3),
  country = substring(cdCtry, 4)
)

## ----MaddisonCountries, eval=foundData----------------------------------------
# Build MaddisonCountries: one row per country code with its country
# name and region, with the ISO code as row name.
# cd_Ctry has the columns 'ISO' and 'country' (there is no
# 'countrycode' column), so the row names must come from 'ISO'.
rownames(cd_Ctry) <- cd_Ctry$ISO
# Same "code + value" construction as for the country names; sorting
# orders the strings by code so they can be matched to cd_Ctry by row.
ctryRgn <- sort(with(MaddisonData0, unique(
            paste0(countrycode, region))))
# Guard the positional cbind() below: there must be exactly one region
# per code, and the codes must appear in the same order as in cd_Ctry.
if (length(ctryRgn) != nrow(cd_Ctry) ||
    !isTRUE(all(substring(ctryRgn, 1, 3) == cd_Ctry$ISO))) {
  stop('region(s) do not match countrycode(s) one-to-one')
}
MaddisonCountries <- cbind(cd_Ctry,
            region = substring(ctryRgn, 4))
rownames(MaddisonCountries) <- MaddisonCountries$ISO
save(MaddisonCountries, file='MaddisonCountries.rdb',
       compress=TRUE)
# The file is written as *.rdb rather than *.rda because
# MaddisonCountries.rda CANNOT be included in .Rbuildignore,
# so the name is changed (per R Packages, section 9.7).
# As of 2025-09-24 the R Packages book recommends writing
# using usethis::use_data; that call is kept here, disabled.
if (FALSE) {
    tryUse <- try(usethis::use_data(MaddisonCountries))
}
# Show the working directory, which is where the relative path given
# to save() above was written.
getwd()

## ----subset, eval=foundData---------------------------------------------------
# Spot checks of the country table.
# Country name stored for ISO code 'GBR'.
subset(MaddisonCountries, subset = ISO == 'GBR', select = country)
# First three columns for countries whose name contains 'Yugo'.
subset(MaddisonCountries, subset = grepl('Yugo', country), select = 1:3)
# Number of countries per region.
table(MaddisonCountries$region)
# What are "Western Offshoots"?
subset(MaddisonCountries, subset = grepl('Of', region),
       select = c(country, ISO))

## ----MaddisonData, eval=foundData---------------------------------------------
# Build MaddisonData: drop the rows in which both gdppc and pop are
# missing and keep columns 1 and 4:6 of the 'Full data' sheet.
# NOTE(review): the columns are selected by position; column 1 is
# presumably countrycode (it is renamed 'ISO' below) and 4:6 presumably
# year, gdppc, pop -- confirm against the workbook layout.
NAs <- with(MaddisonData0, is.na(gdppc) & is.na(pop))
str(MaddisonData <- MaddisonData0[!NAs, c(1, 4:6)])
names(MaddisonData)[1] <- 'ISO'

save(MaddisonData, file='MaddisonData.rdb',
       compress=TRUE)
# The file is written as *.rdb rather than *.rda because
# MaddisonData.rda CANNOT be included in .Rbuildignore,
# so the name is changed (per R Packages, section 9.7).
# As of 2025-09-24 the R Packages book recommends writing
# using usethis::use_data; that call is kept here, disabled.
if (FALSE) {
    tryUse <- try(usethis::use_data(MaddisonData))
}
# Show the working directory, which is where the relative path given
# to save() above was written.
getwd()


## ----checkSources, eval=foundData---------------------------------------------
# Row numbers of the 'Sources' sheet in which all three columns are NA.
sourceNA <- which(
  is.na(MaddisonSources0[[1]]) &
    is.na(MaddisonSources0[[2]]) &
    is.na(MaddisonSources0[[3]])
)
str(sourceNA)
# row1: 4, followed by the row after each all-NA row except the first.
# NOTE(review): these are used below as the first row of each country's
# section, so the all-NA rows presumably separate the sections --
# confirm against the workbook.
row1 <- c(4, sourceNA[-1] + 1)
str(row1)
# Inspect the first three and the last three of those rows.
MaddisonSources0[row1[1:3], ]
MaddisonSources0[tail(row1, n = 3), ]

## ----countryRows, eval=foundData----------------------------------------------
# Split the 'Sources' sheet into one piece per section and derive a
# table of year ranges from each piece.
# Section names are read from column 1 at each section's first row; the
# last element of row1 is left out because it only serves as the end
# marker (row1[i+1]) for the final section in the loop below.
ISOsourceNms <- MaddisonSources0[head(row1, -1), 1, drop=TRUE]
nISOsources <- length(ISOsourceNms)
MaddisonSources <- vector('list', nISOsources)
names(MaddisonSources) <- ISOsourceNms
# Get year ranges for each country
# Empty template: it is the result when there are no sections at all.
MaddisonYears <- data.frame(
  ISO      =character(0),
  yearBegin=integer(0),
  yearEnd  =integer(0),
  sourceNum=integer(0)
  )
# Per-section pieces are collected in a preallocated list and bound
# once after the loop instead of growing a data frame with rbind().
MadYrsList <- vector('list', nISOsources)
# seq_len() so the loop body is skipped when nISOsources is 0.
for (i in seq_len(nISOsources)) {
  # Rows of section i: from the row after its first row through two
  # rows before the first row of the next section.
  rowi <- (row1[i]+1):(row1[i+1]-2)
  MadSrci <- MaddisonSources0[rowi, 2:3]
  MaddisonSources[[i]] <- MadSrci
  # NOTE(review): MadDateRanges() is defined outside this file;
  # presumably it turns the 'years' strings into the yearBegin /
  # yearEnd / sourceNum columns of the template above -- confirm.
  MadYrsi0 <- MadDateRanges(MadSrci[, 1, drop=TRUE])
  MadYrsi <- cbind(ISO=ISOsourceNms[i], MadYrsi0)
  MadYrsList[[i]] <- MadYrsi
}
# Keep the empty template as the first argument: rbind() on data frames
# drops zero-row arguments when others are present and returns the
# template itself when nothing else is supplied.
MaddisonYears <- do.call(rbind, c(list(MaddisonYears), MadYrsList))

# Spot check one country in both results.
MaddisonSources[['EGY']]
MaddisonYears[MaddisonYears$ISO=='EGY', ]

# Attach a note about the post-2008 gdppc sources to the list.
attr(MaddisonSources, 'since2008') <- paste(
  "gdppc since 2008: Total Economy Database (TED) from the",
  "Conference Board for all countries included in TED and UN",
  "national accounts statistics for all others.")
head(MaddisonSources, 3)
head(MaddisonYears)

tail(MaddisonSources, 3)
tail(MaddisonYears)

# Written as *.rdb files, like the other data sets in this script.
save(MaddisonSources, file='MaddisonSources.rdb',
       compress=TRUE)
save(MaddisonYears, file='MaddisonYears.rdb',
       compress=TRUE)

