---
title: "Benchmarking hdfqlr"
author: "Michael Koohafkan"
date: 2021-06-09
output: 
  rmarkdown::html_vignette:
    standalone: true
vignette: >
  %\VignetteIndexEntry{Benchmarks}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

This document provides benchmarks comparing the performance
of `hdfqlr` with that of the 
[`hdf5r`](https://cran.r-project.org/package=hdf5r) package. This vignette
previously included comparisons to the 
now-deprecated
[`h5`](https://cran.r-project.org/package=h5) package. 
Other packages that provide read (but not write) support for HDF files
were not tested.




```r
library(hdfqlr)
library(hdf5r)
library(microbenchmark)
```


## Writing HDF datasets


```r
# Benchmark target for hdfqlr: create a fresh temporary HDF5 file, write
# six datasets ("vector1" ... "vector6") of 10,000 uniform random numbers
# each, then close the file. Takes no arguments.
write_hdfqlr = function() {
	test.file = tempfile(fileext = ".h5")
	hql_create_file(test.file)
	hql_use_file(test.file)
	for (i in paste0("vector", 1:6)) {
		# Use the hdfqlr dataset writer (the same call the read-benchmark
		# fixture uses). base::write() would take the numeric vector as its
		# `file` argument and never touch the HDF file.
		hql_write_dataset(runif(10000), i)
	}
	hql_close_file(test.file)
}

# Benchmark target for hdf5r: open a new temporary HDF5 file in write mode,
# store six vectors of 10k uniform random numbers under the names
# "vector1" ... "vector6", then close everything attached to the file.
write_hdf5r = function() {
	out.path = tempfile(fileext = ".h5")
	h5 = hdf5r::H5File$new(out.path, "w")
	dataset.names = paste0("vector", 1:6)
	for (nm in dataset.names) {
		h5[[nm]] = runif(10000)
	}
	h5$close_all()
}

# Time both writers, 1000 evaluations each; timings are reported in
# milliseconds.
write.benchmark = microbenchmark(
	hdf5r = write_hdf5r(),
	hdfqlr = write_hdfqlr(),
	times = 1000L,
	unit = "ms"
)
```


|expr   |      min|        lq|      mean|   median|        uq|      max| neval|
|:------|--------:|---------:|---------:|--------:|---------:|--------:|-----:|
|hdf5r  | 150.1851| 185.27945| 199.61208| 198.1042| 211.18990| 399.0265|  1000|
|hdfqlr |  23.8104|  29.05005|  36.77295|  37.1426|  41.68425|  88.0541|  1000|

![plot of chunk write-benchmark](./write-benchmark-1.png)



## Reading HDF datasets


```r
# Fixture for the read benchmarks: one temporary HDF5 file holding six
# datasets ("vector1" ... "vector6") of 10,000 uniform random numbers each.
# `tf` and `sets` are reused by the benchmark calls further down.
sets = paste0("vector", seq_len(6))
tf = tempfile(fileext = ".h5")
hql_create_file(tf)
hql_use_file(tf)
for (i in sets) hql_write_dataset(runif(10000), i)
hql_close_file(tf)

# Benchmark target for hdfqlr: select `file`, read every dataset named in
# `sets`, close the file, and return the values as a list with one element
# per entry of `sets`.
read_hdfqlr = function(file, sets) {
	hql_use_file(file)
	contents = lapply(X = sets, FUN = hql_read_dataset)
	hql_close_file(file)
	contents
}

# Benchmark target for hdf5r: open `file` read-only, pull each dataset named
# in `sets` via an empty-index subset (`[]`), close the file, and return the
# values as a list with one element per entry of `sets`.
read_hdf5r = function(file, sets) {
	h5 = hdf5r::H5File$new(file, "r")
	read.one = function(nm) {
		dset = h5[[nm]]
		dset[]
	}
	contents = lapply(sets, read.one)
	h5$close_all()
	contents
}

# Time both readers against the shared fixture file, 1000 evaluations each;
# timings are reported in milliseconds.
read.benchmark = microbenchmark(
	hdf5r = read_hdf5r(tf, sets),
	hdfqlr = read_hdfqlr(tf, sets),
	times = 1000L,
	unit = "ms"
)
```


|expr   |      min|        lq|      mean|   median|       uq|      max| neval|
|:------|--------:|---------:|---------:|--------:|--------:|--------:|-----:|
|hdf5r  | 116.4230| 137.74210| 143.90668| 143.6394| 148.6843| 219.4179|  1000|
|hdfqlr |  12.5036|  18.11385|  21.01237|  21.9278|  23.0107|  41.2884|  1000|

![plot of chunk read-benchmark](./read-benchmark-1.png)
