Download datasets from ilostat (https://ilostat.ilo.org) via the bulk download facility (https://ilostat.ilo.org/data/bulk/).
get_ilostat( id, segment = getOption("ilostat_segment", "indicator"), type = getOption("ilostat_type", "code"), lang = getOption("ilostat_lang", "en"), time_format = getOption("ilostat_time_format", "raw"), filters = getOption("ilostat_filter", "none"), fixed = getOption("ilostat_fixed", TRUE), detail = getOption("ilostat_detail", "full"), cache = getOption("ilostat_cache", TRUE), cache_update = getOption("ilostat_cache_update", TRUE), cache_dir = getOption("ilostat_cache_dir", NULL), cache_format = getOption("ilostat_cache_format", "rds"), back = getOption("ilostat_back", TRUE), distribution = getOption("ilostat_distribution", "no"), cmd = getOption("ilostat_cmd", "none"), quiet = getOption("ilostat_quiet", FALSE) )
id | A code name for the dataset of interest.
See |
---|---|
segment | A character, way to get datasets by: |
type | a character, type of variables, |
lang | a character, code for language. Available are |
time_format | a string giving the type of conversion applied to the time
column from the ilostat format. "raw" (default)
does no conversion and returns time as character (i.e. '2017', '2017Q1', '2017M01'). A "date" converted to
a |
filters | a list;
|
fixed | a logical, if |
detail | a character, |
cache | a logical whether to do caching. Default is |
cache_update | a logical, whether to update the cache. The update check compares the last.update attribute stored in the cache file name
with the one from the table of contents. Can also be set with
options(ilostat_cache_update = FALSE). Default is |
cache_dir | a path to a cache directory. The directory has to exist.
The |
cache_format | a character, format to store on the cache |
back | a logical, |
distribution | a character, transform number of persons into distribution by sex or classif1 or classif2, |
cmd | a character, R expression used to manipulate the internal data frame |
quiet | a logical, if |
A tibble with one column for each dimension in the data, the values column for numerical values, and the metadata columns. The time dimension is held in the time column.
Data sets are downloaded from the
ilostat bulk download facility.
If only the table id
is given, the whole table is downloaded from the
bulk download facility.
The bulk download facility is the fastest method to download whole datasets. It is also often the only way, as the SDMX API has a limit of 300 000 records per request and whole datasets usually exceed that.
By default datasets from the bulk download facility are cached as they are often rather large.
Cache files are stored in a temporary directory by default or in
a named directory if cache_dir or option ilostat_cache_dir is defined.
The cache can be emptied with clean_ilostat_cache.
The id, a code for the dataset, can be searched with
get_ilostat_toc
or found in the [bulk download facility](https://ilostat.ilo.org/data/bulk/).
See citation("Rilostat"). ilostat bulk download facility user guidelines: https://ilostat.ilo.org/data/bulk/
David Bescond bescond@ilo.org
if (FALSE) {
  # Usage examples for get_ilostat(). They are wrapped in `if (FALSE)` so they
  # are never executed automatically: every call downloads data from the
  # ilostat bulk download facility.
  # NOTE(review): select(), spread() and %>% are used below without being
  # attached here -- presumably dplyr/tidyr; attach them before running.

  ############# get simple dataset
  dat <- get_ilostat("UNE_2UNE_SEX_AGE_NB_A")
  head(dat)

  dat <- get_ilostat("NZL_Q", segment = "ref_area")
  head(dat)

  # The cache directory has to exist before it is passed as `cache_dir`.
  dir.create(file.path(tempdir(), "r_cache"))
  dat <- get_ilostat(
    "UNE_2UNE_SEX_AGE_NB_A",
    cache_dir = file.path(tempdir(), "r_cache")
  )
  head(dat)
  clean_ilostat_cache(cache_dir = file.path(tempdir(), "r_cache"))

  # The option read by get_ilostat() is `ilostat_cache_update`.
  options(ilostat_cache_update = TRUE)
  dat <- get_ilostat("UNE_2UNE_SEX_AGE_NB_A")
  head(dat)
  options(ilostat_cache_update = FALSE)

  options(ilostat_cache_dir = file.path(tempdir(), "r_cache"))
  dat <- get_ilostat("UNE_2UNE_SEX_AGE_NB_A")
  clean_ilostat_cache()

  ############# get multiple datasets
  dat <- get_ilostat(
    c("CPI_ACPI_COI_RT_M", "CPI_ACPI_COI_RT_Q"),
    cache = FALSE
  )
  head(dat)

  toc <- get_ilostat_toc(search = "CPI_")
  head(toc)
  dat <- get_ilostat(toc, cache = FALSE) # id as a tibble

  ############# get datasets with filters
  dat <- get_ilostat(
    id = c("UNE_2UNE_SEX_AGE_NB_A", "EMP_2EMP_SEX_AGE_NB_A"),
    filters = list(
      ref_area = "FRA",
      classif1 = "AGE_YTHADULT_YGE15",
      time = "2016",
      sex = c("T", "SEX_F")
    ),
    quiet = TRUE
  )
  head(dat)
  clean_ilostat_cache()

  ############# store in other format
  # Cache into the directory created here (it has to exist beforehand).
  ilostat_dir <- file.path(tempdir(), "ilostat")
  dir.create(ilostat_dir)
  dat <- get_ilostat(
    "UNE_2UNE_SEX_AGE_NB_A",
    cache_dir = ilostat_dir,
    cache_format = "csv"
  )
  dat <- get_ilostat(
    "UNE_2UNE_SEX_AGE_NB_A",
    cache_dir = ilostat_dir,
    cache_format = "dta"
  )

  ############# use distribution
  dat <- get_ilostat("EMP_TEMP_SEX_STE_GEO_NB_A", distribution = "classif1")
  # obs_status and note_classif cannot be pivoted because they are stored at
  # value level, so drop them first
  select(dat, -obs_status, -note_classif) %>%
    spread(classif1, obs_value)

  # use label: re-download with labels so the *.label columns exist in `dat`
  dat <- get_ilostat(
    "EMP_TEMP_SEX_STE_GEO_NB_A",
    distribution = "classif1",
    type = "label"
  )
  select(dat, -obs_status.label, -note_classif.label) %>%
    spread(classif1.label, obs_value)
  clean_ilostat_cache()

  ############# advanced manipulation
  dat <- get_ilostat(
    "UNE_2UNE_SEX_AGE_NB_A",
    cmd = "dat %>% count(ref_area)",
    quiet = TRUE
  )
  label_ilostat(dat, code = "ref_area")
  clean_ilostat_cache()
}