# Check that required packages are installed ----
# requireNamespace() only tests availability; it does not attach the package
# and does not emit a warning of its own when the package is missing.
if (!requireNamespace("yaml", quietly = TRUE)) {
  stop("yaml package not installed, please install it with command \n install.packages('yaml')")
}
if (!requireNamespace("ipumsr", quietly = TRUE)) {
  stop("ipumsr package not installed, please install it with command \n install.packages('ipumsr')")
}

# Setup (run each time) ----
library(ipumsr)

#' Build an ipums_ddi object from a YAML codebook
#'
#' Reads the YAML file with `yaml::yaml.load_file()`, collects the record-type
#' and per-variable layout information it contains, and passes the result to
#' the internal ipumsr constructor `make_ddi_from_scratch()`.
#'
#' Top-level YAML fields read: `record_types`, `record_type_var_name`,
#' `variables`, `project`, `data_file_name`, `data_dir_name`. For each entry of
#' `variables` the fields `name`, `label`, `start_column`, `width`,
#' `is_string_var`, `record_type`, `implied_decimals`, `is_common_var` and
#' `values` are read.
#'
#' @param file Path to the YAML file.
#' @return The value returned by `ipumsr:::make_ddi_from_scratch()`.
read_yaml_as_ddi <- function(file) {
  # NOTE(review): depending on the installed yaml version, YAML `!expr` tags
  # may be evaluated on load -- only use this on trusted files.
  d_yaml <- yaml::yaml.load_file(file)

  # Get rectype information
  rectypes <- d_yaml$record_types
  rectype_idvar <- d_yaml$record_type_var_name

  # The YAML has a `data_structure` field, but it always says hierarchical,
  # so instead treat files with a single record type as rectangular.
  filetype <- if (length(rectypes) > 1) "hierarchical" else "rectangular"

  # Get variable information: one row per variable, keeping only these fields
  var_fields <- c(
    "name", "label", "start_column", "width", "is_string_var",
    "record_type", "implied_decimals", "is_common_var"
  )
  v_info <- purrr::map_df(d_yaml$variables, ~.[var_fields])

  v_info <- dplyr::mutate(
    v_info,
    var_desc = "",
    # Last column of the field (start and end are both inclusive)
    end = start_column + width - 1,
    var_type = ifelse(is_string_var, "character", "numeric"),
    # On the right-hand side `rectypes` is the local vector of all record
    # types (v_info has no such column yet): common variables get the full
    # set, every other variable keeps its own record_type.
    rectypes = ifelse(is_common_var, list(rectypes), record_type)
  )

  v_info <- dplyr::select(
    v_info,
    var_name = name,
    var_label = label,
    var_desc = var_desc,
    start = start_column,
    end,
    imp_decim = implied_decimals,
    var_type,
    rectypes
  )

  # Nested data.frames hold the information about value encodings
  vals <- purrr::map(d_yaml$variables, ~dplyr::bind_rows(.$values))
  # Variables without any values yield an empty result; give those the same
  # value/label columns so the select() below works for every element.
  # tibble() replaces the deprecated data_frame().
  vals <- purrr::map_if(
    vals,
    ~length(.) == 0,
    ~dplyr::tibble(value = numeric(0), label = character(0))
  )
  vals <- purrr::map(vals, ~dplyr::select(., val = value, lbl = label))
  v_info$val_labels <- vals

  # Make ipums_ddi object
  # NOTE(review): make_ddi_from_scratch() is an unexported ipumsr function
  # (hence `:::`), so it may change between ipumsr releases.
  out <- ipumsr:::make_ddi_from_scratch(
    ipums_project = d_yaml$project,
    file_name = d_yaml$data_file_name,
    file_path = d_yaml$data_dir_name,
    file_type = filetype,
    rectypes = rectypes,
    rectype_idvar = rectype_idvar,
    var_info = v_info,
    conditions = "IPUMS custom data",
    citation = NULL
  )

  out
}

# Load data ----
ddi <- read_yaml_as_ddi("yrbss.yml")
yrbss <- read_ipums_micro(ddi, data_file = "ipums-yrbss.dat.gz")