diff --git a/src/sas/readstat_sas7bcat_read.c b/src/sas/readstat_sas7bcat_read.c index 20eddb6..aac17ba 100644 --- a/src/sas/readstat_sas7bcat_read.c +++ b/src/sas/readstat_sas7bcat_read.c @@ -43,16 +43,24 @@ static void sas7bcat_ctx_free(sas7bcat_ctx_t *ctx) { } static readstat_error_t sas7bcat_parse_value_labels(const char *value_start, size_t value_labels_len, - int label_count_used, int label_count_capacity, const char *name, sas7bcat_ctx_t *ctx) { + int label_count_used, int label_count_capacity, int default_label_pos, const char *name, + sas7bcat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; int i; const char *lbp1 = value_start; - uint32_t *value_offset = readstat_calloc(label_count_used, sizeof(uint32_t)); + uint32_t *value_offset = NULL; /* Doubles appear to be stored as big-endian, always */ int bswap_doubles = machine_is_little_endian(); - int is_string = (name[0] == '$'); + int is_string = 0; char *label = NULL; + if (name[0] == '$') { + is_string = 1; + } else if (name[0] == '@') { + goto cleanup; // Skip informats + } + + value_offset = readstat_calloc(label_count_used, sizeof(uint32_t)); if (value_offset == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; @@ -70,7 +78,7 @@ static readstat_error_t sas7bcat_parse_value_labels(const char *value_start, siz goto cleanup; } uint32_t label_pos = sas_read4(&lbp1[10+ctx->pad1], ctx->bswap); - if (label_pos >= label_count_used) { + if (label_pos >= label_count_used || (default_label_pos > 0 && label_pos == default_label_pos - 1)) { retval = READSTAT_ERROR_PARSE; goto cleanup; } @@ -80,54 +88,61 @@ static readstat_error_t sas7bcat_parse_value_labels(const char *value_start, siz } const char *lbp2 = lbp1; + readstat_value_t value = { 0 }; /* Pass 2 -- parse pairs of values & labels */ - for (i=0; i 0 && i == default_label_pos - 1; + if (!is_default_label) { + lbp1 = value_start + value_offset[i]; - if (&lbp1[30] - value_start > value_labels_len || - &lbp2[10] - value_start > value_labels_len) { - retval = READSTAT_ERROR_PARSE; - goto cleanup; - } - readstat_value_t value = { .type = is_string ? READSTAT_TYPE_STRING : READSTAT_TYPE_DOUBLE }; - char string_val[4*16+1]; - if (is_string) { - size_t value_entry_len = 6 + sas_read2(&lbp1[2], ctx->bswap); - retval = readstat_convert(string_val, sizeof(string_val), - &lbp1[value_entry_len-16], 16, ctx->converter); - if (retval != READSTAT_OK) + if (&lbp1[30] - value_start > value_labels_len) { + retval = READSTAT_ERROR_PARSE; goto cleanup; + } + value.type = is_string ? READSTAT_TYPE_STRING : READSTAT_TYPE_DOUBLE; + char string_val[4*16+1]; + if (is_string) { + size_t value_entry_len = 6 + sas_read2(&lbp1[2], ctx->bswap); + if (&lbp1[value_entry_len] - value_start > value_labels_len) { + retval = READSTAT_ERROR_PARSE; + goto cleanup; + } + retval = readstat_convert(string_val, sizeof(string_val), + &lbp1[value_entry_len-16], 16, ctx->converter); + if (retval != READSTAT_OK) + goto cleanup; - value.v.string_value = string_val; - } else { - uint64_t val = sas_read8(&lbp1[22], bswap_doubles); - double dval = NAN; - if ((val | 0xFF0000000000) == 0xFFFFFFFFFFFF) { - sas_assign_tag(&value, (val >> 40)); + value.v.string_value = string_val; } else { - memcpy(&dval, &val, 8); - if (dval > 0.0) { - val = ~val; - memcpy(&dval, &val, 8); + uint64_t val = sas_read8(&lbp1[22], bswap_doubles); + double dval = NAN; + if ((val | 0xFF0000000000) == 0xFFFFFFFFFFFF) { + sas_assign_tag(&value, (val >> 40)); } else { - dval *= -1.0; + memcpy(&dval, &val, 8); + if (dval > 0.0) { + val = ~val; + memcpy(&dval, &val, 8); + } else { + dval *= -1.0; + } } - } - value.v.double_value = dval; + value.v.double_value = dval; + } } - size_t label_len = sas_read2(&lbp2[8], ctx->bswap); if (&lbp2[10] > value_start + value_labels_len) { retval = READSTAT_ERROR_PARSE; goto cleanup; } + size_t label_len = sas_read2(&lbp2[8], ctx->bswap); /* Some labels seem to overflow the reported block length, truncate it */ /* (Observed with formats.sasbcat from GSS2021, produced with 9.0401M6X64_SR12R2 */ if (label_len > value_start + value_labels_len - &lbp2[10]) { label_len = value_start + value_labels_len - &lbp2[10]; } - if (ctx->value_label_handler) { + if (ctx->value_label_handler && !is_default_label) { label = realloc(label, 4 * label_len + 1); retval = readstat_convert(label, 4 * label_len + 1, &lbp2[10], label_len, ctx->converter); @@ -155,6 +170,7 @@ static readstat_error_t sas7bcat_parse_block(const char *data, size_t data_size, size_t pad = 0; uint64_t label_count_capacity = 0; uint64_t label_count_used = 0; + uint64_t default_label_pos = 0; int payload_offset = 106; uint16_t flags = 0; char name[4*32+1]; @@ -167,11 +183,18 @@ static readstat_error_t sas7bcat_parse_block(const char *data, size_t data_size, if (ctx->u64) { label_count_capacity = sas_read8(&data[42+pad], ctx->bswap); label_count_used = sas_read8(&data[50+pad], ctx->bswap); + default_label_pos = sas_read8(&data[74+pad], ctx->bswap); payload_offset += 32; } else { label_count_capacity = sas_read4(&data[38+pad], ctx->bswap); label_count_used = sas_read4(&data[42+pad], ctx->bswap); + default_label_pos = sas_read4(&data[66+pad], ctx->bswap); + } + + if (default_label_pos > label_count_used) { + retval = READSTAT_ERROR_PARSE; + goto cleanup; } if ((retval = readstat_convert(name, sizeof(name), &data[8], 8, ctx->converter)) != READSTAT_OK) @@ -198,7 +221,7 @@ static readstat_error_t sas7bcat_parse_block(const char *data, size_t data_size, goto cleanup; if ((retval = sas7bcat_parse_value_labels(&data[payload_offset+pad], data_size - payload_offset - pad, - label_count_used, label_count_capacity, name, ctx)) != READSTAT_OK) + label_count_used, label_count_capacity, default_label_pos, name, ctx)) != READSTAT_OK) goto cleanup; cleanup: @@ -479,8 +502,9 @@ readstat_error_t readstat_parse_sas7bcat(readstat_parser_t *parser, const char * retval = READSTAT_ERROR_READ; goto cleanup; } - if (memcmp(&page[16], "XLSR", sizeof("XLSR")-1) == 0) { - retval = sas7bcat_augment_index(&page[16], ctx->page_size - 16, ctx); + int offset = ctx->u64 ? 32 : 16; + if (memcmp(&page[offset], "XLSR", sizeof("XLSR")-1) == 0) { + retval = sas7bcat_augment_index(&page[offset], ctx->page_size - offset, ctx); if (retval != READSTAT_OK) goto cleanup; }