diff --git a/gc/default/default.c b/gc/default/default.c index bfddb08ea5c23d..9771af004eccc3 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -452,7 +452,6 @@ typedef int (*gc_compact_compare_func)(const void *l, const void *r, void *d); typedef struct rb_heap_struct { short slot_size; - bits_t slot_bits_mask; /* Basic statistics */ size_t total_allocated_pages; @@ -765,6 +764,7 @@ struct free_slot { struct heap_page { unsigned short slot_size; + uint32_t slot_div_magic; unsigned short total_slots; unsigned short free_slots; unsigned short final_slots; @@ -841,17 +841,29 @@ heap_page_in_global_empty_pages_pool(rb_objspace_t *objspace, struct heap_page * #define GET_PAGE_HEADER(x) (&GET_PAGE_BODY(x)->header) #define GET_HEAP_PAGE(x) (GET_PAGE_HEADER(x)->page) -#define NUM_IN_PAGE(p) (((bits_t)(p) & HEAP_PAGE_ALIGN_MASK) / BASE_SLOT_SIZE) -#define BITMAP_INDEX(p) (NUM_IN_PAGE(p) / BITS_BITLENGTH ) -#define BITMAP_OFFSET(p) (NUM_IN_PAGE(p) & (BITS_BITLENGTH-1)) -#define BITMAP_BIT(p) ((bits_t)1 << BITMAP_OFFSET(p)) +static uint32_t slot_div_magics[HEAP_COUNT]; + +static inline size_t +slot_index_for_offset(size_t offset, uint32_t div_magic) +{ + return (size_t)(((uint64_t)offset * div_magic) >> 32); +} + +#define SLOT_INDEX(page, p) slot_index_for_offset((uintptr_t)(p) - (page)->start, (page)->slot_div_magic) +#define SLOT_BITMAP_INDEX(page, p) (SLOT_INDEX(page, p) / BITS_BITLENGTH) +#define SLOT_BITMAP_OFFSET(page, p) (SLOT_INDEX(page, p) & (BITS_BITLENGTH - 1)) +#define SLOT_BITMAP_BIT(page, p) ((bits_t)1 << SLOT_BITMAP_OFFSET(page, p)) + +#define _MARKED_IN_BITMAP(bits, page, p) ((bits)[SLOT_BITMAP_INDEX(page, p)] & SLOT_BITMAP_BIT(page, p)) +#define _MARK_IN_BITMAP(bits, page, p) ((bits)[SLOT_BITMAP_INDEX(page, p)] |= SLOT_BITMAP_BIT(page, p)) +#define _CLEAR_IN_BITMAP(bits, page, p) ((bits)[SLOT_BITMAP_INDEX(page, p)] &= ~SLOT_BITMAP_BIT(page, p)) + +#define MARKED_IN_BITMAP(bits, p) _MARKED_IN_BITMAP(bits, GET_HEAP_PAGE(p), p) +#define MARK_IN_BITMAP(bits, p) _MARK_IN_BITMAP(bits, GET_HEAP_PAGE(p), p) +#define CLEAR_IN_BITMAP(bits, p) _CLEAR_IN_BITMAP(bits, GET_HEAP_PAGE(p), p) -/* Bitmap Operations */ -#define MARKED_IN_BITMAP(bits, p) ((bits)[BITMAP_INDEX(p)] & BITMAP_BIT(p)) -#define MARK_IN_BITMAP(bits, p) ((bits)[BITMAP_INDEX(p)] = (bits)[BITMAP_INDEX(p)] | BITMAP_BIT(p)) -#define CLEAR_IN_BITMAP(bits, p) ((bits)[BITMAP_INDEX(p)] = (bits)[BITMAP_INDEX(p)] & ~BITMAP_BIT(p)) +#define NUM_IN_PAGE(p) (((bits_t)(p) & HEAP_PAGE_ALIGN_MASK) / BASE_SLOT_SIZE) -/* getting bitmap */ #define GET_HEAP_MARK_BITS(x) (&GET_HEAP_PAGE(x)->mark_bits[0]) #define GET_HEAP_PINNED_BITS(x) (&GET_HEAP_PAGE(x)->pinned_bits[0]) #define GET_HEAP_UNCOLLECTIBLE_BITS(x) (&GET_HEAP_PAGE(x)->uncollectible_bits[0]) @@ -861,9 +873,11 @@ heap_page_in_global_empty_pages_pool(rb_objspace_t *objspace, struct heap_page * static int RVALUE_AGE_GET(VALUE obj) { - bits_t *age_bits = GET_HEAP_PAGE(obj)->age_bits; - int idx = BITMAP_INDEX(obj) * 2; - int shift = BITMAP_OFFSET(obj); + struct heap_page *page = GET_HEAP_PAGE(obj); + bits_t *age_bits = page->age_bits; + size_t slot_idx = SLOT_INDEX(page, obj); + size_t idx = (slot_idx / BITS_BITLENGTH) * 2; + int shift = (int)(slot_idx & (BITS_BITLENGTH - 1)); int lo = (age_bits[idx] >> shift) & 1; int hi = (age_bits[idx + 1] >> shift) & 1; return lo | (hi << 1); @@ -873,9 +887,11 @@ static void RVALUE_AGE_SET_BITMAP(VALUE obj, int age) { RUBY_ASSERT(age <= RVALUE_OLD_AGE); - bits_t *age_bits = GET_HEAP_PAGE(obj)->age_bits; - int idx = BITMAP_INDEX(obj) * 2; - int shift = BITMAP_OFFSET(obj); + struct heap_page *page = GET_HEAP_PAGE(obj); + bits_t *age_bits = page->age_bits; + size_t slot_idx = SLOT_INDEX(page, obj); + size_t idx = (slot_idx / BITS_BITLENGTH) * 2; + int shift = (int)(slot_idx & (BITS_BITLENGTH - 1)); bits_t mask = (bits_t)1 << shift; age_bits[idx] = (age_bits[idx] & ~mask) | ((bits_t)(age & 1) << shift); @@ -1986,8 +2002,12 @@ heap_add_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page) page->start = start; page->total_slots = slot_count; page->slot_size = heap->slot_size; + page->slot_div_magic = slot_div_magics[heap - heaps]; page->heap = heap; + memset(&page->wb_unprotected_bits[0], 0, HEAP_PAGE_BITMAP_SIZE); + memset(&page->age_bits[0], 0, sizeof(page->age_bits)); + asan_unlock_freelist(page); page->freelist = NULL; asan_unpoison_memory_region(page->body, HEAP_PAGE_SIZE, false); @@ -2598,7 +2618,7 @@ is_pointer_to_heap(rb_objspace_t *objspace, const void *ptr) else { if (p < page->start) return FALSE; if (p >= page->start + (page->total_slots * page->slot_size)) return FALSE; - if ((NUM_IN_PAGE(p) * BASE_SLOT_SIZE) % page->slot_size != 0) return FALSE; + if ((p - page->start) % page->slot_size != 0) return FALSE; return TRUE; } @@ -3489,8 +3509,6 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit { struct heap_page *sweep_page = ctx->page; short slot_size = sweep_page->slot_size; - short slot_bits = slot_size / BASE_SLOT_SIZE; - GC_ASSERT(slot_bits > 0); do { VALUE vp = (VALUE)p; @@ -3566,7 +3584,7 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit } } p += slot_size; - bitset >>= slot_bits; + bitset >>= 1; } while (bitset); } @@ -3591,50 +3609,33 @@ gc_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct gc_sweep_context p = (uintptr_t)sweep_page->start; bits = sweep_page->mark_bits; + short slot_size = sweep_page->slot_size; + int total_slots = sweep_page->total_slots; + int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH); - int page_rvalue_count = sweep_page->total_slots * (sweep_page->slot_size / BASE_SLOT_SIZE); - int out_of_range_bits = (NUM_IN_PAGE(p) + page_rvalue_count) % BITS_BITLENGTH; - if (out_of_range_bits != 0) { // sizeof(RVALUE) == 64 - bits[BITMAP_INDEX(p) + page_rvalue_count / BITS_BITLENGTH] |= ~(((bits_t)1 << out_of_range_bits) - 1); + int out_of_range_bits = total_slots % BITS_BITLENGTH; + if (out_of_range_bits != 0) { + bits[bitmap_plane_count - 1] |= ~(((bits_t)1 << out_of_range_bits) - 1); } - /* The last bitmap plane may not be used if the last plane does not - * have enough space for the slot_size. In that case, the last plane must - * be skipped since none of the bits will be set. */ - int bitmap_plane_count = CEILDIV(NUM_IN_PAGE(p) + page_rvalue_count, BITS_BITLENGTH); - GC_ASSERT(bitmap_plane_count == HEAP_PAGE_BITMAP_LIMIT - 1 || - bitmap_plane_count == HEAP_PAGE_BITMAP_LIMIT); - - bits_t slot_mask = heap->slot_bits_mask; - // Clear wb_unprotected and age bits for all unmarked slots { bits_t *wb_unprotected_bits = sweep_page->wb_unprotected_bits; bits_t *age_bits = sweep_page->age_bits; for (int i = 0; i < bitmap_plane_count; i++) { - bits_t unmarked = ~bits[i] & slot_mask; + bits_t unmarked = ~bits[i]; wb_unprotected_bits[i] &= ~unmarked; age_bits[i * 2] &= ~unmarked; age_bits[i * 2 + 1] &= ~unmarked; } } - // Skip out of range slots at the head of the page - bitset = ~bits[0]; - bitset >>= NUM_IN_PAGE(p); - bitset &= slot_mask; - if (bitset) { - gc_sweep_plane(objspace, heap, p, bitset, ctx); - } - p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE; - - for (int i = 1; i < bitmap_plane_count; i++) { + for (int i = 0; i < bitmap_plane_count; i++) { bitset = ~bits[i]; - bitset &= slot_mask; if (bitset) { gc_sweep_plane(objspace, heap, p, bitset, ctx); } - p += BITS_BITLENGTH * BASE_SLOT_SIZE; + p += BITS_BITLENGTH * slot_size; } if (!heap->compact_cursor) { @@ -4086,7 +4087,7 @@ invalidate_moved_plane(rb_objspace_t *objspace, struct heap_page *page, uintptr_ GC_ASSERT(BUILTIN_TYPE(forwarding_object) != T_NONE); } } - p += BASE_SLOT_SIZE; + p += page->slot_size; bitset >>= 1; } while (bitset); } @@ -4098,25 +4099,21 @@ invalidate_moved_page(rb_objspace_t *objspace, struct heap_page *page) int i; bits_t *mark_bits, *pin_bits; bits_t bitset; + short slot_size = page->slot_size; + int total_slots = page->total_slots; + int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH); mark_bits = page->mark_bits; pin_bits = page->pinned_bits; uintptr_t p = page->start; - // Skip out of range slots at the head of the page - bitset = pin_bits[0] & ~mark_bits[0]; - bitset >>= NUM_IN_PAGE(p); - invalidate_moved_plane(objspace, page, p, bitset); - p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE; - - for (i=1; i < HEAP_PAGE_BITMAP_LIMIT; i++) { + for (i=0; i < bitmap_plane_count; i++) { /* Moved objects are pinned but never marked. We reuse the pin bits * to indicate there is a moved object in this slot. */ bitset = pin_bits[i] & ~mark_bits[i]; - invalidate_moved_plane(objspace, page, p, bitset); - p += BITS_BITLENGTH * BASE_SLOT_SIZE; + p += BITS_BITLENGTH * slot_size; } } #endif @@ -5310,7 +5307,7 @@ gc_remember_unprotected(rb_objspace_t *objspace, VALUE obj) } static inline void -gc_marks_wb_unprotected_objects_plane(rb_objspace_t *objspace, uintptr_t p, bits_t bits) +gc_marks_wb_unprotected_objects_plane(rb_objspace_t *objspace, uintptr_t p, bits_t bits, short slot_size) { if (bits) { do { @@ -5320,7 +5317,7 @@ gc_marks_wb_unprotected_objects_plane(rb_objspace_t *objspace, uintptr_t p, bits GC_ASSERT(RVALUE_MARKED(objspace, (VALUE)p)); gc_mark_children(objspace, (VALUE)p); } - p += BASE_SLOT_SIZE; + p += slot_size; bits >>= 1; } while (bits); } @@ -5335,18 +5332,15 @@ gc_marks_wb_unprotected_objects(rb_objspace_t *objspace, rb_heap_t *heap) bits_t *mark_bits = page->mark_bits; bits_t *wbun_bits = page->wb_unprotected_bits; uintptr_t p = page->start; + short slot_size = page->slot_size; + int total_slots = page->total_slots; + int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH); size_t j; - bits_t bits = mark_bits[0] & wbun_bits[0]; - bits >>= NUM_IN_PAGE(p); - gc_marks_wb_unprotected_objects_plane(objspace, p, bits); - p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE; - - for (j=1; jslot_size; - short slot_bits = slot_size / BASE_SLOT_SIZE; - GC_ASSERT(slot_bits > 0); do { VALUE vp = (VALUE)p; @@ -5619,7 +5611,7 @@ gc_compact_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t b } } p += slot_size; - bitset >>= slot_bits; + bitset >>= 1; } while (bitset); return true; @@ -5634,26 +5626,21 @@ gc_compact_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page bits_t *mark_bits, *pin_bits; bits_t bitset; uintptr_t p = page->start; + short slot_size = page->slot_size; + int total_slots = page->total_slots; + int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH); mark_bits = page->mark_bits; pin_bits = page->pinned_bits; - // objects that can be moved are marked and not pinned - bitset = (mark_bits[0] & ~pin_bits[0]); - bitset >>= NUM_IN_PAGE(p); - if (bitset) { - if (!gc_compact_plane(objspace, heap, (uintptr_t)p, bitset, page)) - return false; - } - p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE; - - for (int j = 1; j < HEAP_PAGE_BITMAP_LIMIT; j++) { + for (int j = 0; j < bitmap_plane_count; j++) { + // objects that can be moved are marked and not pinned bitset = (mark_bits[j] & ~pin_bits[j]); if (bitset) { if (!gc_compact_plane(objspace, heap, (uintptr_t)p, bitset, page)) return false; } - p += BITS_BITLENGTH * BASE_SLOT_SIZE; + p += BITS_BITLENGTH * slot_size; } return true; @@ -5940,7 +5927,7 @@ rgengc_remember(rb_objspace_t *objspace, VALUE obj) #endif static inline void -rgengc_rememberset_mark_plane(rb_objspace_t *objspace, uintptr_t p, bits_t bitset) +rgengc_rememberset_mark_plane(rb_objspace_t *objspace, uintptr_t p, bits_t bitset, short slot_size) { if (bitset) { do { @@ -5956,7 +5943,7 @@ rgengc_rememberset_mark_plane(rb_objspace_t *objspace, uintptr_t p, bits_t bitse rb_darray_append_without_gc(&objspace->weak_references, obj); } } - p += BASE_SLOT_SIZE; + p += slot_size; bitset >>= 1; } while (bitset); } @@ -5975,6 +5962,9 @@ rgengc_rememberset_mark(rb_objspace_t *objspace, rb_heap_t *heap) ccan_list_for_each(&heap->pages, page, page_node) { if (page->flags.has_remembered_objects | page->flags.has_uncollectible_wb_unprotected_objects) { uintptr_t p = page->start; + short slot_size = page->slot_size; + int total_slots = page->total_slots; + int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH); bits_t bitset, bits[HEAP_PAGE_BITMAP_LIMIT]; bits_t *remembered_bits = page->remembered_bits; bits_t *uncollectible_bits = page->uncollectible_bits; @@ -5984,21 +5974,16 @@ rgengc_rememberset_mark(rb_objspace_t *objspace, rb_heap_t *heap) else if (page->flags.has_remembered_objects) has_old++; else if (page->flags.has_uncollectible_wb_unprotected_objects) has_shady++; #endif - for (j=0; jflags.has_remembered_objects = FALSE; - bitset = bits[0]; - bitset >>= NUM_IN_PAGE(p); - rgengc_rememberset_mark_plane(objspace, p, bitset); - p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE; - - for (j=1; j < HEAP_PAGE_BITMAP_LIMIT; j++) { + for (j=0; j < (size_t)bitmap_plane_count; j++) { bitset = bits[j]; - rgengc_rememberset_mark_plane(objspace, p, bitset); - p += BITS_BITLENGTH * BASE_SLOT_SIZE; + rgengc_rememberset_mark_plane(objspace, p, bitset, slot_size); + p += BITS_BITLENGTH * slot_size; } } #if PROFILE_REMEMBERSET_MARK @@ -9526,17 +9511,7 @@ rb_gc_impl_objspace_init(void *objspace_ptr) rb_heap_t *heap = &heaps[i]; heap->slot_size = (1 << i) * BASE_SLOT_SIZE; - - // Bitmask with every (1 << i)th bit set, representing aligned slot positions - static const bits_t slot_bits_masks[] = { - ~(bits_t)0, // i=0: every 1st bit - (bits_t)0x5555555555555555ULL, // i=1: every 2nd bit - (bits_t)0x1111111111111111ULL, // i=2: every 4th bit - (bits_t)0x0101010101010101ULL, // i=3: every 8th bit - (bits_t)0x0001000100010001ULL, // i=4: every 16th bit - }; - GC_ASSERT(HEAP_COUNT == sizeof(slot_bits_masks) / sizeof(slot_bits_masks[0])); - heap->slot_bits_mask = slot_bits_masks[i]; + slot_div_magics[i] = (uint32_t)((uint64_t)UINT32_MAX / heap->slot_size + 1); ccan_list_head_init(&heap->pages); } diff --git a/prism/prism.c b/prism/prism.c index 34e5d38b0a8bef..ca0f2a55aa3485 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -9783,6 +9783,12 @@ parser_lex(pm_parser_t *parser) { unsigned int semantic_token_seen = parser->semantic_token_seen; parser->semantic_token_seen = true; + // We'll jump to this label when we are about to encounter an EOF. + // If we still have lex_modes on the stack, we pop them so that cleanup + // can happen. For example, we should still continue parsing after a heredoc + // identifier, even if the heredoc body was syntax invalid. + switch_lex_modes: + switch (parser->lex_modes.current->mode) { case PM_LEX_DEFAULT: case PM_LEX_EMBEXPR: @@ -9856,6 +9862,14 @@ parser_lex(pm_parser_t *parser) { // We'll check if we're at the end of the file. If we are, then we // need to return the EOF token. if (parser->current.end >= parser->end) { + // We may be missing closing tokens. We should pop modes one by one + // to do the appropriate cleanup like moving next_start for heredocs. + // Only when no mode is remaining will we actually emit the EOF token. + if (parser->lex_modes.current->mode != PM_LEX_DEFAULT) { + lex_mode_pop(parser); + goto switch_lex_modes; + } + // If we hit EOF, but the EOF came immediately after a newline, // set the start of the token to the newline. This way any EOF // errors will be reported as happening on that line rather than @@ -15433,7 +15447,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) { pm_token_t opening = parser->previous; pm_statements_node_t *statements = NULL; - if (!match1(parser, PM_TOKEN_EMBEXPR_END)) { + if (!match3(parser, PM_TOKEN_EMBEXPR_END, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) { pm_accepts_block_stack_push(parser, true); statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1)); pm_accepts_block_stack_pop(parser); diff --git a/test/prism/errors/unterminated_heredoc_and_embexpr.txt b/test/prism/errors/unterminated_heredoc_and_embexpr.txt new file mode 100644 index 00000000000000..bed7fcd24efc4e --- /dev/null +++ b/test/prism/errors/unterminated_heredoc_and_embexpr.txt @@ -0,0 +1,11 @@ +<= "3.3" - def test_lex_compare - prism = Prism.lex_compat(File.read(__FILE__), version: "current").value - ripper = Ripper.lex(File.read(__FILE__)) + def test_lex_compat + source = "foo bar" + prism = Prism.lex_compat(source, version: "current").value + ripper = Ripper.lex(source) assert_equal(ripper, prism) end end + + def test_lex_interpolation_unterminated + assert_equal( + %i[STRING_BEGIN EMBEXPR_BEGIN EOF], + token_types('"#{') + ) + + assert_equal( + %i[STRING_BEGIN EMBEXPR_BEGIN IGNORED_NEWLINE EOF], + token_types('"#{' + "\n") + ) + end + + def test_lex_interpolation_unterminated_with_content + # FIXME: Emits EOL twice. + assert_equal( + %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT EOF EOF], + token_types('"#{C') + ) + + assert_equal( + %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT NEWLINE EOF], + token_types('"#{C' + "\n") + ) + end + + def test_lex_heredoc_unterminated + code = <<~'RUBY'.strip + <