Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 41 additions & 41 deletions ext/json/parser/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -345,22 +345,25 @@ static void rvalue_stack_eagerly_release(VALUE handle)
#define JSON_FRAME_STACK_INITIAL_CAPA 32

enum json_frame_type {
JSON_FRAME_ROOT,
JSON_FRAME_ARRAY,
JSON_FRAME_OBJECT,
JSON_FRAME_ROOT, // == JSON_PHASE_DONE
JSON_FRAME_ARRAY, // == JSON_PHASE_ARRAY_COMMA
JSON_FRAME_OBJECT, // == JSON_PHASE_OBJECT_COMMA
};

// Where a frame is within its container's grammar. This is the entirety of the
// parser's "what to do next" state: json_parse_any dispatches on the top
// frame's phase and holds no resume state in C locals, so a parse can stop at
// any value boundary and be resumed purely from the (persistable) frame stack.
//
// The first three phases are deliberately equal to the corresponding json_frame_type
// to simplify the transition of phase in json_value_completed.
enum json_frame_phase {
JSON_PHASE_DONE = JSON_FRAME_ROOT, // root only: the document value has been parsed
JSON_PHASE_ARRAY_COMMA = JSON_FRAME_ARRAY, // after a value: expecting ',' or the closing ']'
JSON_PHASE_OBJECT_COMMA = JSON_FRAME_OBJECT, // after a value: expecting ',' or the closing '}'
JSON_PHASE_VALUE, // expecting a value (document root, array element, or object value after ':')
JSON_PHASE_ARRAY_COMMA, // after a value: expecting ',' or the closing ']'
JSON_PHASE_OBJECT_KEY, // expecting a '"' key (after '{' or ',')
JSON_PHASE_OBJECT_COMMA, // after a value: expecting ',' or the closing '}'
JSON_PHASE_OBJECT_COLON, // object only: after a key, expecting ':'
JSON_PHASE_DONE, // root only: the document value has been parsed
};

typedef struct json_frame_struct {
Expand Down Expand Up @@ -1422,9 +1425,7 @@ static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_Par

static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
const char *start = state->cursor;
state->cursor++;
return json_parse_number(state, config, true, start);
return json_parse_number(state, config, true, state->cursor - 1);
}

// How many values (array elements, or interleaved object keys+values) have been
Expand All @@ -1442,18 +1443,27 @@ static inline long json_frame_entry_count(const json_frame *frame, const rvalue_
// after a container close is the freshly re-exposed parent.
static inline void json_value_completed(json_frame *frame)
{
switch (frame->type) {
case JSON_FRAME_ROOT:
frame->phase = JSON_PHASE_DONE;
return;
case JSON_FRAME_ARRAY:
frame->phase = JSON_PHASE_ARRAY_COMMA;
return;
case JSON_FRAME_OBJECT:
frame->phase = JSON_PHASE_OBJECT_COMMA;
return;
JSON_ASSERT((int)JSON_PHASE_DONE == (int)JSON_FRAME_ROOT);
JSON_ASSERT((int)JSON_PHASE_ARRAY_COMMA == (int)JSON_FRAME_ARRAY);
JSON_ASSERT((int)JSON_PHASE_OBJECT_COMMA == (int)JSON_FRAME_OBJECT);

frame->phase = (enum json_frame_phase) frame->type;
}

// Try to consume the literal `keyword` at the current cursor position.
//
// `offset` is the number of leading bytes of `keyword` that are skipped by the
// comparison (e.g. the first byte when the caller already dispatched on it via
// `switch (peek(state))`). Only `keyword[offset..]` is compared against the input.
//
// Returns true and advances the cursor past the whole keyword on a match.
// Returns false and leaves the cursor untouched otherwise, so the caller can
// raise a parse error or try another interpretation of the input.
static inline bool json_match_keyword(JSON_ParserState *state, const char *keyword, size_t offset)
{
    // It is assumed that since `keyword` is always a literal, the compiler is able to constantize this
    // `strlen` and several other computations in that routine, such as eliminating the `if (resumable)` branch.

    size_t len = strlen(keyword);

    // Note: a memcmp with a small power-of-two length and a literal string compiles to an integer comparison.
    // That's why we sometimes compare starting from the first byte and sometimes from the second.
    if (rest(state) >= len && (memcmp(state->cursor + offset, keyword + offset, len - offset) == 0)) {
        state->cursor += len;
        return true;
    }

    // A mismatch (or truncated input) is a normal outcome that callers handle,
    // so this path must remain reachable: no UNREACHABLE marker here.
    return false;
}

// Parse an arbitrary JSON value iteratively. This is a state machine driven
Expand All @@ -1479,27 +1489,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)

switch (peek(state)) {
case 'n':
if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
state->cursor += 4;
if (json_match_keyword(state, "null", 0)) {
json_push_value(state, config, Qnil);
json_value_completed(frame);
break;
}

raise_parse_error("unexpected token %s", state);
case 't':
if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
state->cursor += 4;
if (json_match_keyword(state, "true", 0)) {
json_push_value(state, config, Qtrue);
json_value_completed(frame);
break;
}

raise_parse_error("unexpected token %s", state);
case 'f':
// Note: a memcmp with a small power-of-two length compiles to an integer comparison
if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
state->cursor += 5;
if (json_match_keyword(state, "false", 1)) {
json_push_value(state, config, Qfalse);
json_value_completed(frame);
break;
Expand All @@ -1508,35 +1514,29 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
raise_parse_error("unexpected token %s", state);
case 'N':
// Note: a memcmp with a small power-of-two length compiles to an integer comparison
if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
state->cursor += 3;
if (config->allow_nan && json_match_keyword(state, "NaN", 1)) {
json_push_value(state, config, CNaN);
json_value_completed(frame);
break;
}

raise_parse_error("unexpected token %s", state);
case 'I':
if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
state->cursor += 8;
if (config->allow_nan && json_match_keyword(state, "Infinity", 0)) {
json_push_value(state, config, CInfinity);
json_value_completed(frame);
break;
}

raise_parse_error("unexpected token %s", state);
case '-': {
// Note: a memcmp with a small power-of-two length compiles to an integer comparison
if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
if (config->allow_nan) {
state->cursor += 9;
json_push_value(state, config, CMinusInfinity);
json_value_completed(frame);
break;
} else {
raise_parse_error("unexpected token %s", state);
}
state->cursor++;
if (config->allow_nan && json_match_keyword(state, "Infinity", 0)) {
json_push_value(state, config, CMinusInfinity);
json_value_completed(frame);
break;
}

json_push_value(state, config, json_parse_negative_number(state, config));
json_value_completed(frame);
break;
Expand Down
92 changes: 43 additions & 49 deletions ext/objspace/object_tracing.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ struct traceobj_arg {
int running;
int keep_remains;
VALUE newobj_trace;
VALUE freeobj_trace;
st_table *object_table; /* obj (VALUE) -> allocation_info */
st_table *str_table; /* cstr -> refcount */
struct traceobj_arg *prev_traceobj_arg;
Expand Down Expand Up @@ -96,13 +95,11 @@ newobj_i(VALUE tpval, void *data)
st_data_t v;

if (st_lookup(arg->object_table, (st_data_t)obj, &v)) {
/* keep_remains kept this slot's entry after its object was freed. The
* allocator has now reused that address, so recycle the dead entry's
* info. A living entry here would mean two live objects at one address. */
info = (struct allocation_info *)v;
if (arg->keep_remains) {
if (info->living) {
/* Do nothing: the entry may still be marked living if FREEOBJ events were missed while tracing was suppressed. */
}
}
/* reuse info */
assert(!info->living);
delete_unique_str(arg->str_table, info->path);
delete_unique_str(arg->str_table, info->class_path);
}
Expand All @@ -121,37 +118,6 @@ newobj_i(VALUE tpval, void *data)
st_insert(arg->object_table, (st_data_t)obj, (st_data_t)info);
}

/*
 * FREEOBJ tracepoint callback.
 *
 * tpval: the tracepoint value; the object being freed is read from it.
 * data:  the struct traceobj_arg owning the tables.
 *
 * With keep_remains set, the object's entry stays in object_table and is only
 * flagged as no longer living. Otherwise the entry is removed, its interned
 * strings are released and the allocation_info is freed.
 */
static void
freeobj_i(VALUE tpval, void *data)
{
    struct traceobj_arg *tracer = (struct traceobj_arg *)data;
    st_data_t key = (st_data_t)rb_tracearg_object(rb_tracearg_from_tracepoint(tpval));
    st_data_t found;

    /* This hook runs during GC, and touching the st table may allocate, which
     * could start another GC. Keep GC disabled for the duration of the update. */
    const VALUE was_disabled = rb_gc_disable_no_rest();

    if (!tracer->keep_remains) {
        if (st_delete(tracer->object_table, &key, &found)) {
            struct allocation_info *entry = (struct allocation_info *)found;

            delete_unique_str(tracer->str_table, entry->path);
            delete_unique_str(tracer->str_table, entry->class_path);
            ruby_xfree(entry);
        }
    }
    else if (st_lookup(tracer->object_table, key, &found)) {
        ((struct allocation_info *)found)->living = 0;
    }

    /* Only re-enable GC if it was enabled when we entered. */
    if (was_disabled == Qfalse) rb_gc_enable();
}

static int
free_keys_i(st_data_t key, st_data_t value, st_data_t data)
{
Expand All @@ -171,7 +137,6 @@ allocation_info_tracer_mark(void *ptr)
{
struct traceobj_arg *trace_arg = (struct traceobj_arg *)ptr;
rb_gc_mark(trace_arg->newobj_trace);
rb_gc_mark(trace_arg->freeobj_trace);
}

static void
Expand All @@ -197,15 +162,47 @@ allocation_info_tracer_memsize(const void *ptr)
return size;
}

/*
 * st_foreach callback over object_table (obj -> allocation_info).
 *
 * key:   the traced object (VALUE).
 * value: its struct allocation_info.
 * data:  the owning struct traceobj_arg.
 *
 * Entries whose object is still alive are left untouched. For a collected
 * object, the entry is either kept and flagged dead (keep_remains) or fully
 * released and removed from the table.
 */
static int
allocation_info_tracer_weak_reference_i(st_data_t key, st_data_t value, st_data_t data)
{
    struct traceobj_arg *tracer = (struct traceobj_arg *)data;
    struct allocation_info *entry = (struct allocation_info *)value;
    const VALUE obj = (VALUE)key;

    if (!rb_gc_handle_weak_references_alive_p(obj)) {
        if (!tracer->keep_remains) {
            /* Dead entries are not wanted: drop the strings, the info and the table slot. */
            delete_unique_str(tracer->str_table, entry->path);
            delete_unique_str(tracer->str_table, entry->class_path);
            ruby_xfree(entry);
            return ST_DELETE;
        }

        /* keep_remains: retain the entry for reporting, but mark it as dead. */
        entry->living = 0;
    }

    return ST_CONTINUE;
}

static void
allocation_info_tracer_weak_reference(void *ptr)
{
struct traceobj_arg *arg = (struct traceobj_arg *)ptr;

st_foreach(arg->object_table, allocation_info_tracer_weak_reference_i, (st_data_t)arg);
}

static int
allocation_info_tracer_compact_update_object_table_i(st_data_t key, st_data_t value, st_data_t data)
{
st_table *table = (st_table *)data;
struct allocation_info *info = (struct allocation_info *)value;

if (!rb_gc_pointer_to_heap_p(key)) {
struct allocation_info *info = (struct allocation_info *)value;
xfree(info);
return ST_DELETE;
/* In keep_remains mode the table keeps entries for freed objects. Their keys
* are dangling, so skip them instead of passing them to rb_gc_location. */
if (!info->living) {
return ST_CONTINUE;
}

if (key != rb_gc_location(key)) {
Expand Down Expand Up @@ -242,6 +239,7 @@ static const rb_data_type_t allocation_info_tracer_type = {
allocation_info_tracer_free, /* Never called because global */
allocation_info_tracer_memsize,
allocation_info_tracer_compact,
allocation_info_tracer_weak_reference,
},
0, 0, RUBY_TYPED_FREE_IMMEDIATELY
};
Expand All @@ -260,9 +258,10 @@ get_traceobj_arg(void)
tmp_trace_arg->running = 0;
tmp_trace_arg->keep_remains = tmp_keep_remains;
tmp_trace_arg->newobj_trace = 0;
tmp_trace_arg->freeobj_trace = 0;
tmp_trace_arg->object_table = st_init_numtable();
tmp_trace_arg->str_table = st_init_strtable();

rb_gc_declare_weak_references(obj);
}
return tmp_trace_arg;
}
Expand All @@ -284,10 +283,8 @@ trace_object_allocations_start(VALUE self)
else {
if (arg->newobj_trace == 0) {
arg->newobj_trace = rb_tracepoint_new(0, RUBY_INTERNAL_EVENT_NEWOBJ, newobj_i, arg);
arg->freeobj_trace = rb_tracepoint_new(0, RUBY_INTERNAL_EVENT_FREEOBJ, freeobj_i, arg);
}
rb_tracepoint_enable(arg->newobj_trace);
rb_tracepoint_enable(arg->freeobj_trace);
}

return Qnil;
Expand Down Expand Up @@ -315,9 +312,6 @@ trace_object_allocations_stop(VALUE self)
if (arg->newobj_trace != 0) {
rb_tracepoint_disable(arg->newobj_trace);
}
if (arg->freeobj_trace != 0) {
rb_tracepoint_disable(arg->freeobj_trace);
}
}

return Qnil;
Expand Down
Loading