Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 28 additions & 24 deletions ext/json/ext/parser/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -1425,9 +1425,7 @@ static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_Par

static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
const char *start = state->cursor;
state->cursor++;
return json_parse_number(state, config, true, start);
return json_parse_number(state, config, true, state->cursor - 1);
}

// How many values (array elements, or interleaved object keys+values) have been
Expand All @@ -1452,6 +1450,22 @@ static inline void json_value_completed(json_frame *frame)
frame->phase = (enum json_frame_phase) frame->type;
}

static inline bool json_match_keyword(JSON_ParserState *state, const char *keyword, size_t offset)
{
// It is assumed that since `keyword` is always a literal, the compiler is able to constantize this
// `strlen` and several other computations in that routine, such as eliminating the `if (resumable)` branch.
Comment on lines +1453 to +1456
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An ALWAYS_INLINE (I think available from CRuby headers) would help guarantee that, vs depending on the compiler inlining heuristics.
(inline doesn't mean to inline but just tolerate multiple definitions of it)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah we have a shim for it.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#998 if it's helpful


size_t len = strlen(keyword);

// Note: memcmp with a small power of two and a literal string compile to an integer comparison /
// That's why we sometime compare starting from the first byte and sometimes from the second.
if (rest(state) >= len && (memcmp(state->cursor + offset, keyword + offset, len - offset) == 0)) {
state->cursor += len;
return true;
}
return false;
}

// Parse an arbitrary JSON value iteratively. This is a state machine driven
// entirely by the top frame's phase so it can stop at any value boundary and
// resume purely from the frame stack. A JSON_FRAME_ROOT frame sits at the
Expand All @@ -1475,27 +1489,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)

switch (peek(state)) {
case 'n':
if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
state->cursor += 4;
if (json_match_keyword(state, "null", 0)) {
json_push_value(state, config, Qnil);
json_value_completed(frame);
break;
}

raise_parse_error("unexpected token %s", state);
case 't':
if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
state->cursor += 4;
if (json_match_keyword(state, "true", 0)) {
json_push_value(state, config, Qtrue);
json_value_completed(frame);
break;
}

raise_parse_error("unexpected token %s", state);
case 'f':
// Note: memcmp with a small power of two compile to an integer comparison
if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
state->cursor += 5;
if (json_match_keyword(state, "false", 1)) {
json_push_value(state, config, Qfalse);
json_value_completed(frame);
break;
Expand All @@ -1504,35 +1514,29 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
raise_parse_error("unexpected token %s", state);
case 'N':
// Note: memcmp with a small power of two compile to an integer comparison
if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
state->cursor += 3;
if (config->allow_nan && json_match_keyword(state, "NaN", 1)) {
json_push_value(state, config, CNaN);
json_value_completed(frame);
break;
}

raise_parse_error("unexpected token %s", state);
case 'I':
if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
state->cursor += 8;
if (config->allow_nan && json_match_keyword(state, "Infinity", 0)) {
json_push_value(state, config, CInfinity);
json_value_completed(frame);
break;
}

raise_parse_error("unexpected token %s", state);
case '-': {
// Note: memcmp with a small power of two compile to an integer comparison
if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
if (config->allow_nan) {
state->cursor += 9;
json_push_value(state, config, CMinusInfinity);
json_value_completed(frame);
break;
} else {
raise_parse_error("unexpected token %s", state);
}
state->cursor++;
if (config->allow_nan && json_match_keyword(state, "Infinity", 0)) {
json_push_value(state, config, CMinusInfinity);
json_value_completed(frame);
break;
}

json_push_value(state, config, json_parse_negative_number(state, config));
json_value_completed(frame);
break;
Expand Down
2 changes: 1 addition & 1 deletion test/json/json_ext_parser_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def test_error_messages

ex = assert_raise(ParserError) { parse('-Infinity something') }
unless RUBY_PLATFORM =~ /java/
assert_equal "unexpected token '-Infinity' at line 1 column 1", ex.message
assert_equal "invalid number: '-Infinity' at line 1 column 1", ex.message
end

ex = assert_raise(ParserError) { parse('NaN something') }
Expand Down
Loading