diff --git a/Rakefile b/Rakefile index 280845c..e30479b 100755 --- a/Rakefile +++ b/Rakefile @@ -21,10 +21,10 @@ task specs: :spec require 'rspec/core/rake_task' desc 'Run specifications' RSpec::Core::RakeTask.new do |spec| - spec.rspec_opts = %w(--options spec/spec.opts) if File.exists?('spec/spec.opts') + spec.rspec_opts = %w(--options spec/spec.opts) if File.exist?('spec/spec.opts') end desc "Run specifications for continuous integration" RSpec::Core::RakeTask.new("spec:ci") do |spec| - spec.rspec_opts = %w(--options spec/spec.opts) if File.exists?('spec/spec.opts') + spec.rspec_opts = %w(--options spec/spec.opts) if File.exist?('spec/spec.opts') end diff --git a/lib/json/canonicalization.rb b/lib/json/canonicalization.rb index e8ec97f..2b090a6 100644 --- a/lib/json/canonicalization.rb +++ b/lib/json/canonicalization.rb @@ -26,38 +26,94 @@ def to_json_c14n end class Numeric + # This is intended to be compliant with ECMA-262, version 6.0 (ES6) + # + # See https://262.ecma-international.org/6.0/#sec-tostring-applied-to-the-number-type + # + # JSON does not permit NaN or infinite values, so those raise a RangeError def to_json_c14n - raise RangeError if self.is_a?(Float) && (self.nan? || self.infinite?) - return "0" if self.zero? - num = self - if num < 0 - num, sign = -num, '-' - end - native_rep = "%.15E" % num - decimal, exponential = native_rep.split('E') - exp_val = exponential.to_i - exponential = exp_val > 0 ? 
('+' + exp_val.to_s) : exp_val.to_s - - integral, fractional = decimal.split('.') - fractional = fractional.sub(/0+$/, '') # Remove trailing zeros - - if exp_val > 0 && exp_val < 21 - while exp_val > 0 - integral += fractional.to_s[0] || '0' - fractional = fractional.to_s[1..-1] - exp_val -= 1 - end - exponential = nil - elsif exp_val == 0 - exponential = nil - elsif exp_val < 0 && exp_val > -7 - # Small numbers are shown as 0.etc with e-6 as lower limit - fractional, integral, exponential = integral + fractional.to_s, '0', nil - fractional = ("0" * (-exp_val - 1)) + fractional - end + raise RangeError if self.is_a?(Float) && !self.finite? + return '0' if self.zero? + + # We may or may not be using scientific notation (see https://en.wikipedia.org/wiki/Scientific_notation) + # at this point, but the terminology is the same. Numbers are represented as a significand + # (also known as a mantissa) multiplied by 10 raised to an exponent. A number like 1701 may be represented + # as 1701 * 10^0, 170.1 * 10^1, 17.01 * 10^2, or (in scientific notation) 1.701 * 10^3. ES6 and Ruby don't + # always agree on when to use scientific notation, but if Ruby has done the conversion, we can use the + # exponent below when reproducing the behavior in the ES6 spec. + significand_digits, exponent_digits = self.abs.to_s.split('e', 2) + + integer_digits, fraction_digits = significand_digits.split('.', 2) + + + # From the ES6 spec: + # + # "The abstract operation ToString converts a Number m to String format as follows ... + # let n, k, and s be integers such that k ≥ 1, 10^(k−1) ≤ s < 10^k, the Number value for s × 10^(n−k) is m, + # and k is as small as possible. Note that k is the number of digits in the decimal representation of s, + # that s is not divisible by 10, and that the least significant digit of s is not necessarily uniquely + # determined by these criteria." 
+ # + # This is just a different sort of exponential notation, but instead of preferring 1 ≤ s < 10 as in + # scientific notation, here we want s to be an integer, and not divisible by 10. Since we're relying + # on ruby's existing #to_s, s is just the significand without the decimal or any leading or trailing + # zeroes + s = significand_digits.sub('.', '').sub(/^-?0*/, '').sub(/0*$/, '') - fractional = nil if fractional.to_s.empty? - sign.to_s + integral + (fractional ? ".#{fractional}" : '') + (exponential ? "e#{exponential}" : '') + # Once we know s, k is easy + k = s.length + + # If n is positive, it represents the number of digits (including trailing zeroes) to the left of the + # decimal. If n is negative or zero, its absolute value is the number of zeroes immediately to the right of the decimal. + # n-1 is also equal to the exponent used in scientific notation representations of m, so if we already + # have that representation, we can use that rather than try to recalculate where the decimal would be. + # If we don't already have an exponent, we just do digit counting rather than using Math.log10(self) or + # the slightly more precise Math.log2(self)/Math.log2(10) since that can lose precision for values very + # close to n = 22, like the Integer value 999999999999999700000 + n = if exponent_digits + exponent_digits.to_i + 1 + elsif integer_digits.to_i > 0 + integer_digits.length + else + -fraction_digits.index(/[1-9]/) + end + + exponent = n - 1 + + # Per the spec, positive numbers do not include a sign, but exponents always do + sign = self.negative? ? '-' : '' + exponent_sign = exponent.negative? ? '-' : '+' + + if k <= n && n <= 21 # Whole numbers, possibly with trailing zeroes, and < 10^21 + # return the String consisting of the code units of the k digits of the decimal representation of s + # (in order, with no leading zeroes), followed by n−k occurrences of the code unit 0x0030 (DIGIT ZERO). 
+ [sign, s, '0' * (n - k)].join + elsif 0 < n && n <= 21 # Numbers with an integer component < 10^21 + # return the String consisting of the code units of the most significant n digits of the decimal + # representation of s, followed by the code unit 0x002E (FULL STOP), followed by the code units of the + # remaining k−n digits of the decimal representation of s. + [sign, s[0..(n-1)], '.', s[n..-1]].join + elsif -6 < n && n <= 0 # Fractional numbers to no more than 6 decimal places + # return the String consisting of the code unit 0x0030 (DIGIT ZERO), followed by the code unit 0x002E + # (FULL STOP), followed by −n occurrences of the code unit 0x0030 (DIGIT ZERO), followed by the code + # units of the k digits of the decimal representation of s + [sign, '0.', '0' * (-n), s].join + elsif k == 1 # single significant digit outside of -6 < n <= 21 + # return the String consisting of the code unit of the single digit of s, followed by code unit 0x0065 + # (LATIN SMALL LETTER E), followed by the code unit 0x002B (PLUS SIGN) or the code unit 0x002D + # (HYPHEN-MINUS) according to whether n−1 is positive or negative, followed by the code units of the decimal + # representation of the integer abs(n−1) (with no leading zeroes). + # + # This produces "1e-18", rather than Ruby's default "1.0e-18" + [sign, s, 'e', exponent_sign, exponent.abs].join + else # multiple significant digits outside of -6 < n <= 21 + # Return the String consisting of the code units of the most significant digit of the decimal representation + # of s, followed by code unit 0x002E (FULL STOP), followed by the code units of the remaining k−1 digits of + # the decimal representation of s, followed by code unit 0x0065 (LATIN SMALL LETTER E), followed by code unit + # 0x002B (PLUS SIGN) or the code unit 0x002D (HYPHEN-MINUS) according to whether n−1 is positive or negative, + # followed by the code units of the decimal representation of the integer abs(n−1) (with no leading zeroes). 
+ [sign, s[0], '.', s[1..-1], 'e', exponent_sign, exponent.abs].join + end end end diff --git a/spec/number_spec.rb b/spec/number_spec.rb index 29f3d8e..e1cdd23 100644 --- a/spec/number_spec.rb +++ b/spec/number_spec.rb @@ -1,5 +1,11 @@ require_relative 'spec_helper' +def float_from_hex string + # pack and unpack can be used to reinterpret the same bytes as a different type, + # allowing us to copy testcases right out of the RFC + [string.to_i(16)].pack('Q>').unpack('G').first +end + describe "conversions" do { -1/0.0 => RangeError, @@ -17,14 +23,51 @@ 999999999999999700000 => '999999999999999700000', 999999999999999900000 => '999999999999999900000', 333333333.33333329 => '333333333.3333333', - # -5e-324 => '-5e-324', # Outside Ruby Range - # 1.0000000000000001e+23 => '1.0000000000000001e+23', # Outside Ruby Range - # 295147905179352830000 => '295147905179352830000', # Outside Ruby Range - #-1.7976931348623157e+308 => '-1.7976931348623157e+308', # Outside Ruby Range - #1.7976931348623157e+308 => '1.7976931348623157e+308', # Outside Ruby Range - #1e+23 => '1e+23', # Outside Ruby - #5e-324 => '5e-324', # Outside Ruby Range + -5e-324 => '-5e-324', + 1.0000000000000001e+23 => '1.0000000000000001e+23', + 295147905179352830000 => '295147905179352830000', + -1.7976931348623157e+308 => '-1.7976931348623157e+308', + 1.7976931348623157e+308 => '1.7976931348623157e+308', + 1e+23 => '1e+23', + 5e-324 => '5e-324', + + # Values from the RFC: https://www.rfc-editor.org/rfc/rfc8785.html#name-number-serialization-sample + # Additional hex values can be added by reversing the operations in #float_from_hex, e.g. 
+ # [float].pack('G').unpack('Q>').first.to_s(16) + '7fffffffffffffff' => RangeError, # NaN + '7ff0000000000000' => RangeError, # Infinity + '0000000000000000' => '0', # Zero + '8000000000000000' => '0', # Negative zero + '0000000000000001' => '5e-324', # Smallest positive float + '8000000000000001' => '-5e-324', # Smallest negative (closest to zero) float + '7fefffffffffffff' => '1.7976931348623157e+308', # Largest positive float + 'ffefffffffffffff' => '-1.7976931348623157e+308',# Largest negative (furthest from zero) float + '4340000000000000' => '9007199254740992', + 'c340000000000000' => '-9007199254740992', + '4430000000000000' => '295147905179352830000', + '44b52d02c7e14af5' => '9.999999999999997e+22', + '44b52d02c7e14af6' => '1e+23', + '44b52d02c7e14af7' => '1.0000000000000001e+23', + '444b1ae4d6e2ef4e' => '999999999999999700000', + '444b1ae4d6e2ef4f' => '999999999999999900000', + '444b1ae4d6e2ef50' => '1e+21', + '3eb0c6f7a0b5ed8c' => '9.999999999999997e-7', + '3eb0c6f7a0b5ed8d' => '0.000001', + '41b3de4355555553' => '333333333.3333332', + '41b3de4355555554' => '333333333.33333325', + '41b3de4355555555' => '333333333.3333333', + '41b3de4355555556' => '333333333.3333334', + '41b3de4355555557' => '333333333.33333343', + 'becbf647612f3696' => '-0.0000033333333333333333', + '43143ff3c1cb0959' => '1424953923781206.2', + + # Additional values that trigger various edge cases + '4050a66666666666' => '66.6', # %.15E turns this into 66.59999999999999 + '3fb9999999999998' => '0.09999999999999998', # Calculating n using Math.log will lose precision }.each do |data, expected| + if data.is_a?(String) + data = float_from_hex(data) + end if expected.is_a?(String) it "converts #{data} to #{expected}" do expect(data.to_json_c14n).to eq expected