From 256df38cd5bf8f729f4f6989cb485b0e28add2fd Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Tue, 31 Mar 2026 15:31:50 -0400 Subject: [PATCH] Upgrade Core to `57e8c91ed68e3ee903526fd2f45cb16ca46759d8` Signed-off-by: Juan Cruz Viotti --- DEPENDENCIES | 2 +- test/packaging/find_package/CMakeLists.txt | 2 +- vendor/core/CMakeLists.txt | 102 +- vendor/core/DEPENDENCIES | 19 + vendor/core/cmake/Findmpdecimal.cmake | 146 - vendor/core/cmake/Findyaml.cmake | 103 - .../core/cmake/common/compiler/options.cmake | 5 +- vendor/core/cmake/common/defaults.cmake | 15 +- .../cmake/common/targets/executable.cmake | 44 + vendor/core/config.cmake.in | 74 +- vendor/core/src/core/crypto/CMakeLists.txt | 13 + vendor/core/src/core/crypto/crypto_sha256.cc | 232 + vendor/core/src/core/crypto/crypto_uuid.cc | 81 + .../crypto/include/sourcemeta/core/crypto.h | 16 + .../include/sourcemeta/core/crypto_sha256.h | 30 + .../include/sourcemeta/core/crypto_uuid.h | 27 + vendor/core/src/core/html/CMakeLists.txt | 6 +- vendor/core/src/core/html/encoder.cc | 74 - vendor/core/src/core/html/escape.cc | 160 +- .../core/html/include/sourcemeta/core/html.h | 3 +- .../include/sourcemeta/core/html_buffer.h | 93 + .../include/sourcemeta/core/html_elements.h | 450 - .../include/sourcemeta/core/html_encoder.h | 145 - .../include/sourcemeta/core/html_escape.h | 16 +- .../include/sourcemeta/core/html_writer.h | 466 + vendor/core/src/core/html/writer.cc | 48 + .../core/src/core/{uuid => ip}/CMakeLists.txt | 5 +- .../src/core/ip/include/sourcemeta/core/ip.h | 54 + vendor/core/src/core/ip/ipv4.cc | 56 + vendor/core/src/core/ip/ipv6.cc | 112 + vendor/core/src/core/json/CMakeLists.txt | 2 + vendor/core/src/core/json/construct.h | 648 ++ vendor/core/src/core/json/grammar.h | 3 + .../core/json/include/sourcemeta/core/json.h | 128 +- .../json/include/sourcemeta/core/json_array.h | 5 + .../json/include/sourcemeta/core/json_auto.h | 103 +- .../json/include/sourcemeta/core/json_hash.h | 178 +- 
.../include/sourcemeta/core/json_object.h | 48 +- .../json/include/sourcemeta/core/json_value.h | 312 +- vendor/core/src/core/json/json.cc | 161 +- vendor/core/src/core/json/json_value.cc | 391 +- vendor/core/src/core/json/parser.h | 1747 ++-- vendor/core/src/core/json/stringify.h | 24 +- .../include/sourcemeta/core/jsonpointer.h | 50 +- .../sourcemeta/core/jsonpointer_pointer.h | 117 +- .../sourcemeta/core/jsonpointer_position.h | 41 +- .../core/src/core/jsonpointer/jsonpointer.cc | 53 +- vendor/core/src/core/jsonpointer/parser.h | 107 +- vendor/core/src/core/jsonpointer/position.cc | 189 +- vendor/core/src/core/jsonpointer/stringify.h | 22 +- vendor/core/src/core/jsonschema/bundle.cc | 277 +- vendor/core/src/core/jsonschema/frame.cc | 473 +- vendor/core/src/core/jsonschema/helpers.h | 27 + .../include/sourcemeta/core/jsonschema.h | 48 +- .../sourcemeta/core/jsonschema_frame.h | 95 +- .../sourcemeta/core/jsonschema_transform.h | 12 +- .../sourcemeta/core/jsonschema_vocabularies.h | 36 +- vendor/core/src/core/jsonschema/jsonschema.cc | 129 +- .../src/core/jsonschema/known_resolver.in.cc | 412 +- .../core/src/core/jsonschema/vocabularies.cc | 11 +- vendor/core/src/core/jsonschema/walker.cc | 2 + .../core/md5/include/sourcemeta/core/md5.h | 39 - vendor/core/src/core/md5/md5.cc | 169 - vendor/core/src/core/punycode/CMakeLists.txt | 4 +- vendor/core/src/core/punycode/punycode.cc | 16 +- vendor/core/src/core/punycode/utf8.h | 87 - vendor/core/src/core/regex/preprocess.h | 118 +- vendor/core/src/core/regex/regex.cc | 33 +- vendor/core/src/core/semver/CMakeLists.txt | 9 + .../semver/include/sourcemeta/core/semver.h | 131 + .../include/sourcemeta/core/semver_error.h | 56 + vendor/core/src/core/semver/semver.cc | 462 + vendor/core/src/core/unicode/CMakeLists.txt | 6 + .../unicode/include/sourcemeta/core/unicode.h | 103 + vendor/core/src/core/unicode/unicode.cc | 116 + vendor/core/src/core/uri/CMakeLists.txt | 3 + vendor/core/src/core/uri/accessors.cc | 13 +- 
vendor/core/src/core/uri/canonicalize.cc | 44 +- vendor/core/src/core/uri/escaping.h | 196 +- vendor/core/src/core/uri/filesystem.cc | 4 +- .../core/uri/include/sourcemeta/core/uri.h | 48 +- vendor/core/src/core/uri/parse.cc | 486 +- vendor/core/src/core/uri/recompose.cc | 43 +- vendor/core/src/core/uri/resolution.cc | 13 +- .../core/src/core/uritemplate/CMakeLists.txt | 2 - .../sourcemeta/core/uritemplate_error.h | 20 + .../sourcemeta/core/uritemplate_router.h | 36 +- .../core/uritemplate/uritemplate_router.cc | 41 +- .../uritemplate/uritemplate_router_view.cc | 355 +- .../core/uuid/include/sourcemeta/core/uuid.h | 36 - vendor/core/src/core/uuid/uuid.cc | 34 - vendor/core/src/core/yaml/CMakeLists.txt | 7 +- .../core/yaml/include/sourcemeta/core/yaml.h | 128 +- .../yaml/include/sourcemeta/core/yaml_error.h | 62 +- .../include/sourcemeta/core/yaml_roundtrip.h | 80 + vendor/core/src/core/yaml/lexer.h | 1495 +++ vendor/core/src/core/yaml/parser.h | 1966 ++++ vendor/core/src/core/yaml/stringify.h | 876 ++ vendor/core/src/core/yaml/yaml.cc | 636 +- .../src/extension/alterschema/CMakeLists.txt | 4 + .../src/extension/alterschema/alterschema.cc | 11 +- .../alterschema/common/const_in_enum.h | 34 + .../alterschema/common/const_with_type.h | 4 +- .../alterschema/common/enum_with_type.h | 2 +- .../non_applicable_enum_validation_keywords.h | 2 +- .../non_applicable_type_specific_keywords.h | 4 +- .../common/oneof_to_anyof_disjoint_types.h | 4 +- .../alterschema/common/orphan_definitions.h | 33 +- .../required_properties_in_properties.h | 31 +- .../common/unknown_keywords_prefix.h | 20 + .../alterschema/linter/const_not_in_enum.h | 30 + .../alterschema/linter/forbid_empty_enum.h | 36 + .../alterschema/linter/invalid_external_ref.h | 112 + .../core/src/extension/build/CMakeLists.txt | 8 - .../src/extension/build/adapter_filesystem.cc | 114 - .../build/include/sourcemeta/core/build.h | 106 - .../core/build_adapter_filesystem.h | 58 - .../include/sourcemeta/core/build_types.h | 
26 - .../src/extension/schemaconfig/CMakeLists.txt | 11 - .../include/sourcemeta/core/schemaconfig.h | 78 - .../sourcemeta/core/schemaconfig_error.h | 53 - .../core/src/extension/schemaconfig/parse.cc | 173 - .../extension/schemaconfig/schemaconfig.cc | 43 - vendor/core/src/lang/error/CMakeLists.txt | 6 + .../error/include/sourcemeta/core/error.h | 17 + .../include/sourcemeta/core/error_file.h | 38 + vendor/core/src/lang/io/CMakeLists.txt | 4 +- .../src/lang/io/include/sourcemeta/core/io.h | 35 + .../io/include/sourcemeta/core/io_temporary.h | 52 + vendor/core/src/lang/io/io.cc | 76 +- vendor/core/src/lang/io/io_temporary.cc | 66 + vendor/core/src/lang/numeric/CMakeLists.txt | 5 +- .../core/src/lang/numeric/big_coefficient.h | 754 ++ vendor/core/src/lang/numeric/decimal.cc | 2043 +++- .../numeric/include/sourcemeta/core/numeric.h | 1 + .../include/sourcemeta/core/numeric_decimal.h | 89 +- .../include/sourcemeta/core/numeric_parse.h | 6 + .../include/sourcemeta/core/numeric_uint128.h | 232 + vendor/core/src/lang/numeric/parse.cc | 20 +- .../options/CMakeLists.txt | 0 .../options/include/sourcemeta/core/options.h | 0 .../include/sourcemeta/core/options_error.h | 0 .../{extension => lang}/options/options.cc | 0 .../sourcemeta/core/parallel_for_each.h | 22 +- .../md5 => lang/preprocessor}/CMakeLists.txt | 4 +- .../include/sourcemeta/core/preprocessor.h | 12 + .../include/sourcemeta/core/process_error.h | 4 +- vendor/core/src/lang/process/spawn.cc | 17 +- vendor/core/vendor-mpdecimal.sh | 43 - vendor/core/vendor/mpdecimal/COPYRIGHT.txt | 23 - .../vendor/mpdecimal/libmpdec/basearith.c | 649 -- .../vendor/mpdecimal/libmpdec/basearith.h | 217 - vendor/core/vendor/mpdecimal/libmpdec/bits.h | 188 - .../vendor/mpdecimal/libmpdec/constants.c | 129 - .../vendor/mpdecimal/libmpdec/constants.h | 88 - .../core/vendor/mpdecimal/libmpdec/context.c | 285 - .../vendor/mpdecimal/libmpdec/convolute.c | 172 - .../vendor/mpdecimal/libmpdec/convolute.h | 48 - 
vendor/core/vendor/mpdecimal/libmpdec/crt.c | 178 - vendor/core/vendor/mpdecimal/libmpdec/crt.h | 45 - .../vendor/mpdecimal/libmpdec/difradix2.c | 171 - .../vendor/mpdecimal/libmpdec/difradix2.h | 46 - vendor/core/vendor/mpdecimal/libmpdec/fnt.c | 77 - vendor/core/vendor/mpdecimal/libmpdec/fnt.h | 46 - .../core/vendor/mpdecimal/libmpdec/fourstep.c | 242 - .../core/vendor/mpdecimal/libmpdec/fourstep.h | 46 - vendor/core/vendor/mpdecimal/libmpdec/io.c | 1610 --- vendor/core/vendor/mpdecimal/libmpdec/io.h | 61 - .../core/vendor/mpdecimal/libmpdec/mpalloc.c | 347 - .../core/vendor/mpdecimal/libmpdec/mpalloc.h | 53 - .../vendor/mpdecimal/libmpdec/mpdecimal.c | 9155 ----------------- .../vendor/mpdecimal/libmpdec/mpdecimal.h.in | 804 -- .../vendor/mpdecimal/libmpdec/mpdecimal32vc.h | 762 -- .../vendor/mpdecimal/libmpdec/mpdecimal64vc.h | 768 -- .../core/vendor/mpdecimal/libmpdec/mpsignal.c | 966 -- .../vendor/mpdecimal/libmpdec/numbertheory.c | 129 - .../vendor/mpdecimal/libmpdec/numbertheory.h | 75 - .../core/vendor/mpdecimal/libmpdec/sixstep.c | 212 - .../core/vendor/mpdecimal/libmpdec/sixstep.h | 46 - .../vendor/mpdecimal/libmpdec/transpose.c | 275 - .../vendor/mpdecimal/libmpdec/transpose.h | 60 - .../vendor/mpdecimal/libmpdec/typearith.h | 661 -- .../vendor/mpdecimal/libmpdec/umodarith.h | 645 -- .../vendor/mpdecimal/libmpdec/vcdiv64.asm | 47 - vendor/core/vendor/yaml/License | 20 - vendor/core/vendor/yaml/include/yaml.h | 1985 ---- vendor/core/vendor/yaml/src/api.c | 1393 --- vendor/core/vendor/yaml/src/dumper.c | 394 - vendor/core/vendor/yaml/src/emitter.c | 2358 ----- vendor/core/vendor/yaml/src/loader.c | 544 - vendor/core/vendor/yaml/src/parser.c | 1375 --- vendor/core/vendor/yaml/src/reader.c | 469 - vendor/core/vendor/yaml/src/scanner.c | 3598 ------- vendor/core/vendor/yaml/src/writer.c | 141 - vendor/core/vendor/yaml/src/yaml_private.h | 684 -- 195 files changed, 15119 insertions(+), 38134 deletions(-) create mode 100644 vendor/core/DEPENDENCIES delete mode 
100644 vendor/core/cmake/Findmpdecimal.cmake delete mode 100644 vendor/core/cmake/Findyaml.cmake create mode 100644 vendor/core/src/core/crypto/CMakeLists.txt create mode 100644 vendor/core/src/core/crypto/crypto_sha256.cc create mode 100644 vendor/core/src/core/crypto/crypto_uuid.cc create mode 100644 vendor/core/src/core/crypto/include/sourcemeta/core/crypto.h create mode 100644 vendor/core/src/core/crypto/include/sourcemeta/core/crypto_sha256.h create mode 100644 vendor/core/src/core/crypto/include/sourcemeta/core/crypto_uuid.h delete mode 100644 vendor/core/src/core/html/encoder.cc create mode 100644 vendor/core/src/core/html/include/sourcemeta/core/html_buffer.h delete mode 100644 vendor/core/src/core/html/include/sourcemeta/core/html_elements.h delete mode 100644 vendor/core/src/core/html/include/sourcemeta/core/html_encoder.h create mode 100644 vendor/core/src/core/html/include/sourcemeta/core/html_writer.h create mode 100644 vendor/core/src/core/html/writer.cc rename vendor/core/src/core/{uuid => ip}/CMakeLists.txt (51%) create mode 100644 vendor/core/src/core/ip/include/sourcemeta/core/ip.h create mode 100644 vendor/core/src/core/ip/ipv4.cc create mode 100644 vendor/core/src/core/ip/ipv6.cc create mode 100644 vendor/core/src/core/json/construct.h delete mode 100644 vendor/core/src/core/md5/include/sourcemeta/core/md5.h delete mode 100644 vendor/core/src/core/md5/md5.cc delete mode 100644 vendor/core/src/core/punycode/utf8.h create mode 100644 vendor/core/src/core/semver/CMakeLists.txt create mode 100644 vendor/core/src/core/semver/include/sourcemeta/core/semver.h create mode 100644 vendor/core/src/core/semver/include/sourcemeta/core/semver_error.h create mode 100644 vendor/core/src/core/semver/semver.cc create mode 100644 vendor/core/src/core/unicode/CMakeLists.txt create mode 100644 vendor/core/src/core/unicode/include/sourcemeta/core/unicode.h create mode 100644 vendor/core/src/core/unicode/unicode.cc delete mode 100644 
vendor/core/src/core/uuid/include/sourcemeta/core/uuid.h delete mode 100644 vendor/core/src/core/uuid/uuid.cc create mode 100644 vendor/core/src/core/yaml/include/sourcemeta/core/yaml_roundtrip.h create mode 100644 vendor/core/src/core/yaml/lexer.h create mode 100644 vendor/core/src/core/yaml/parser.h create mode 100644 vendor/core/src/core/yaml/stringify.h create mode 100644 vendor/core/src/extension/alterschema/common/const_in_enum.h create mode 100644 vendor/core/src/extension/alterschema/linter/const_not_in_enum.h create mode 100644 vendor/core/src/extension/alterschema/linter/forbid_empty_enum.h create mode 100644 vendor/core/src/extension/alterschema/linter/invalid_external_ref.h delete mode 100644 vendor/core/src/extension/build/CMakeLists.txt delete mode 100644 vendor/core/src/extension/build/adapter_filesystem.cc delete mode 100644 vendor/core/src/extension/build/include/sourcemeta/core/build.h delete mode 100644 vendor/core/src/extension/build/include/sourcemeta/core/build_adapter_filesystem.h delete mode 100644 vendor/core/src/extension/build/include/sourcemeta/core/build_types.h delete mode 100644 vendor/core/src/extension/schemaconfig/CMakeLists.txt delete mode 100644 vendor/core/src/extension/schemaconfig/include/sourcemeta/core/schemaconfig.h delete mode 100644 vendor/core/src/extension/schemaconfig/include/sourcemeta/core/schemaconfig_error.h delete mode 100644 vendor/core/src/extension/schemaconfig/parse.cc delete mode 100644 vendor/core/src/extension/schemaconfig/schemaconfig.cc create mode 100644 vendor/core/src/lang/error/CMakeLists.txt create mode 100644 vendor/core/src/lang/error/include/sourcemeta/core/error.h create mode 100644 vendor/core/src/lang/error/include/sourcemeta/core/error_file.h create mode 100644 vendor/core/src/lang/io/include/sourcemeta/core/io_temporary.h create mode 100644 vendor/core/src/lang/io/io_temporary.cc create mode 100644 vendor/core/src/lang/numeric/big_coefficient.h create mode 100644 
vendor/core/src/lang/numeric/include/sourcemeta/core/numeric_uint128.h rename vendor/core/src/{extension => lang}/options/CMakeLists.txt (100%) rename vendor/core/src/{extension => lang}/options/include/sourcemeta/core/options.h (100%) rename vendor/core/src/{extension => lang}/options/include/sourcemeta/core/options_error.h (100%) rename vendor/core/src/{extension => lang}/options/options.cc (100%) rename vendor/core/src/{core/md5 => lang/preprocessor}/CMakeLists.txt (52%) create mode 100644 vendor/core/src/lang/preprocessor/include/sourcemeta/core/preprocessor.h delete mode 100755 vendor/core/vendor-mpdecimal.sh delete mode 100644 vendor/core/vendor/mpdecimal/COPYRIGHT.txt delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/basearith.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/basearith.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/bits.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/constants.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/constants.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/context.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/convolute.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/convolute.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/crt.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/crt.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/difradix2.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/difradix2.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/fnt.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/fnt.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/fourstep.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/fourstep.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/io.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/io.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/mpalloc.c delete mode 100644 
vendor/core/vendor/mpdecimal/libmpdec/mpalloc.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/mpdecimal.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/mpdecimal.h.in delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/mpdecimal32vc.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/mpdecimal64vc.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/mpsignal.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/numbertheory.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/numbertheory.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/sixstep.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/sixstep.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/transpose.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/transpose.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/typearith.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/umodarith.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/vcdiv64.asm delete mode 100644 vendor/core/vendor/yaml/License delete mode 100644 vendor/core/vendor/yaml/include/yaml.h delete mode 100644 vendor/core/vendor/yaml/src/api.c delete mode 100644 vendor/core/vendor/yaml/src/dumper.c delete mode 100644 vendor/core/vendor/yaml/src/emitter.c delete mode 100644 vendor/core/vendor/yaml/src/loader.c delete mode 100644 vendor/core/vendor/yaml/src/parser.c delete mode 100644 vendor/core/vendor/yaml/src/reader.c delete mode 100644 vendor/core/vendor/yaml/src/scanner.c delete mode 100644 vendor/core/vendor/yaml/src/writer.c delete mode 100644 vendor/core/vendor/yaml/src/yaml_private.h diff --git a/DEPENDENCIES b/DEPENDENCIES index 478ffbf..f053cc8 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -1,2 +1,2 @@ vendorpull https://github.com/sourcemeta/vendorpull 1dcbac42809cf87cb5b045106b863e17ad84ba02 -core https://github.com/sourcemeta/core fe450b982907f99e542a0cfc78bc60d2b600ff7a +core https://github.com/sourcemeta/core 
57e8c91ed68e3ee903526fd2f45cb16ca46759d8 diff --git a/test/packaging/find_package/CMakeLists.txt b/test/packaging/find_package/CMakeLists.txt index 4d87752..ae5574b 100644 --- a/test/packaging/find_package/CMakeLists.txt +++ b/test/packaging/find_package/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.18) project(codegen_hello VERSION 0.0.1 LANGUAGES CXX) -set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD 23) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) find_package(Codegen REQUIRED) diff --git a/vendor/core/CMakeLists.txt b/vendor/core/CMakeLists.txt index e6914a5..c12369e 100644 --- a/vendor/core/CMakeLists.txt +++ b/vendor/core/CMakeLists.txt @@ -3,15 +3,20 @@ project(core VERSION 0.0.0 LANGUAGES C CXX ASM_MASM DESCRIPTION "Sourcemeta Core list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") # Options +option(SOURCEMETA_CORE_LANG_PREPROCESSOR "Build the Sourcemeta Core language preprocessor library" ON) option(SOURCEMETA_CORE_LANG_IO "Build the Sourcemeta Core language I/O library" ON) option(SOURCEMETA_CORE_LANG_PROCESS "Build the Sourcemeta Core language Process library" ON) option(SOURCEMETA_CORE_LANG_PARALLEL "Build the Sourcemeta Core language parallel library" ON) option(SOURCEMETA_CORE_LANG_NUMERIC "Build the Sourcemeta Core language numeric library" ON) +option(SOURCEMETA_CORE_LANG_ERROR "Build the Sourcemeta Core language error library" ON) +option(SOURCEMETA_CORE_LANG_OPTIONS "Build the Sourcemeta Core Options library" ON) +option(SOURCEMETA_CORE_UNICODE "Build the Sourcemeta Core Unicode library" ON) option(SOURCEMETA_CORE_PUNYCODE "Build the Sourcemeta Core Punycode library" ON) option(SOURCEMETA_CORE_TIME "Build the Sourcemeta Core time library" ON) -option(SOURCEMETA_CORE_UUID "Build the Sourcemeta Core UUID library" ON) -option(SOURCEMETA_CORE_MD5 "Build the Sourcemeta Core MD5 library" ON) +option(SOURCEMETA_CORE_CRYPTO "Build the Sourcemeta Core Crypto library" ON) 
+option(SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL "Use system OpenSSL for the Sourcemeta Core Crypto library" OFF) option(SOURCEMETA_CORE_REGEX "Build the Sourcemeta Core Regex library" ON) +option(SOURCEMETA_CORE_IP "Build the Sourcemeta Core IP library" ON) option(SOURCEMETA_CORE_URI "Build the Sourcemeta Core URI library" ON) option(SOURCEMETA_CORE_URITEMPLATE "Build the Sourcemeta Core URI Template library" ON) option(SOURCEMETA_CORE_JSON "Build the Sourcemeta Core JSON library" ON) @@ -19,12 +24,10 @@ option(SOURCEMETA_CORE_JSONSCHEMA "Build the Sourcemeta Core JSON Schema library option(SOURCEMETA_CORE_JSONPOINTER "Build the Sourcemeta Core JSON Pointer library" ON) option(SOURCEMETA_CORE_JSONL "Build the Sourcemeta Core JSONL library" ON) option(SOURCEMETA_CORE_YAML "Build the Sourcemeta Core YAML library" ON) +option(SOURCEMETA_CORE_SEMVER "Build the Sourcemeta Core SemVer library" ON) option(SOURCEMETA_CORE_HTML "Build the Sourcemeta Core HTML library" ON) option(SOURCEMETA_CORE_EXTENSION_ALTERSCHEMA "Build the Sourcemeta Core AlterSchema library" ON) option(SOURCEMETA_CORE_EXTENSION_EDITORSCHEMA "Build the Sourcemeta Core EditorSchema library" ON) -option(SOURCEMETA_CORE_EXTENSION_SCHEMACONFIG "Build the Sourcemeta Core SchemaConfig library" ON) -option(SOURCEMETA_CORE_EXTENSION_OPTIONS "Build the Sourcemeta Core Options library" ON) -option(SOURCEMETA_CORE_EXTENSION_BUILD "Build the Sourcemeta Core Build library" ON) option(SOURCEMETA_CORE_TESTS "Build the Sourcemeta Core tests" OFF) option(SOURCEMETA_CORE_BENCHMARK "Build the Sourcemeta Core benchmarks" OFF) option(SOURCEMETA_CORE_DOCS "Build the Sourcemeta Core docs" OFF) @@ -59,6 +62,10 @@ if(SOURCEMETA_CORE_INSTALL) COMPONENT sourcemeta_${PROJECT_NAME}_dev) endif() +if(SOURCEMETA_CORE_LANG_PREPROCESSOR) + add_subdirectory(src/lang/preprocessor) +endif() + if(SOURCEMETA_CORE_LANG_IO) add_subdirectory(src/lang/io) endif() @@ -73,10 +80,21 @@ if(SOURCEMETA_CORE_LANG_PARALLEL) endif() 
if(SOURCEMETA_CORE_LANG_NUMERIC) - find_package(mpdecimal REQUIRED) add_subdirectory(src/lang/numeric) endif() +if(SOURCEMETA_CORE_LANG_ERROR) + add_subdirectory(src/lang/error) +endif() + +if(SOURCEMETA_CORE_LANG_OPTIONS) + add_subdirectory(src/lang/options) +endif() + +if(SOURCEMETA_CORE_UNICODE) + add_subdirectory(src/core/unicode) +endif() + if(SOURCEMETA_CORE_PUNYCODE) add_subdirectory(src/core/punycode) endif() @@ -85,12 +103,11 @@ if(SOURCEMETA_CORE_TIME) add_subdirectory(src/core/time) endif() -if(SOURCEMETA_CORE_UUID) - add_subdirectory(src/core/uuid) -endif() - -if(SOURCEMETA_CORE_MD5) - add_subdirectory(src/core/md5) +if(SOURCEMETA_CORE_CRYPTO) + if(SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL) + find_package(OpenSSL REQUIRED) + endif() + add_subdirectory(src/core/crypto) endif() if(SOURCEMETA_CORE_REGEX) @@ -98,6 +115,10 @@ if(SOURCEMETA_CORE_REGEX) add_subdirectory(src/core/regex) endif() +if(SOURCEMETA_CORE_IP) + add_subdirectory(src/core/ip) +endif() + if(SOURCEMETA_CORE_URI) add_subdirectory(src/core/uri) endif() @@ -123,10 +144,13 @@ if(SOURCEMETA_CORE_JSONL) endif() if(SOURCEMETA_CORE_YAML) - find_package(yaml REQUIRED) add_subdirectory(src/core/yaml) endif() +if(SOURCEMETA_CORE_SEMVER) + add_subdirectory(src/core/semver) +endif() + if(SOURCEMETA_CORE_HTML) add_subdirectory(src/core/html) endif() @@ -139,18 +163,6 @@ if(SOURCEMETA_CORE_EXTENSION_EDITORSCHEMA) add_subdirectory(src/extension/editorschema) endif() -if(SOURCEMETA_CORE_EXTENSION_SCHEMACONFIG) - add_subdirectory(src/extension/schemaconfig) -endif() - -if(SOURCEMETA_CORE_EXTENSION_OPTIONS) - add_subdirectory(src/extension/options) -endif() - -if(SOURCEMETA_CORE_EXTENSION_BUILD) - add_subdirectory(src/extension/build) -endif() - if(SOURCEMETA_CORE_ADDRESS_SANITIZER) sourcemeta_sanitizer(TYPE address) elseif(SOURCEMETA_CORE_UNDEFINED_SANITIZER) @@ -198,6 +210,18 @@ if(SOURCEMETA_CORE_TESTS) add_subdirectory(test/numeric) endif() + if(SOURCEMETA_CORE_LANG_ERROR) + add_subdirectory(test/error) 
+ endif() + + if(SOURCEMETA_CORE_LANG_OPTIONS) + add_subdirectory(test/options) + endif() + + if(SOURCEMETA_CORE_UNICODE) + add_subdirectory(test/unicode) + endif() + if(SOURCEMETA_CORE_PUNYCODE) add_subdirectory(test/punycode) endif() @@ -206,18 +230,18 @@ if(SOURCEMETA_CORE_TESTS) add_subdirectory(test/time) endif() - if(SOURCEMETA_CORE_UUID) - add_subdirectory(test/uuid) - endif() - - if(SOURCEMETA_CORE_MD5) - add_subdirectory(test/md5) + if(SOURCEMETA_CORE_CRYPTO) + add_subdirectory(test/crypto) endif() if(SOURCEMETA_CORE_REGEX) add_subdirectory(test/regex) endif() + if(SOURCEMETA_CORE_IP) + add_subdirectory(test/ip) + endif() + if(SOURCEMETA_CORE_URI) add_subdirectory(test/uri) endif() @@ -246,6 +270,10 @@ if(SOURCEMETA_CORE_TESTS) add_subdirectory(test/yaml) endif() + if(SOURCEMETA_CORE_SEMVER) + add_subdirectory(test/semver) + endif() + if(SOURCEMETA_CORE_HTML) add_subdirectory(test/html) endif() @@ -258,18 +286,6 @@ if(SOURCEMETA_CORE_TESTS) add_subdirectory(test/editorschema) endif() - if(SOURCEMETA_CORE_EXTENSION_SCHEMACONFIG) - add_subdirectory(test/schemaconfig) - endif() - - if(SOURCEMETA_CORE_EXTENSION_OPTIONS) - add_subdirectory(test/options) - endif() - - if(SOURCEMETA_CORE_EXTENSION_BUILD) - add_subdirectory(test/build) - endif() - if(PROJECT_IS_TOP_LEVEL) # Otherwise we need the child project to link # against the sanitizers too. 
diff --git a/vendor/core/DEPENDENCIES b/vendor/core/DEPENDENCIES new file mode 100644 index 0000000..737c7c1 --- /dev/null +++ b/vendor/core/DEPENDENCIES @@ -0,0 +1,19 @@ +vendorpull https://github.com/sourcemeta/vendorpull 1dcbac42809cf87cb5b045106b863e17ad84ba02 +jsontestsuite https://github.com/nst/JSONTestSuite d64aefb55228d9584d3e5b2433f720ea8fd00c82 +yaml-test-suite https://github.com/yaml/yaml-test-suite data-2022-01-17 +jsonschema-2020-12 https://github.com/json-schema-org/json-schema-spec 769daad75a9553562333a8937a187741cb708c72 +jsonschema-2019-09 https://github.com/json-schema-org/json-schema-spec 41014ea723120ce70b314d72f863c6929d9f3cfd +jsonschema-draft7 https://github.com/json-schema-org/json-schema-spec 567f768506aaa33a38e552c85bf0586029ef1b32 +jsonschema-draft6 https://github.com/json-schema-org/json-schema-spec 59ed5f6fc6f6386e23ca51d7f31d7fe9cf696713 +jsonschema-draft4 https://github.com/json-schema-org/json-schema-spec 955d185db846cfca84269d9d711b10f4f3353d38 +jsonschema-draft3 https://github.com/json-schema-org/json-schema-spec 89912ad69fe15e006e8336a59e93bf7a1e46fa54 +jsonschema-draft2 https://github.com/json-schema-org/json-schema-spec 707f65070d09fe5baa1315bce4d31a66ff124171 +jsonschema-draft1 https://github.com/json-schema-org/json-schema-spec 2072feec9fc7a7ff0b2bb5b02c2d6742c554cc4a +jsonschema-draft0 https://github.com/json-schema-org/json-schema-spec 7ea575aef8d5c0183acbe6ff65b4c98ee9c236ec +openapi https://github.com/OAI/OpenAPI-Specification 74906beddddab9e555337031b2a8d8e9338c4972 +referencing-suite https://github.com/python-jsonschema/referencing-suite 61c4cc202b1e96ed5adcaf4842a595f68d659212 +uritemplate-test https://github.com/uri-templates/uritemplate-test 1eb27ab4462b9e5819dc47db99044f5fd1fa9bc7 +pyca-cryptography https://github.com/pyca/cryptography c4935a7021af37c38e0684b0546c1b4378518342 +pcre2 https://github.com/PCRE2Project/pcre2 pcre2-10.47 +googletest https://github.com/google/googletest 
a7f443b80b105f940225332ed3c31f2790092f47 +googlebenchmark https://github.com/google/benchmark 378fe693a1ef51500db21b11ff05a8018c5f0e55 diff --git a/vendor/core/cmake/Findmpdecimal.cmake b/vendor/core/cmake/Findmpdecimal.cmake deleted file mode 100644 index 01c31c7..0000000 --- a/vendor/core/cmake/Findmpdecimal.cmake +++ /dev/null @@ -1,146 +0,0 @@ -if(NOT mpdecimal_FOUND) - set(MPDECIMAL_DIR "${PROJECT_SOURCE_DIR}/vendor/mpdecimal") - set(MPDECIMAL_SOURCE_DIR "${MPDECIMAL_DIR}/libmpdec") - set(MPDECIMAL_BINARY_DIR "${PROJECT_BINARY_DIR}/mpdecimal") - - file(MAKE_DIRECTORY "${MPDECIMAL_BINARY_DIR}/include") - - if(MSVC) - configure_file( - "${MPDECIMAL_SOURCE_DIR}/mpdecimal64vc.h" - "${MPDECIMAL_BINARY_DIR}/include/mpdecimal.h" - COPYONLY) - set(MPD_CONFIG_LIST CONFIG_64 MASM) - else() - set(MPD_HEADER_CONFIG "/* ABI: 64-bit */") - - if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|AMD64") - set(MPD_CONFIG_LIST CONFIG_64 ASM) - elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64") - set(MPD_CONFIG_LIST CONFIG_64 ANSI HAVE_UINT128_T) - else() - set(MPD_CONFIG_LIST CONFIG_64 ANSI) - endif() - - configure_file( - "${MPDECIMAL_SOURCE_DIR}/mpdecimal.h.in" - "${MPDECIMAL_BINARY_DIR}/include/mpdecimal.h" - @ONLY) - endif() - - set(MPDECIMAL_PUBLIC_HEADER "${MPDECIMAL_BINARY_DIR}/include/mpdecimal.h") - - set(MPDECIMAL_SOURCES - "${MPDECIMAL_SOURCE_DIR}/basearith.c" - "${MPDECIMAL_SOURCE_DIR}/constants.c" - "${MPDECIMAL_SOURCE_DIR}/context.c" - "${MPDECIMAL_SOURCE_DIR}/convolute.c" - "${MPDECIMAL_SOURCE_DIR}/crt.c" - "${MPDECIMAL_SOURCE_DIR}/difradix2.c" - "${MPDECIMAL_SOURCE_DIR}/fnt.c" - "${MPDECIMAL_SOURCE_DIR}/fourstep.c" - "${MPDECIMAL_SOURCE_DIR}/io.c" - "${MPDECIMAL_SOURCE_DIR}/mpalloc.c" - "${MPDECIMAL_SOURCE_DIR}/mpdecimal.c" - "${MPDECIMAL_SOURCE_DIR}/mpsignal.c" - "${MPDECIMAL_SOURCE_DIR}/numbertheory.c" - "${MPDECIMAL_SOURCE_DIR}/sixstep.c" - "${MPDECIMAL_SOURCE_DIR}/transpose.c") - - if(MSVC) - list(APPEND MPDECIMAL_SOURCES 
"${MPDECIMAL_SOURCE_DIR}/vcdiv64.asm") - endif() - - add_library(mpdecimal ${MPDECIMAL_SOURCES}) - sourcemeta_add_default_options(PRIVATE mpdecimal) - - if(SOURCEMETA_COMPILER_LLVM OR SOURCEMETA_COMPILER_GCC) - target_compile_options(mpdecimal PRIVATE -Wno-sign-conversion) - target_compile_options(mpdecimal PRIVATE -Wno-implicit-fallthrough) - target_compile_options(mpdecimal PRIVATE -Wno-conversion) - endif() - - if(SOURCEMETA_COMPILER_MSVC) - target_compile_options(mpdecimal PRIVATE /wd4200) - target_compile_options(mpdecimal PRIVATE /wd4702) - target_compile_options(mpdecimal PRIVATE /wd4996) - endif() - - target_include_directories(mpdecimal PRIVATE - "${MPDECIMAL_SOURCE_DIR}") - - target_include_directories(mpdecimal PUBLIC - "$" - "$") - - target_compile_definitions(mpdecimal PUBLIC MPD_CONFIG_64) - foreach(config_item ${MPD_CONFIG_LIST}) - target_compile_definitions(mpdecimal PRIVATE ${config_item}) - endforeach() - - target_compile_definitions(mpdecimal PRIVATE NDEBUG) - - if(SOURCEMETA_OS_LINUX) - target_compile_definitions(mpdecimal PRIVATE _GNU_SOURCE) - endif() - - if(UNIX AND NOT APPLE) - target_link_libraries(mpdecimal PRIVATE m) - endif() - - if(SOURCEMETA_COMPILER_LLVM OR SOURCEMETA_COMPILER_GCC) - target_compile_options(mpdecimal PRIVATE -Wall -Wextra -Wno-unknown-pragmas) - if(BUILD_SHARED_LIBS) - target_compile_options(mpdecimal PUBLIC -fvisibility=default) - endif() - endif() - - if(MSVC) - if(BUILD_SHARED_LIBS) - target_compile_definitions(mpdecimal PRIVATE BUILD_LIBMPDEC) - else() - target_compile_options(mpdecimal PRIVATE /wd4273) - target_compile_definitions(mpdecimal PUBLIC BUILD_LIBMPDEC) - endif() - endif() - - add_library(mpdecimal::mpdecimal ALIAS mpdecimal) - - set_target_properties(mpdecimal - PROPERTIES - OUTPUT_NAME mpdecimal - PUBLIC_HEADER "${MPDECIMAL_PUBLIC_HEADER}" - C_VISIBILITY_PRESET "default" - C_VISIBILITY_INLINES_HIDDEN FALSE - POSITION_INDEPENDENT_CODE ON - EXPORT_NAME mpdecimal) - - if(SOURCEMETA_CORE_INSTALL) - 
include(GNUInstallDirs) - install(TARGETS mpdecimal - EXPORT mpdecimal - PUBLIC_HEADER DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" - COMPONENT sourcemeta_core_dev - RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" - COMPONENT sourcemeta_core - LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" - COMPONENT sourcemeta_core - NAMELINK_COMPONENT sourcemeta_core_dev - ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" - COMPONENT sourcemeta_core_dev) - install(EXPORT mpdecimal - DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/mpdecimal" - NAMESPACE mpdecimal:: - COMPONENT sourcemeta_core_dev) - - file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/mpdecimal-config.cmake - "include(\"\${CMAKE_CURRENT_LIST_DIR}/mpdecimal.cmake\")\n" - "check_required_components(\"mpdecimal\")\n") - install(FILES - "${CMAKE_CURRENT_BINARY_DIR}/mpdecimal-config.cmake" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/mpdecimal" - COMPONENT sourcemeta_core_dev) - endif() - - set(mpdecimal_FOUND ON) -endif() diff --git a/vendor/core/cmake/Findyaml.cmake b/vendor/core/cmake/Findyaml.cmake deleted file mode 100644 index 2e4c160..0000000 --- a/vendor/core/cmake/Findyaml.cmake +++ /dev/null @@ -1,103 +0,0 @@ -if(NOT Yaml_FOUND) - set(YAML_DIR "${PROJECT_SOURCE_DIR}/vendor/yaml") - set(YAML_PUBLIC_HEADER "${YAML_DIR}/include/yaml.h") - - set(YAML_SOURCES - "${YAML_PUBLIC_HEADER}" - "${YAML_DIR}/src/api.c" - "${YAML_DIR}/src/dumper.c" - "${YAML_DIR}/src/emitter.c" - "${YAML_DIR}/src/loader.c" - "${YAML_DIR}/src/parser.c" - "${YAML_DIR}/src/reader.c" - "${YAML_DIR}/src/scanner.c" - "${YAML_DIR}/src/writer.c" - "${YAML_DIR}/src/yaml_private.h") - - add_library(yaml ${YAML_SOURCES}) - sourcemeta_add_default_options(PRIVATE yaml) - - if(SOURCEMETA_COMPILER_LLVM OR SOURCEMETA_COMPILER_GCC) - target_compile_options(yaml PRIVATE -Wno-implicit-function-declaration) - target_compile_options(yaml PRIVATE -Wno-int-to-pointer-cast) - target_compile_options(yaml PRIVATE -Wno-shadow) - target_compile_options(yaml PRIVATE -Wno-sign-conversion) - 
target_compile_options(yaml PRIVATE -Wno-shorten-64-to-32) - target_compile_options(yaml PRIVATE -Wno-newline-eof) - target_compile_options(yaml PRIVATE -Wno-conditional-uninitialized) - target_compile_options(yaml PRIVATE -Wno-implicit-int-conversion) - target_compile_options(yaml PRIVATE -Wno-conversion) - target_compile_options(yaml PRIVATE -Wno-unused-value) - endif() - - if(SOURCEMETA_COMPILER_MSVC) - target_compile_options(yaml PRIVATE /wd4996) - target_compile_options(yaml PRIVATE /wd4456) - target_compile_options(yaml PRIVATE /wd4457) - target_compile_options(yaml PRIVATE /wd4267) - target_compile_options(yaml PRIVATE /wd4244) - target_compile_options(yaml PRIVATE /wd4100) - target_compile_options(yaml PRIVATE /wd4245) - target_compile_options(yaml PRIVATE /wd4701) - target_compile_options(yaml PRIVATE /wd4702) - endif() - - if(SOURCEMETA_OS_LINUX) - message(STATUS "Compiling libyaml with _GNU_SOURCE") - # See https://github.com/3DSGuy/Project_CTR/issues/122 - target_compile_definitions(yaml PRIVATE _GNU_SOURCE) - endif() - - if(BUILD_SHARED_LIBS) - target_compile_definitions(yaml PUBLIC YAML_DECLARE_EXPORT) - else() - target_compile_definitions(yaml PUBLIC YAML_DECLARE_STATIC) - endif() - - target_include_directories(yaml PRIVATE "${YAML_DIR}/include") - target_include_directories(yaml PUBLIC - "$" - "$") - - target_compile_definitions(yaml PRIVATE YAML_VERSION_STRING="0.0.0") - target_compile_definitions(yaml PRIVATE YAML_VERSION_MAJOR=0) - target_compile_definitions(yaml PRIVATE YAML_VERSION_MINOR=0) - target_compile_definitions(yaml PRIVATE YAML_VERSION_PATCH=0) - - set_target_properties(yaml - PROPERTIES - OUTPUT_NAME yaml - PUBLIC_HEADER "${YAML_PUBLIC_HEADER}" - C_VISIBILITY_PRESET "default" - C_VISIBILITY_INLINES_HIDDEN FALSE - EXPORT_NAME yaml) - - if(SOURCEMETA_CORE_INSTALL) - include(GNUInstallDirs) - install(TARGETS yaml - EXPORT yaml - PUBLIC_HEADER DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" - COMPONENT sourcemeta_core_dev - RUNTIME DESTINATION 
"${CMAKE_INSTALL_BINDIR}" - COMPONENT sourcemeta_core - LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" - COMPONENT sourcemeta_core - NAMELINK_COMPONENT sourcemeta_core_dev - ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" - COMPONENT sourcemeta_core_dev) - install(EXPORT yaml - DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/yaml" - NAMESPACE yaml:: - COMPONENT sourcemeta_core_dev) - - file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/yaml-config.cmake - "include(\"\${CMAKE_CURRENT_LIST_DIR}/yaml.cmake\")\n" - "check_required_components(\"yaml\")\n") - install(FILES - "${CMAKE_CURRENT_BINARY_DIR}/yaml-config.cmake" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/yaml" - COMPONENT sourcemeta_core_dev) - endif() - - set(Yaml_FOUND ON) -endif() diff --git a/vendor/core/cmake/common/compiler/options.cmake b/vendor/core/cmake/common/compiler/options.cmake index 1c14a16..3775428 100644 --- a/vendor/core/cmake/common/compiler/options.cmake +++ b/vendor/core/cmake/common/compiler/options.cmake @@ -46,6 +46,10 @@ function(sourcemeta_add_default_options visibility target) # To improve how much GCC/Clang will vectorize -fno-math-errno + -fno-trapping-math + -fno-signed-zeros + -freciprocal-math + -fassociative-math # Assume that signed arithmetic overflow of addition, subtraction and # multiplication wraps around using twos-complement representation @@ -82,7 +86,6 @@ function(sourcemeta_add_default_options visibility target) -fslp-vectorize) elseif(SOURCEMETA_COMPILER_GCC) target_compile_options("${target}" ${visibility} - -fno-trapping-math # Newer versions of GCC (i.e. 
14) seem to print a lot of false-positives here $<$,$>:-Wno-dangling-reference> # GCC seems to print a lot of false-positives here diff --git a/vendor/core/cmake/common/defaults.cmake b/vendor/core/cmake/common/defaults.cmake index 2385058..bb2ce6f 100644 --- a/vendor/core/cmake/common/defaults.cmake +++ b/vendor/core/cmake/common/defaults.cmake @@ -1,6 +1,6 @@ # Standards (sane modern defaults) if("CXX" IN_LIST SOURCEMETA_LANGUAGES) - set(CMAKE_CXX_STANDARD 20) + set(CMAKE_CXX_STANDARD 23) endif() if("C" IN_LIST SOURCEMETA_LANGUAGES) set(CMAKE_C_STANDARD 11) @@ -97,20 +97,21 @@ endif() # Note we don't enable LTO on RelWithDebInfo, as it breaks debugging symbols # on at least AppleClang, making stepping through source code impossible. +# LTO is applied globally because it is a whole-program optimization. +# Every translation unit must be compiled with LTO flags for the linker +# to perform cross-module optimization effectively. if(CMAKE_BUILD_TYPE STREQUAL "Release") if(SOURCEMETA_COMPILER_GCC AND NOT BUILD_SHARED_LIBS) message(STATUS "Enabling Fat LTO") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flto -ffat-lto-objects") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -flto") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -flto") + add_compile_options(-flto -ffat-lto-objects) + add_link_options(-flto) endif() # TODO: Make this work on Linux on LLVM if(SOURCEMETA_COMPILER_LLVM AND NOT BUILD_SHARED_LIBS AND APPLE) message(STATUS "Enabling Fat LTO") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flto=full") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -flto=full") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -flto=full") + add_compile_options(-flto=full) + add_link_options(-flto=full) endif() endif() diff --git a/vendor/core/cmake/common/targets/executable.cmake b/vendor/core/cmake/common/targets/executable.cmake index 4f5db98..63f61cc 100644 --- a/vendor/core/cmake/common/targets/executable.cmake +++ 
b/vendor/core/cmake/common/targets/executable.cmake @@ -30,5 +30,49 @@ function(sourcemeta_executable) add_executable("${TARGET_NAME}" ${SOURCEMETA_EXECUTABLE_SOURCES}) sourcemeta_add_default_options(PRIVATE ${TARGET_NAME}) + + # See https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html + # Position Independent Executable (PIE) for ASLR support + if(SOURCEMETA_COMPILER_LLVM OR SOURCEMETA_COMPILER_GCC) + target_compile_options(${TARGET_NAME} PRIVATE + $<$:-fPIE> + $<$:-fPIE> + $<$:-fPIE>) + target_link_options(${TARGET_NAME} PRIVATE + $<$:-pie> + $<$:-pie> + $<$:-pie>) + endif() + + # See https://learn.microsoft.com/en-us/cpp/build/reference/guard-enable-control-flow-guard + # See https://learn.microsoft.com/en-us/cpp/build/reference/cetcompat + if(SOURCEMETA_COMPILER_MSVC) + target_compile_options(${TARGET_NAME} PRIVATE /guard:cf) + target_link_options(${TARGET_NAME} PRIVATE /guard:cf /CETCOMPAT) + endif() + + # Linux-specific ELF linker hardening and compatibility options + if(SOURCEMETA_OS_LINUX AND (SOURCEMETA_COMPILER_LLVM OR SOURCEMETA_COMPILER_GCC)) + # Maximize compatibility of pre-built binaries across Linux distros + if(NOT BUILD_SHARED_LIBS) + target_link_options(${TARGET_NAME} PRIVATE -static-libstdc++ -static-libgcc) + endif() + target_link_options(${TARGET_NAME} PRIVATE + "LINKER:-z,nodlopen" + "LINKER:-z,noexecstack" + "LINKER:-z,relro" + "LINKER:-z,now" + "LINKER:--as-needed") + if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18") + include(CheckLinkerFlag) + check_linker_flag(CXX "LINKER:--no-copy-dt-needed-entries" + SOURCEMETA_LINKER_NO_COPY_DT_NEEDED) + if(SOURCEMETA_LINKER_NO_COPY_DT_NEEDED) + target_link_options(${TARGET_NAME} PRIVATE + "LINKER:--no-copy-dt-needed-entries") + endif() + endif() + endif() + set_target_properties("${TARGET_NAME}" PROPERTIES FOLDER "${FOLDER_NAME}") endfunction() diff --git a/vendor/core/config.cmake.in b/vendor/core/config.cmake.in index 2e23e90..0f93f6c 100644 --- 
a/vendor/core/config.cmake.in +++ b/vendor/core/config.cmake.in @@ -4,15 +4,17 @@ list(APPEND SOURCEMETA_CORE_COMPONENTS ${Core_FIND_COMPONENTS}) list(APPEND SOURCEMETA_CORE_COMPONENTS ${core_FIND_COMPONENTS}) if(NOT SOURCEMETA_CORE_COMPONENTS) + list(APPEND SOURCEMETA_CORE_COMPONENTS preprocessor) list(APPEND SOURCEMETA_CORE_COMPONENTS io) list(APPEND SOURCEMETA_CORE_COMPONENTS process) list(APPEND SOURCEMETA_CORE_COMPONENTS parallel) list(APPEND SOURCEMETA_CORE_COMPONENTS numeric) + list(APPEND SOURCEMETA_CORE_COMPONENTS unicode) list(APPEND SOURCEMETA_CORE_COMPONENTS punycode) list(APPEND SOURCEMETA_CORE_COMPONENTS time) - list(APPEND SOURCEMETA_CORE_COMPONENTS uuid) - list(APPEND SOURCEMETA_CORE_COMPONENTS md5) + list(APPEND SOURCEMETA_CORE_COMPONENTS crypto) list(APPEND SOURCEMETA_CORE_COMPONENTS regex) + list(APPEND SOURCEMETA_CORE_COMPONENTS ip) list(APPEND SOURCEMETA_CORE_COMPONENTS uri) list(APPEND SOURCEMETA_CORE_COMPONENTS uritemplate) list(APPEND SOURCEMETA_CORE_COMPONENTS json) @@ -20,18 +22,20 @@ if(NOT SOURCEMETA_CORE_COMPONENTS) list(APPEND SOURCEMETA_CORE_COMPONENTS jsonpointer) list(APPEND SOURCEMETA_CORE_COMPONENTS jsonschema) list(APPEND SOURCEMETA_CORE_COMPONENTS yaml) + list(APPEND SOURCEMETA_CORE_COMPONENTS semver) list(APPEND SOURCEMETA_CORE_COMPONENTS html) list(APPEND SOURCEMETA_CORE_COMPONENTS alterschema) list(APPEND SOURCEMETA_CORE_COMPONENTS editorschema) - list(APPEND SOURCEMETA_CORE_COMPONENTS schemaconfig) + list(APPEND SOURCEMETA_CORE_COMPONENTS error) list(APPEND SOURCEMETA_CORE_COMPONENTS options) - list(APPEND SOURCEMETA_CORE_COMPONENTS build) endif() include(CMakeFindDependencyMacro) foreach(component ${SOURCEMETA_CORE_COMPONENTS}) - if(component STREQUAL "io") + if(component STREQUAL "preprocessor") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_preprocessor.cmake") + elseif(component STREQUAL "io") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") elseif(component STREQUAL "process") 
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_process.cmake") @@ -39,66 +43,84 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS}) find_dependency(Threads) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_parallel.cmake") elseif(component STREQUAL "numeric") - find_dependency(mpdecimal CONFIG) + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_preprocessor.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") + elseif(component STREQUAL "unicode") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") elseif(component STREQUAL "punycode") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_punycode.cmake") elseif(component STREQUAL "time") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_time.cmake") - elseif(component STREQUAL "uuid") - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uuid.cmake") - elseif(component STREQUAL "md5") - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_md5.cmake") + elseif(component STREQUAL "crypto") + if(@SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL@) + find_dependency(OpenSSL) + endif() + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_crypto.cmake") elseif(component STREQUAL "regex") find_dependency(PCRE2 CONFIG) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_regex.cmake") + elseif(component STREQUAL "ip") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_ip.cmake") elseif(component STREQUAL "uri") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_ip.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uri.cmake") elseif(component STREQUAL "uritemplate") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uritemplate.cmake") elseif(component STREQUAL "json") - find_dependency(mpdecimal CONFIG) + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_preprocessor.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") 
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") elseif(component STREQUAL "jsonl") - find_dependency(mpdecimal CONFIG) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonl.cmake") elseif(component STREQUAL "jsonpointer") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_regex.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_ip.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uri.cmake") - find_dependency(mpdecimal CONFIG) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonpointer.cmake") elseif(component STREQUAL "jsonschema") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_ip.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uri.cmake") - find_dependency(mpdecimal CONFIG) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonpointer.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonschema.cmake") elseif(component STREQUAL "yaml") - find_dependency(mpdecimal CONFIG) + find_dependency(PCRE2 CONFIG) + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_regex.cmake") + 
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_ip.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uri.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") - find_dependency(yaml CONFIG) - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonpointer.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_yaml.cmake") + elseif(component STREQUAL "semver") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_preprocessor.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_semver.cmake") elseif(component STREQUAL "html") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_preprocessor.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_html.cmake") elseif(component STREQUAL "alterschema") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_ip.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uri.cmake") - find_dependency(mpdecimal CONFIG) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") find_dependency(PCRE2 CONFIG) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_regex.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") @@ -106,27 +128,19 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS}) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonschema.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_alterschema.cmake") elseif(component STREQUAL "editorschema") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_ip.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uri.cmake") - find_dependency(mpdecimal CONFIG) 
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonpointer.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonschema.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_editorschema.cmake") - elseif(component STREQUAL "schemaconfig") - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uri.cmake") - find_dependency(mpdecimal CONFIG) - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonpointer.cmake") - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_schemaconfig.cmake") + elseif(component STREQUAL "error") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_error.cmake") elseif(component STREQUAL "options") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_options.cmake") - elseif(component STREQUAL "build") - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_build.cmake") else() message(FATAL_ERROR "Unknown Sourcemeta Core component: ${component}") endif() diff --git a/vendor/core/src/core/crypto/CMakeLists.txt b/vendor/core/src/core/crypto/CMakeLists.txt new file mode 100644 index 0000000..2d7fe1f --- /dev/null +++ b/vendor/core/src/core/crypto/CMakeLists.txt @@ -0,0 +1,13 @@ +sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME crypto + PRIVATE_HEADERS sha256.h uuid.h + SOURCES crypto_sha256.cc crypto_uuid.cc) + +if(SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL) + target_compile_definitions(sourcemeta_core_crypto + PRIVATE SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL) + target_link_libraries(sourcemeta_core_crypto 
PRIVATE OpenSSL::Crypto) +endif() + +if(SOURCEMETA_CORE_INSTALL) + sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME crypto) +endif() diff --git a/vendor/core/src/core/crypto/crypto_sha256.cc b/vendor/core/src/core/crypto/crypto_sha256.cc new file mode 100644 index 0000000..08f4e62 --- /dev/null +++ b/vendor/core/src/core/crypto/crypto_sha256.cc @@ -0,0 +1,232 @@ +#include + +#include // std::array +#include // std::uint32_t, std::uint64_t + +#ifdef SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL +#include // EVP_MD_CTX_new, EVP_DigestInit_ex, EVP_sha256, EVP_DigestUpdate, EVP_DigestFinal_ex, EVP_MD_CTX_free +#include // std::runtime_error +#else +#include // std::memcpy +#endif + +namespace { +constexpr std::array HEX_DIGITS{{'0', '1', '2', '3', '4', '5', '6', + '7', '8', '9', 'a', 'b', 'c', 'd', + 'e', 'f', '\0'}}; +} // namespace + +#ifdef SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL + +namespace sourcemeta::core { + +auto sha256(const std::string_view input, std::ostream &output) -> void { + auto *context = EVP_MD_CTX_new(); + if (context == nullptr) { + throw std::runtime_error("Could not allocate OpenSSL digest context"); + } + + if (EVP_DigestInit_ex(context, EVP_sha256(), nullptr) != 1 || + EVP_DigestUpdate(context, input.data(), input.size()) != 1) { + EVP_MD_CTX_free(context); + throw std::runtime_error("Could not compute SHA-256 digest"); + } + + std::array digest{}; + unsigned int length = 0; + if (EVP_DigestFinal_ex(context, digest.data(), &length) != 1) { + EVP_MD_CTX_free(context); + throw std::runtime_error("Could not finalize SHA-256 digest"); + } + + EVP_MD_CTX_free(context); + + // TODO: Use std::views::enumerate once libc++ supports it + // (__cpp_lib_ranges_enumerate) + for (std::uint64_t index = 0; index < 32u; ++index) { + output.put(HEX_DIGITS[(digest[index] >> 4u) & 0x0fu]); + output.put(HEX_DIGITS[digest[index] & 0x0fu]); + } +} + +} // namespace sourcemeta::core + +#else + +namespace { + +inline constexpr auto 
rotate_right(std::uint32_t value, + std::uint64_t count) noexcept + -> std::uint32_t { + return (value >> count) | (value << (32u - count)); +} + +// FIPS 180-4 Section 4.1.2 logical functions +inline constexpr auto big_sigma_0(std::uint32_t value) noexcept + -> std::uint32_t { + return rotate_right(value, 2u) ^ rotate_right(value, 13u) ^ + rotate_right(value, 22u); +} + +inline constexpr auto big_sigma_1(std::uint32_t value) noexcept + -> std::uint32_t { + return rotate_right(value, 6u) ^ rotate_right(value, 11u) ^ + rotate_right(value, 25u); +} + +inline constexpr auto small_sigma_0(std::uint32_t value) noexcept + -> std::uint32_t { + return rotate_right(value, 7u) ^ rotate_right(value, 18u) ^ (value >> 3u); +} + +inline constexpr auto small_sigma_1(std::uint32_t value) noexcept + -> std::uint32_t { + return rotate_right(value, 17u) ^ rotate_right(value, 19u) ^ (value >> 10u); +} + +// Equivalent to (x & y) ^ (~x & z) but avoids a bitwise NOT +inline constexpr auto choice(std::uint32_t x, std::uint32_t y, + std::uint32_t z) noexcept -> std::uint32_t { + return z ^ (x & (y ^ z)); +} + +inline constexpr auto majority(std::uint32_t x, std::uint32_t y, + std::uint32_t z) noexcept -> std::uint32_t { + return (x & y) ^ (x & z) ^ (y & z); +} + +inline auto sha256_process_block(const unsigned char *block, + std::array &state) noexcept + -> void { + // First 32 bits of the fractional parts of the cube roots + // of the first 64 prime numbers (FIPS 180-4 Section 4.2.2) + static constexpr std::array round_constants = { + {0x428a2f98U, 0x71374491U, 0xb5c0fbcfU, 0xe9b5dba5U, 0x3956c25bU, + 0x59f111f1U, 0x923f82a4U, 0xab1c5ed5U, 0xd807aa98U, 0x12835b01U, + 0x243185beU, 0x550c7dc3U, 0x72be5d74U, 0x80deb1feU, 0x9bdc06a7U, + 0xc19bf174U, 0xe49b69c1U, 0xefbe4786U, 0x0fc19dc6U, 0x240ca1ccU, + 0x2de92c6fU, 0x4a7484aaU, 0x5cb0a9dcU, 0x76f988daU, 0x983e5152U, + 0xa831c66dU, 0xb00327c8U, 0xbf597fc7U, 0xc6e00bf3U, 0xd5a79147U, + 0x06ca6351U, 0x14292967U, 0x27b70a85U, 0x2e1b2138U, 
0x4d2c6dfcU, + 0x53380d13U, 0x650a7354U, 0x766a0abbU, 0x81c2c92eU, 0x92722c85U, + 0xa2bfe8a1U, 0xa81a664bU, 0xc24b8b70U, 0xc76c51a3U, 0xd192e819U, + 0xd6990624U, 0xf40e3585U, 0x106aa070U, 0x19a4c116U, 0x1e376c08U, + 0x2748774cU, 0x34b0bcb5U, 0x391c0cb3U, 0x4ed8aa4aU, 0x5b9cca4fU, + 0x682e6ff3U, 0x748f82eeU, 0x78a5636fU, 0x84c87814U, 0x8cc70208U, + 0x90befffaU, 0xa4506cebU, 0xbef9a3f7U, 0xc67178f2U}}; + + // Decode 16 big-endian 32-bit words from the block + std::array schedule; + for (std::uint64_t word_index = 0; word_index < 16u; ++word_index) { + const std::uint64_t byte_index = word_index * 4u; + schedule[word_index] = + (static_cast(block[byte_index]) << 24u) | + (static_cast(block[byte_index + 1u]) << 16u) | + (static_cast(block[byte_index + 2u]) << 8u) | + static_cast(block[byte_index + 3u]); + } + + // Extend the message schedule (FIPS 180-4 Section 6.2.2 step 1) + for (std::uint64_t index = 16u; index < 64u; ++index) { + schedule[index] = + small_sigma_1(schedule[index - 2u]) + schedule[index - 7u] + + small_sigma_0(schedule[index - 15u]) + schedule[index - 16u]; + } + + auto working = state; + + // Compression function (FIPS 180-4 Section 6.2.2 step 3) + for (std::uint64_t round_index = 0u; round_index < 64u; ++round_index) { + const auto temporary_1 = working[7] + big_sigma_1(working[4]) + + choice(working[4], working[5], working[6]) + + round_constants[round_index] + + schedule[round_index]; + const auto temporary_2 = + big_sigma_0(working[0]) + majority(working[0], working[1], working[2]); + + working[7] = working[6]; + working[6] = working[5]; + working[5] = working[4]; + working[4] = working[3] + temporary_1; + working[3] = working[2]; + working[2] = working[1]; + working[1] = working[0]; + working[0] = temporary_1 + temporary_2; + } + + for (std::uint64_t index = 0u; index < 8u; ++index) { + state[index] += working[index]; + } +} + +} // namespace + +namespace sourcemeta::core { + +auto sha256(const std::string_view input, std::ostream &output) -> 
void { + // Initial hash values: first 32 bits of the fractional parts of the + // square roots of the first 8 primes (FIPS 180-4 Section 5.3.3) + std::array state{}; + state[0] = 0x6a09e667U; + state[1] = 0xbb67ae85U; + state[2] = 0x3c6ef372U; + state[3] = 0xa54ff53aU; + state[4] = 0x510e527fU; + state[5] = 0x9b05688cU; + state[6] = 0x1f83d9abU; + state[7] = 0x5be0cd19U; + + const auto *const input_bytes = + reinterpret_cast(input.data()); + const std::size_t input_length = input.size(); + + // Process all full 64-byte blocks directly from the input (streaming) + std::size_t processed_bytes = 0u; + while (input_length - processed_bytes >= 64u) { + sha256_process_block(input_bytes + processed_bytes, state); + processed_bytes += 64u; + } + + // Prepare the final block(s) (one or two 64-byte blocks) + std::array final_block{}; + const std::size_t remaining_bytes = input_length - processed_bytes; + if (remaining_bytes > 0u) { + std::memcpy(final_block.data(), input_bytes + processed_bytes, + remaining_bytes); + } + + // Append the 0x80 byte after the message data + final_block[remaining_bytes] = 0x80u; + + // Append length in bits as big-endian 64-bit at the end of the padding + const std::uint64_t message_length_bits = + static_cast(input_length) * 8ull; + + if (remaining_bytes < 56u) { + for (std::uint64_t index = 0u; index < 8u; ++index) { + final_block[56u + index] = static_cast( + (message_length_bits >> (8u * (7u - index))) & 0xffu); + } + sha256_process_block(final_block.data(), state); + } else { + for (std::uint64_t index = 0u; index < 8u; ++index) { + final_block[64u + 56u + index] = static_cast( + (message_length_bits >> (8u * (7u - index))) & 0xffu); + } + + sha256_process_block(final_block.data(), state); + sha256_process_block(final_block.data() + 64u, state); + } + + for (std::uint64_t state_index = 0u; state_index < 8u; ++state_index) { + const auto value = state[state_index]; + for (std::uint64_t nibble = 0u; nibble < 8u; ++nibble) { + const auto 
shift = 28u - nibble * 4u; + output.put(HEX_DIGITS[(value >> shift) & 0x0fu]); + } + } +} + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/crypto/crypto_uuid.cc b/vendor/core/src/core/crypto/crypto_uuid.cc new file mode 100644 index 0000000..cb7f674 --- /dev/null +++ b/vendor/core/src/core/crypto/crypto_uuid.cc @@ -0,0 +1,81 @@ +#include + +#include // std::array +#include // std::size_t +#include // std::string_view + +#ifdef SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL +#include // RAND_bytes +#include // std::runtime_error +#else +#include // std::random_device, std::mt19937, std::uniform_int_distribution +#endif + +namespace sourcemeta::core { + +// See RFC 9562 Section 5.4 +// Format: xxxxxxxx-xxxx-4xxx-Nxxx-xxxxxxxxxxxx +// where 4 is the version and N is the variant (8, 9, a, or b) +auto uuidv4() -> std::string { + static constexpr std::string_view digits = "0123456789abcdef"; + static constexpr std::string_view variant_digits = "89ab"; + static constexpr std::array dash = { + {false, false, false, false, true, false, true, false, true, false, true, + false, false, false, false, false}}; + +#ifdef SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL + std::array random_bytes{}; + if (RAND_bytes(random_bytes.data(), static_cast(random_bytes.size())) != + 1) { + throw std::runtime_error("Could not generate random bytes with OpenSSL"); + } +#else + static std::random_device device; + static std::mt19937 generator{device()}; + std::uniform_int_distribution distribution(0, + 15); + std::uniform_int_distribution + variant_distribution(0, 3); +#endif + + std::string result; + result.reserve(36); + for (std::size_t index = 0; index < dash.size(); ++index) { + if (dash[index]) { + result += '-'; + } + +#ifdef SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL + const auto high_nibble = (random_bytes[index] >> 4u) & 0x0fu; + const auto low_nibble = random_bytes[index] & 0x0fu; +#endif + + // RFC 9562 Section 5.4: version bits (48-51) must be 0b0100 + if (index 
== 6) { + result += '4'; + // RFC 9562 Section 5.4: variant bits (64-65) must be 0b10 + } else if (index == 8) { +#ifdef SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL + result += variant_digits[high_nibble & 0x03u]; +#else + result += variant_digits[variant_distribution(generator)]; +#endif + } else { +#ifdef SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL + result += digits[high_nibble]; +#else + result += digits[distribution(generator)]; +#endif + } + +#ifdef SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL + result += digits[low_nibble]; +#else + result += digits[distribution(generator)]; +#endif + } + + return result; +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/crypto/include/sourcemeta/core/crypto.h b/vendor/core/src/core/crypto/include/sourcemeta/core/crypto.h new file mode 100644 index 0000000..332cc84 --- /dev/null +++ b/vendor/core/src/core/crypto/include/sourcemeta/core/crypto.h @@ -0,0 +1,16 @@ +#ifndef SOURCEMETA_CORE_CRYPTO_H_ +#define SOURCEMETA_CORE_CRYPTO_H_ + +/// @defgroup crypto Crypto +/// @brief Cryptographic hash functions and UUID generation. +/// +/// This functionality is included as follows: +/// +/// ```cpp +/// #include +/// ``` + +#include +#include + +#endif diff --git a/vendor/core/src/core/crypto/include/sourcemeta/core/crypto_sha256.h b/vendor/core/src/core/crypto/include/sourcemeta/core/crypto_sha256.h new file mode 100644 index 0000000..34c1583 --- /dev/null +++ b/vendor/core/src/core/crypto/include/sourcemeta/core/crypto_sha256.h @@ -0,0 +1,30 @@ +#ifndef SOURCEMETA_CORE_CRYPTO_SHA256_H_ +#define SOURCEMETA_CORE_CRYPTO_SHA256_H_ + +#ifndef SOURCEMETA_CORE_CRYPTO_EXPORT +#include +#endif + +#include // std::ostream +#include // std::string_view + +namespace sourcemeta::core { + +/// @ingroup crypto +/// Hash a string using SHA-256. 
For example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// std::ostringstream result; +/// sourcemeta::core::sha256("foo bar", result); +/// std::cout << result.str() << "\n"; +/// ``` +auto SOURCEMETA_CORE_CRYPTO_EXPORT sha256(const std::string_view input, + std::ostream &output) -> void; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/crypto/include/sourcemeta/core/crypto_uuid.h b/vendor/core/src/core/crypto/include/sourcemeta/core/crypto_uuid.h new file mode 100644 index 0000000..1439a9a --- /dev/null +++ b/vendor/core/src/core/crypto/include/sourcemeta/core/crypto_uuid.h @@ -0,0 +1,27 @@ +#ifndef SOURCEMETA_CORE_CRYPTO_UUID_H_ +#define SOURCEMETA_CORE_CRYPTO_UUID_H_ + +#ifndef SOURCEMETA_CORE_CRYPTO_EXPORT +#include +#endif + +#include // std::string + +namespace sourcemeta::core { + +/// @ingroup crypto +/// Generate a random UUID v4 string. For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// std::cout << sourcemeta::core::uuidv4() << "\n"; +/// ``` +/// +/// See https://www.rfc-editor.org/rfc/rfc9562#name-uuid-version-4 +SOURCEMETA_CORE_CRYPTO_EXPORT auto uuidv4() -> std::string; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/html/CMakeLists.txt b/vendor/core/src/core/html/CMakeLists.txt index 6b35797..6085589 100644 --- a/vendor/core/src/core/html/CMakeLists.txt +++ b/vendor/core/src/core/html/CMakeLists.txt @@ -1,7 +1,9 @@ sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME html - PRIVATE_HEADERS escape.h encoder.h elements.h - SOURCES escape.cc encoder.cc) + PRIVATE_HEADERS buffer.h escape.h writer.h + SOURCES escape.cc writer.cc) if(SOURCEMETA_CORE_INSTALL) sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME html) endif() + +target_link_libraries(sourcemeta_core_html PUBLIC sourcemeta::core::preprocessor) diff --git a/vendor/core/src/core/html/encoder.cc b/vendor/core/src/core/html/encoder.cc deleted file mode 100644 index 
ffddbc5..0000000 --- a/vendor/core/src/core/html/encoder.cc +++ /dev/null @@ -1,74 +0,0 @@ -#include - -#include // std::ostream -#include // std::ostringstream -#include // std::string - -namespace sourcemeta::core { - -auto HTML::render() const -> std::string { - std::ostringstream output_stream; - output_stream << "<" << this->tag_name; - - // Render attributes - for (const auto &[attribute_name, attribute_value] : this->attributes) { - std::string escaped_value{attribute_value}; - html_escape(escaped_value); - output_stream << " " << attribute_name << "=\"" << escaped_value << "\""; - } - - if (this->self_closing) { - output_stream << " />"; - return output_stream.str(); - } - - output_stream << ">"; - - // Render children - if (this->child_elements.empty()) { - output_stream << "tag_name << ">"; - } else if (this->child_elements.size() == 1 && - std::get_if(&this->child_elements[0])) { - // Inline single text node - output_stream << this->render(this->child_elements[0]); - output_stream << "tag_name << ">"; - } else { - // Block level children - for (const auto &child_element : this->child_elements) { - output_stream << this->render(child_element); - } - output_stream << "tag_name << ">"; - } - - return output_stream.str(); -} - -auto HTML::render(const HTMLNode &child_element) const -> std::string { - if (const auto *text = std::get_if(&child_element)) { - std::string escaped_text{*text}; - html_escape(escaped_text); - return escaped_text; - } else if (const auto *raw_html = std::get_if(&child_element)) { - return raw_html->content; - } else if (const auto *html_element = std::get_if(&child_element)) { - return html_element->render(); - } - return ""; -} - -auto HTML::push_back(const HTMLNode &child) -> HTML & { - this->child_elements.push_back(child); - return *this; -} - -auto HTML::push_back(HTMLNode &&child) -> HTML & { - this->child_elements.push_back(std::move(child)); - return *this; -} - -auto operator<<(std::ostream &output_stream, const HTML 
&html_element) - -> std::ostream & { - return output_stream << html_element.render(); -} - -} // namespace sourcemeta::core diff --git a/vendor/core/src/core/html/escape.cc b/vendor/core/src/core/html/escape.cc index 6070d72..57ffbc8 100644 --- a/vendor/core/src/core/html/escape.cc +++ b/vendor/core/src/core/html/escape.cc @@ -5,8 +5,7 @@ namespace sourcemeta::core { auto html_escape(std::string &text) -> void { - std::size_t write_position{0}; - std::size_t original_size{text.size()}; + const std::size_t original_size{text.size()}; // First pass: count how much space we need std::size_t required_size{0}; @@ -35,60 +34,139 @@ auto html_escape(std::string &text) -> void { return; } - // Resize string to accommodate escaped characters - text.resize(required_size); + // Write escaped characters backwards to avoid overwriting unprocessed data + text.resize_and_overwrite(required_size, + [original_size](char *buffer, std::size_t count) { + auto read_position = original_size; + auto write_position = count; - // Second pass: work backwards to avoid overwriting data - std::size_t read_position{original_size}; - write_position = required_size; + while (read_position > 0) { + --read_position; + const auto character = buffer[read_position]; - while (read_position > 0) { - --read_position; - char character = text[read_position]; + switch (character) { + case '&': + write_position -= 5; + buffer[write_position] = '&'; + buffer[write_position + 1] = 'a'; + buffer[write_position + 2] = 'm'; + buffer[write_position + 3] = 'p'; + buffer[write_position + 4] = ';'; + break; + case '<': + write_position -= 4; + buffer[write_position] = '&'; + buffer[write_position + 1] = 'l'; + buffer[write_position + 2] = 't'; + buffer[write_position + 3] = ';'; + break; + case '>': + write_position -= 4; + buffer[write_position] = '&'; + buffer[write_position + 1] = 'g'; + buffer[write_position + 2] = 't'; + buffer[write_position + 3] = ';'; + break; + case '"': + write_position -= 6; + 
buffer[write_position] = '&'; + buffer[write_position + 1] = 'q'; + buffer[write_position + 2] = 'u'; + buffer[write_position + 3] = 'o'; + buffer[write_position + 4] = 't'; + buffer[write_position + 5] = ';'; + break; + case '\'': + write_position -= 5; + buffer[write_position] = '&'; + buffer[write_position + 1] = '#'; + buffer[write_position + 2] = '3'; + buffer[write_position + 3] = '9'; + buffer[write_position + 4] = ';'; + break; + default: + --write_position; + buffer[write_position] = character; + } + } + return count; + }); +} + +static auto needs_escape(const std::string_view input) -> bool { + for (const char character : input) { + switch (character) { + case '&': + case '<': + case '>': + case '"': + case '\'': + return true; + default: + break; + } + } + + return false; +} + +auto html_escape_append(std::string &output, const std::string_view input) + -> void { + if (!needs_escape(input)) { + output += input; + return; + } + + for (const char character : input) { + switch (character) { + case '&': + output += "&"; + break; + case '<': + output += "<"; + break; + case '>': + output += ">"; + break; + case '"': + output += """; + break; + case '\'': + output += "'"; + break; + default: + output += character; + } + } +} + +auto html_escape_append(HTMLBuffer &output, const std::string_view input) + -> void { + if (!needs_escape(input)) { + output.append(input); + return; + } + + for (const char character : input) { switch (character) { case '&': - write_position -= 5; - text[write_position] = '&'; - text[write_position + 1] = 'a'; - text[write_position + 2] = 'm'; - text[write_position + 3] = 'p'; - text[write_position + 4] = ';'; + output.append("&"); break; case '<': - write_position -= 4; - text[write_position] = '&'; - text[write_position + 1] = 'l'; - text[write_position + 2] = 't'; - text[write_position + 3] = ';'; + output.append("<"); break; case '>': - write_position -= 4; - text[write_position] = '&'; - text[write_position + 1] = 'g'; - 
text[write_position + 2] = 't'; - text[write_position + 3] = ';'; + output.append(">"); break; case '"': - write_position -= 6; - text[write_position] = '&'; - text[write_position + 1] = 'q'; - text[write_position + 2] = 'u'; - text[write_position + 3] = 'o'; - text[write_position + 4] = 't'; - text[write_position + 5] = ';'; + output.append("""); break; case '\'': - write_position -= 5; - text[write_position] = '&'; - text[write_position + 1] = '#'; - text[write_position + 2] = '3'; - text[write_position + 3] = '9'; - text[write_position + 4] = ';'; + output.append("'"); break; default: - --write_position; - text[write_position] = character; + output.append(character); } } } diff --git a/vendor/core/src/core/html/include/sourcemeta/core/html.h b/vendor/core/src/core/html/include/sourcemeta/core/html.h index b2a0928..23853c6 100644 --- a/vendor/core/src/core/html/include/sourcemeta/core/html.h +++ b/vendor/core/src/core/html/include/sourcemeta/core/html.h @@ -11,7 +11,6 @@ /// #include /// ``` -#include -#include +#include #endif diff --git a/vendor/core/src/core/html/include/sourcemeta/core/html_buffer.h b/vendor/core/src/core/html/include/sourcemeta/core/html_buffer.h new file mode 100644 index 0000000..ae8a4fa --- /dev/null +++ b/vendor/core/src/core/html/include/sourcemeta/core/html_buffer.h @@ -0,0 +1,93 @@ +#ifndef SOURCEMETA_CORE_HTML_BUFFER_H_ +#define SOURCEMETA_CORE_HTML_BUFFER_H_ + +#ifndef SOURCEMETA_CORE_HTML_EXPORT +#include +#endif + +#include + +#include // std::memcpy +#include // std::ostream +#include // std::string +#include // std::string_view + +namespace sourcemeta::core { + +/// @ingroup html +/// A fast append-only string buffer +class SOURCEMETA_CORE_HTML_EXPORT HTMLBuffer { +public: + HTMLBuffer() = default; + HTMLBuffer(const HTMLBuffer &) = delete; + auto operator=(const HTMLBuffer &) -> HTMLBuffer & = delete; + HTMLBuffer(HTMLBuffer &&) = delete; + auto operator=(HTMLBuffer &&) -> HTMLBuffer & = delete; + + SOURCEMETA_FORCEINLINE 
inline auto reserve(const std::size_t bytes) -> void { + this->buffer_.resize(bytes); + this->cursor_ = this->buffer_.data(); + this->end_ = this->cursor_ + bytes; + } + + SOURCEMETA_FORCEINLINE inline auto append(const char character) -> void { + if (!this->cursor_ || this->cursor_ >= this->end_) [[unlikely]] { + this->grow(1); + } + + *this->cursor_ = character; + ++this->cursor_; + } + + SOURCEMETA_FORCEINLINE inline auto append(const std::string_view data) + -> void { + const auto length{data.size()}; + if (length == 0) { + return; + } + + const auto remaining{ + this->cursor_ ? static_cast(this->end_ - this->cursor_) + : 0uz}; + if (remaining < length) [[unlikely]] { + this->grow(length); + } + + std::memcpy(this->cursor_, data.data(), length); + this->cursor_ += length; + } + + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto str() + -> const std::string & { + if (this->cursor_) { + this->buffer_.resize( + static_cast(this->cursor_ - this->buffer_.data())); + this->cursor_ = nullptr; + this->end_ = nullptr; + } + + return this->buffer_; + } + + auto write(std::ostream &stream) -> void; + +private: + auto grow(std::size_t needed) -> void; + +// Exporting symbols that depends on the standard C++ library is considered +// safe. 
+// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN +#if defined(_MSC_VER) +#pragma warning(disable : 4251) +#endif + std::string buffer_; +#if defined(_MSC_VER) +#pragma warning(default : 4251) +#endif + char *cursor_{nullptr}; + char *end_{nullptr}; +}; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/html/include/sourcemeta/core/html_elements.h b/vendor/core/src/core/html/include/sourcemeta/core/html_elements.h deleted file mode 100644 index a721826..0000000 --- a/vendor/core/src/core/html/include/sourcemeta/core/html_elements.h +++ /dev/null @@ -1,450 +0,0 @@ -#ifndef SOURCEMETA_CORE_HTML_ELEMENTS_H_ -#define SOURCEMETA_CORE_HTML_ELEMENTS_H_ - -#include - -namespace sourcemeta::core::html { - -#ifndef DOXYGEN -#define HTML_VOID_ELEMENT(name) \ - inline auto name() -> HTML { return HTML(#name, true); } \ - inline auto name(HTMLAttributes attributes) -> HTML { \ - return HTML(#name, std::move(attributes), true); \ - } - -#define HTML_CONTAINER_ELEMENT_NAMED(name, tag) \ - inline auto name(HTMLAttributes attributes) -> HTML { \ - return HTML(#tag, std::move(attributes)); \ - } \ - template \ - inline auto name(HTMLAttributes attributes, Children &&...children) \ - -> HTML { \ - return HTML(#tag, std::move(attributes), \ - std::forward(children)...); \ - } \ - template \ - inline auto name(Children &&...children) -> HTML { \ - return HTML(#tag, std::forward(children)...); \ - } - -#define HTML_CONTAINER_ELEMENT(name) HTML_CONTAINER_ELEMENT_NAMED(name, name) - -#define HTML_COMPACT_ELEMENT(name) \ - inline auto name(HTMLAttributes attributes) -> HTML { \ - return HTML(#name, std::move(attributes)); \ - } \ - template \ - inline auto name(HTMLAttributes attributes, Children &&...children) \ - -> HTML { \ - return HTML(#name, std::move(attributes), \ - std::forward(children)...); \ - } \ - template \ - inline auto name(Children &&...children) -> HTML { 
\ - return HTML(#name, std::forward(children)...); \ - } - -#define HTML_VOID_ATTR_ELEMENT(name) \ - inline auto name(HTMLAttributes attributes) -> HTML { \ - return HTML(#name, std::move(attributes), true); \ - } -#endif - -/// @ingroup html -inline auto raw(std::string html_content) -> HTMLRaw { - return HTMLRaw{std::move(html_content)}; -} - -// ============================================================================= -// Document Structure Elements -// ============================================================================= - -/// @ingroup html -HTML_CONTAINER_ELEMENT(html) - -/// @ingroup html -HTML_VOID_ATTR_ELEMENT(base) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(head) - -/// @ingroup html -HTML_VOID_ATTR_ELEMENT(link) - -/// @ingroup html -HTML_VOID_ATTR_ELEMENT(meta) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(style) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(title) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(body) - -// ============================================================================= -// Content Sectioning Elements -// ============================================================================= - -/// @ingroup html -HTML_CONTAINER_ELEMENT(address) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(article) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(aside) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(footer) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(header) - -/// @ingroup html -HTML_COMPACT_ELEMENT(h1) -/// @ingroup html -HTML_COMPACT_ELEMENT(h2) -/// @ingroup html -HTML_COMPACT_ELEMENT(h3) -/// @ingroup html -HTML_COMPACT_ELEMENT(h4) -/// @ingroup html -HTML_COMPACT_ELEMENT(h5) -/// @ingroup html -HTML_COMPACT_ELEMENT(h6) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(hgroup) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(main) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(nav) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(section) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(search) - -// 
============================================================================= -// Text Content Elements -// ============================================================================= - -/// @ingroup html -HTML_CONTAINER_ELEMENT(blockquote) - -/// @ingroup html -HTML_COMPACT_ELEMENT(dd) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(div) - -/// @ingroup html -HTML_COMPACT_ELEMENT(dl) - -/// @ingroup html -HTML_COMPACT_ELEMENT(dt) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(figcaption) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(figure) - -/// @ingroup html -HTML_VOID_ELEMENT(hr) - -/// @ingroup html -HTML_COMPACT_ELEMENT(li) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(menu) - -/// @ingroup html -HTML_COMPACT_ELEMENT(ol) - -/// @ingroup html -HTML_COMPACT_ELEMENT(p) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(pre) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(ul) - -// ============================================================================= -// Inline Text Semantics Elements -// ============================================================================= - -/// @ingroup html -HTML_COMPACT_ELEMENT(a) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(abbr) - -/// @ingroup html -HTML_COMPACT_ELEMENT(b) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(bdi) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(bdo) - -/// @ingroup html -HTML_VOID_ELEMENT(br) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(cite) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(code) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(data) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(dfn) - -/// @ingroup html -HTML_COMPACT_ELEMENT(em) - -/// @ingroup html -HTML_COMPACT_ELEMENT(i) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(kbd) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(mark) - -/// @ingroup html -HTML_COMPACT_ELEMENT(q) - -/// @ingroup html -HTML_COMPACT_ELEMENT(rp) - -/// @ingroup html -HTML_COMPACT_ELEMENT(rt) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(ruby) - -/// @ingroup html -HTML_COMPACT_ELEMENT(s) 
- -/// @ingroup html -HTML_CONTAINER_ELEMENT(samp) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(small) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(span) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(strong) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(sub) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(sup) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(time) - -/// @ingroup html -HTML_COMPACT_ELEMENT(u) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(var) - -/// @ingroup html -HTML_VOID_ELEMENT(wbr) - -// ============================================================================= -// Image and Multimedia Elements -// ============================================================================= - -/// @ingroup html -HTML_VOID_ATTR_ELEMENT(area) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(audio) - -/// @ingroup html -HTML_VOID_ATTR_ELEMENT(img) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(map) - -/// @ingroup html -HTML_VOID_ATTR_ELEMENT(track) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(video) - -// ============================================================================= -// Embedded Content Elements -// ============================================================================= - -/// @ingroup html -HTML_VOID_ATTR_ELEMENT(embed) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(iframe) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(object) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(picture) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(portal) - -/// @ingroup html -HTML_VOID_ATTR_ELEMENT(source) - -// ============================================================================= -// Scripting Elements -// ============================================================================= - -/// @ingroup html -HTML_CONTAINER_ELEMENT(canvas) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(noscript) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(script) - -// ============================================================================= -// Demarcating Edits Elements -// 
============================================================================= - -/// @ingroup html -HTML_CONTAINER_ELEMENT(del) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(ins) - -// ============================================================================= -// Table Content Elements -// ============================================================================= - -/// @ingroup html -HTML_CONTAINER_ELEMENT(caption) - -/// @ingroup html -HTML_VOID_ATTR_ELEMENT(col) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(colgroup) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(table) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(tbody) - -/// @ingroup html -HTML_COMPACT_ELEMENT(td) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(tfoot) - -/// @ingroup html -HTML_COMPACT_ELEMENT(th) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(thead) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(tr) - -// ============================================================================= -// Forms Elements -// ============================================================================= - -/// @ingroup html -HTML_CONTAINER_ELEMENT(button) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(datalist) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(fieldset) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(form) - -/// @ingroup html -HTML_VOID_ATTR_ELEMENT(input) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(label) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(legend) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(meter) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(optgroup) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(option) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(output) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(progress) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(select) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(textarea) - -// ============================================================================= -// Interactive Elements -// 
============================================================================= - -/// @ingroup html -HTML_CONTAINER_ELEMENT(details) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(dialog) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(summary) - -// ============================================================================= -// Web Components Elements -// ============================================================================= - -/// @ingroup html -HTML_CONTAINER_ELEMENT(slot) - -/// @ingroup html -HTML_CONTAINER_ELEMENT_NAMED(template_, template) - -#ifndef DOXYGEN -#undef HTML_VOID_ELEMENT -#undef HTML_CONTAINER_ELEMENT -#undef HTML_CONTAINER_ELEMENT_NAMED -#undef HTML_COMPACT_ELEMENT -#undef HTML_VOID_ATTR_ELEMENT -#endif - -} // namespace sourcemeta::core::html - -#endif diff --git a/vendor/core/src/core/html/include/sourcemeta/core/html_encoder.h b/vendor/core/src/core/html/include/sourcemeta/core/html_encoder.h deleted file mode 100644 index 7be0433..0000000 --- a/vendor/core/src/core/html/include/sourcemeta/core/html_encoder.h +++ /dev/null @@ -1,145 +0,0 @@ -#ifndef SOURCEMETA_CORE_HTML_ENCODER_H_ -#define SOURCEMETA_CORE_HTML_ENCODER_H_ - -#ifndef SOURCEMETA_CORE_HTML_EXPORT -#include -#endif - -#include - -#include // std::ostream -#include // std::string -#include // std::pair -#include // std::variant, std::holds_alternative, std::get -#include // std::vector - -namespace sourcemeta::core { - -/// @ingroup html -using HTMLAttributes = std::vector>; - -#ifndef DOXYGEN -// Forward declaration -class HTML; -#endif - -/// @ingroup html -/// Raw HTML content wrapper for unescaped content -struct SOURCEMETA_CORE_HTML_EXPORT HTMLRaw { -// Exporting symbols that depends on the standard C++ library is considered -// safe. 
-// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN -#if defined(_MSC_VER) -#pragma warning(disable : 4251) -#endif - std::string content; -#if defined(_MSC_VER) -#pragma warning(default : 4251) -#endif - explicit HTMLRaw(std::string html_content) - : content{std::move(html_content)} {} -}; - -/// @ingroup html -/// A node can be either a string (text node), raw HTML content, or another HTML -/// element -using HTMLNode = std::variant; - -/// @ingroup html -/// An HTML element that can be rendered to a string. Elements can contain -/// attributes and child nodes. -/// -/// For example: -/// -/// ```cpp -/// #include -/// #include -/// #include -/// -/// using namespace sourcemeta::core::html; -/// -/// std::ostringstream result; -/// result << div(h1("Title"), p("Content")); -/// assert(result.str() == "

Title

Content

"); -/// ``` -class SOURCEMETA_CORE_HTML_EXPORT HTML { -public: - HTML(std::string tag, bool self_closing_tag = false) - : tag_name(std::move(tag)), self_closing(self_closing_tag) {} - - HTML(std::string tag, HTMLAttributes tag_attributes, - bool self_closing_tag = false) - : tag_name(std::move(tag)), attributes(std::move(tag_attributes)), - self_closing(self_closing_tag) {} - - HTML(std::string tag, HTMLAttributes tag_attributes, - std::vector children) - : tag_name(std::move(tag)), attributes(std::move(tag_attributes)), - child_elements(std::move(children)), self_closing(false) {} - - HTML(std::string tag, HTMLAttributes tag_attributes, - std::vector children) - : tag_name(std::move(tag)), attributes(std::move(tag_attributes)), - self_closing(false) { - this->child_elements.reserve(children.size()); - for (auto &child_element : children) { - this->child_elements.emplace_back(std::move(child_element)); - } - } - - HTML(std::string tag, std::vector children) - : tag_name(std::move(tag)), child_elements(std::move(children)), - self_closing(false) {} - - HTML(std::string tag, std::vector children) - : tag_name(std::move(tag)), self_closing(false) { - this->child_elements.reserve(children.size()); - for (auto &child_element : children) { - this->child_elements.emplace_back(std::move(child_element)); - } - } - - template - HTML(std::string tag, HTMLAttributes tag_attributes, Children &&...children) - : tag_name(std::move(tag)), attributes(std::move(tag_attributes)), - self_closing(false) { - (this->child_elements.push_back(std::forward(children)), ...); - } - - template - HTML(std::string tag, Children &&...children) - : tag_name(std::move(tag)), self_closing(false) { - (this->child_elements.push_back(std::forward(children)), ...); - } - - [[nodiscard]] auto render() const -> std::string; - - auto push_back(const HTMLNode &child) -> HTML &; - auto push_back(HTMLNode &&child) -> HTML &; - - // Stream operator declaration - friend SOURCEMETA_CORE_HTML_EXPORT auto - 
operator<<(std::ostream &output_stream, const HTML &html_element) - -> std::ostream &; - -private: -// Exporting symbols that depends on the standard C++ library is considered -// safe. -// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN -#if defined(_MSC_VER) -#pragma warning(disable : 4251) -#endif - std::string tag_name; - HTMLAttributes attributes; - std::vector child_elements; -#if defined(_MSC_VER) -#pragma warning(default : 4251) -#endif - bool self_closing; - - [[nodiscard]] auto render(const HTMLNode &child_element) const -> std::string; -}; - -} // namespace sourcemeta::core - -#endif diff --git a/vendor/core/src/core/html/include/sourcemeta/core/html_escape.h b/vendor/core/src/core/html/include/sourcemeta/core/html_escape.h index 2d5d11e..4062c41 100644 --- a/vendor/core/src/core/html/include/sourcemeta/core/html_escape.h +++ b/vendor/core/src/core/html/include/sourcemeta/core/html_escape.h @@ -5,7 +5,10 @@ #include #endif -#include // std::string +#include + +#include // std::string +#include // std::string_view namespace sourcemeta::core { @@ -33,6 +36,17 @@ namespace sourcemeta::core { SOURCEMETA_CORE_HTML_EXPORT auto html_escape(std::string &text) -> void; +/// @ingroup html +/// Append the HTML-escaped form of `input` directly to `output`, +/// without allocating a temporary string. +SOURCEMETA_CORE_HTML_EXPORT +auto html_escape_append(std::string &output, std::string_view input) -> void; + +/// @ingroup html +/// Append the HTML-escaped form of `input` directly to a buffer. 
+SOURCEMETA_CORE_HTML_EXPORT +auto html_escape_append(HTMLBuffer &output, std::string_view input) -> void; + } // namespace sourcemeta::core #endif diff --git a/vendor/core/src/core/html/include/sourcemeta/core/html_writer.h b/vendor/core/src/core/html/include/sourcemeta/core/html_writer.h new file mode 100644 index 0000000..384f130 --- /dev/null +++ b/vendor/core/src/core/html/include/sourcemeta/core/html_writer.h @@ -0,0 +1,466 @@ +#ifndef SOURCEMETA_CORE_HTML_WRITER_H_ +#define SOURCEMETA_CORE_HTML_WRITER_H_ + +#ifndef SOURCEMETA_CORE_HTML_EXPORT +#include +#endif + +#include +#include +#include + +#include // assert +#include // std::string_view +#include // std::vector + +namespace sourcemeta::core { + +/// @ingroup html +/// A streaming HTML writer that renders directly to a string buffer. +/// No intermediate DOM tree is built. Elements are serialized as methods +/// are called. +/// +/// ```cpp +/// #include +/// #include +/// +/// sourcemeta::core::HTMLWriter document; +/// document.div().attribute("class", "greeting"); +/// document.h1("Hello"); +/// document.p("World"); +/// document.close(); +/// ``` +class SOURCEMETA_CORE_HTML_EXPORT HTMLWriter { +public: + /// Pre-allocate the output buffer + SOURCEMETA_FORCEINLINE inline auto reserve(std::size_t bytes) -> void { + this->buffer_.reserve(bytes); + } + + /// Close the most recently opened element + SOURCEMETA_FORCEINLINE inline auto close() -> HTMLWriter & { + this->flush_open_tag(); + assert(!this->tag_stack_.empty()); + this->buffer_.append("buffer_.append(this->tag_stack_.back()); + this->buffer_.append(">"); + this->tag_stack_.pop_back(); + return *this; + } + + /// Add an attribute to the currently open tag. Must be called + /// immediately after an element method and before any content. 
+ SOURCEMETA_FORCEINLINE inline auto attribute(std::string_view name, + std::string_view value) + -> HTMLWriter & { + assert(this->tag_open_); + this->buffer_.append(" "); + this->buffer_.append(name); + this->buffer_.append("=\""); + html_escape_append(this->buffer_, value); + this->buffer_.append("\""); + return *this; + } + + /// Write HTML-escaped text content + SOURCEMETA_FORCEINLINE inline auto text(std::string_view content) + -> HTMLWriter & { + this->flush_open_tag(); + html_escape_append(this->buffer_, content); + return *this; + } + + /// Write raw HTML content (not escaped) + SOURCEMETA_FORCEINLINE inline auto raw(std::string_view content) + -> HTMLWriter & { + this->flush_open_tag(); + this->buffer_.append(content); + return *this; + } + + /// Get the rendered HTML string + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto str() + -> const std::string & { + this->flush_open_tag(); + return this->buffer_.str(); + } + + /// Write the rendered HTML to an output stream + auto write(std::ostream &stream) -> void; + +// Exporting symbols that depends on the standard C++ library is considered +// safe. +// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN +#if defined(_MSC_VER) +#pragma warning(disable : 4251) +#endif + +#ifndef DOXYGEN +// Macro to generate container element methods. +// Container elements write on open and on close(). 
+// Overloads: +// .tag() open with no attributes +// .tag(text) open, write escaped text, close (shorthand) +#define HTML_WRITER_CONTAINER(name) \ + SOURCEMETA_FORCEINLINE inline auto name() -> HTMLWriter & { \ + this->open_tag(#name); \ + return *this; \ + } \ + /* NOLINTNEXTLINE(bugprone-macro-parentheses) */ \ + SOURCEMETA_FORCEINLINE inline auto name(std::string_view text_content) \ + -> HTMLWriter & { \ + this->open_tag(#name); \ + this->text(text_content); \ + this->close(); \ + return *this; \ + } + +// Same as above but with a different C++ method name than the HTML tag +#define HTML_WRITER_CONTAINER_NAMED(name, tag) \ + SOURCEMETA_FORCEINLINE inline auto name() -> HTMLWriter & { \ + this->open_tag(#tag); \ + return *this; \ + } \ + /* NOLINTNEXTLINE(bugprone-macro-parentheses) */ \ + SOURCEMETA_FORCEINLINE inline auto name(std::string_view text_content) \ + -> HTMLWriter & { \ + this->open_tag(#tag); \ + this->text(text_content); \ + this->close(); \ + return *this; \ + } + +// Macro to generate void element methods. 
+// Void elements are self-closing: or +#define HTML_WRITER_VOID(name) \ + /* NOLINTNEXTLINE(bugprone-macro-parentheses) */ \ + SOURCEMETA_FORCEINLINE inline auto name() -> HTMLWriter & { \ + this->void_tag(#name); \ + return *this; \ + } +#endif + + // ========================================================================= + // Document Structure Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_CONTAINER(html) + /// @ingroup html + HTML_WRITER_VOID(base) + /// @ingroup html + HTML_WRITER_CONTAINER(head) + /// @ingroup html + HTML_WRITER_VOID(link) + /// @ingroup html + HTML_WRITER_VOID(meta) + /// @ingroup html + HTML_WRITER_CONTAINER(style) + /// @ingroup html + HTML_WRITER_CONTAINER(title) + /// @ingroup html + HTML_WRITER_CONTAINER(body) + + // ========================================================================= + // Content Sectioning Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_CONTAINER(address) + /// @ingroup html + HTML_WRITER_CONTAINER(article) + /// @ingroup html + HTML_WRITER_CONTAINER(aside) + /// @ingroup html + HTML_WRITER_CONTAINER(footer) + /// @ingroup html + HTML_WRITER_CONTAINER(header) + /// @ingroup html + HTML_WRITER_CONTAINER(h1) + /// @ingroup html + HTML_WRITER_CONTAINER(h2) + /// @ingroup html + HTML_WRITER_CONTAINER(h3) + /// @ingroup html + HTML_WRITER_CONTAINER(h4) + /// @ingroup html + HTML_WRITER_CONTAINER(h5) + /// @ingroup html + HTML_WRITER_CONTAINER(h6) + /// @ingroup html + HTML_WRITER_CONTAINER(hgroup) + /// @ingroup html + HTML_WRITER_CONTAINER(main) + /// @ingroup html + HTML_WRITER_CONTAINER(nav) + /// @ingroup html + HTML_WRITER_CONTAINER(section) + /// @ingroup html + HTML_WRITER_CONTAINER(search) + + // ========================================================================= + // Text Content Elements + // 
========================================================================= + + /// @ingroup html + HTML_WRITER_CONTAINER(blockquote) + /// @ingroup html + HTML_WRITER_CONTAINER(dd) + /// @ingroup html + HTML_WRITER_CONTAINER(div) + /// @ingroup html + HTML_WRITER_CONTAINER(dl) + /// @ingroup html + HTML_WRITER_CONTAINER(dt) + /// @ingroup html + HTML_WRITER_CONTAINER(figcaption) + /// @ingroup html + HTML_WRITER_CONTAINER(figure) + /// @ingroup html + HTML_WRITER_VOID(hr) + /// @ingroup html + HTML_WRITER_CONTAINER(li) + /// @ingroup html + HTML_WRITER_CONTAINER(menu) + /// @ingroup html + HTML_WRITER_CONTAINER(ol) + /// @ingroup html + HTML_WRITER_CONTAINER(p) + /// @ingroup html + HTML_WRITER_CONTAINER(pre) + /// @ingroup html + HTML_WRITER_CONTAINER(ul) + + // ========================================================================= + // Inline Text Semantics Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_CONTAINER(a) + /// @ingroup html + HTML_WRITER_CONTAINER(abbr) + /// @ingroup html + HTML_WRITER_CONTAINER(b) + /// @ingroup html + HTML_WRITER_CONTAINER(bdi) + /// @ingroup html + HTML_WRITER_CONTAINER(bdo) + /// @ingroup html + HTML_WRITER_VOID(br) + /// @ingroup html + HTML_WRITER_CONTAINER(cite) + /// @ingroup html + HTML_WRITER_CONTAINER(code) + /// @ingroup html + HTML_WRITER_CONTAINER(data) + /// @ingroup html + HTML_WRITER_CONTAINER(dfn) + /// @ingroup html + HTML_WRITER_CONTAINER(em) + /// @ingroup html + HTML_WRITER_CONTAINER(i) + /// @ingroup html + HTML_WRITER_CONTAINER(kbd) + /// @ingroup html + HTML_WRITER_CONTAINER(mark) + /// @ingroup html + HTML_WRITER_CONTAINER(q) + /// @ingroup html + HTML_WRITER_CONTAINER(rp) + /// @ingroup html + HTML_WRITER_CONTAINER(rt) + /// @ingroup html + HTML_WRITER_CONTAINER(ruby) + /// @ingroup html + HTML_WRITER_CONTAINER(s) + /// @ingroup html + HTML_WRITER_CONTAINER(samp) + /// @ingroup html + HTML_WRITER_CONTAINER(small) + /// @ingroup 
html + HTML_WRITER_CONTAINER(span) + /// @ingroup html + HTML_WRITER_CONTAINER(strong) + /// @ingroup html + HTML_WRITER_CONTAINER(sub) + /// @ingroup html + HTML_WRITER_CONTAINER(sup) + /// @ingroup html + HTML_WRITER_CONTAINER(time) + /// @ingroup html + HTML_WRITER_CONTAINER(u) + /// @ingroup html + HTML_WRITER_CONTAINER(var) + /// @ingroup html + HTML_WRITER_VOID(wbr) + + // ========================================================================= + // Image and Multimedia Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_VOID(area) + /// @ingroup html + HTML_WRITER_CONTAINER(audio) + /// @ingroup html + HTML_WRITER_VOID(img) + /// @ingroup html + HTML_WRITER_CONTAINER(map) + /// @ingroup html + HTML_WRITER_VOID(track) + /// @ingroup html + HTML_WRITER_CONTAINER(video) + + // ========================================================================= + // Embedded Content Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_VOID(embed) + /// @ingroup html + HTML_WRITER_CONTAINER(iframe) + /// @ingroup html + HTML_WRITER_CONTAINER(object) + /// @ingroup html + HTML_WRITER_CONTAINER(picture) + /// @ingroup html + HTML_WRITER_CONTAINER(portal) + /// @ingroup html + HTML_WRITER_VOID(source) + + // ========================================================================= + // Scripting Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_CONTAINER(canvas) + /// @ingroup html + HTML_WRITER_CONTAINER(noscript) + /// @ingroup html + HTML_WRITER_CONTAINER(script) + + // ========================================================================= + // Demarcating Edits Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_CONTAINER(del) + /// @ingroup html + HTML_WRITER_CONTAINER(ins) + + // 
========================================================================= + // Table Content Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_CONTAINER(caption) + /// @ingroup html + HTML_WRITER_VOID(col) + /// @ingroup html + HTML_WRITER_CONTAINER(colgroup) + /// @ingroup html + HTML_WRITER_CONTAINER(table) + /// @ingroup html + HTML_WRITER_CONTAINER(tbody) + /// @ingroup html + HTML_WRITER_CONTAINER(td) + /// @ingroup html + HTML_WRITER_CONTAINER(tfoot) + /// @ingroup html + HTML_WRITER_CONTAINER(th) + /// @ingroup html + HTML_WRITER_CONTAINER(thead) + /// @ingroup html + HTML_WRITER_CONTAINER(tr) + + // ========================================================================= + // Forms Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_CONTAINER(button) + /// @ingroup html + HTML_WRITER_CONTAINER(datalist) + /// @ingroup html + HTML_WRITER_CONTAINER(fieldset) + /// @ingroup html + HTML_WRITER_CONTAINER(form) + /// @ingroup html + HTML_WRITER_VOID(input) + /// @ingroup html + HTML_WRITER_CONTAINER(label) + /// @ingroup html + HTML_WRITER_CONTAINER(legend) + /// @ingroup html + HTML_WRITER_CONTAINER(meter) + /// @ingroup html + HTML_WRITER_CONTAINER(optgroup) + /// @ingroup html + HTML_WRITER_CONTAINER(option) + /// @ingroup html + HTML_WRITER_CONTAINER(output) + /// @ingroup html + HTML_WRITER_CONTAINER(progress) + /// @ingroup html + HTML_WRITER_CONTAINER(select) + /// @ingroup html + HTML_WRITER_CONTAINER(textarea) + + // ========================================================================= + // Interactive Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_CONTAINER(details) + /// @ingroup html + HTML_WRITER_CONTAINER(dialog) + /// @ingroup html + HTML_WRITER_CONTAINER(summary) + + // 
========================================================================= + // Web Components Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_CONTAINER(slot) + /// @ingroup html + HTML_WRITER_CONTAINER_NAMED(template_, template) + +#ifndef DOXYGEN +#undef HTML_WRITER_CONTAINER +#undef HTML_WRITER_CONTAINER_NAMED +#undef HTML_WRITER_VOID +#endif + +private: + SOURCEMETA_FORCEINLINE inline auto open_tag(std::string_view tag) -> void { + this->flush_open_tag(); + this->buffer_.append("<"); + this->buffer_.append(tag); + this->tag_stack_.push_back(tag); + this->tag_open_ = true; + this->tag_open_is_void_ = false; + } + + SOURCEMETA_FORCEINLINE inline auto void_tag(std::string_view tag) -> void { + this->flush_open_tag(); + this->buffer_.append("<"); + this->buffer_.append(tag); + this->tag_open_ = true; + this->tag_open_is_void_ = true; + } + + auto flush_open_tag() -> void; + + HTMLBuffer buffer_; + std::vector tag_stack_; + bool tag_open_{false}; + bool tag_open_is_void_{false}; +#if defined(_MSC_VER) +#pragma warning(default : 4251) +#endif +}; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/html/writer.cc b/vendor/core/src/core/html/writer.cc new file mode 100644 index 0000000..ee6775a --- /dev/null +++ b/vendor/core/src/core/html/writer.cc @@ -0,0 +1,48 @@ +#include + +#include // std::ostream + +namespace sourcemeta::core { + +auto HTMLBuffer::grow(const std::size_t needed) -> void { + const auto current_size{ + this->cursor_ + ? static_cast(this->cursor_ - this->buffer_.data()) + : 0}; + auto new_capacity{this->buffer_.empty() ? 
1024uz : this->buffer_.size() * 2}; + while (new_capacity < current_size + needed) { + new_capacity *= 2; + } + + this->buffer_.resize(new_capacity); + this->cursor_ = this->buffer_.data() + current_size; + this->end_ = this->buffer_.data() + new_capacity; +} + +auto HTMLBuffer::write(std::ostream &stream) -> void { + if (this->cursor_) { + const auto size{ + static_cast(this->cursor_ - this->buffer_.data())}; + stream.write(this->buffer_.data(), static_cast(size)); + } +} + +auto HTMLWriter::flush_open_tag() -> void { + if (this->tag_open_) { + if (this->tag_open_is_void_) { + this->buffer_.append(" />"); + } else { + this->buffer_.append(">"); + } + + this->tag_open_ = false; + this->tag_open_is_void_ = false; + } +} + +auto HTMLWriter::write(std::ostream &stream) -> void { + this->flush_open_tag(); + this->buffer_.write(stream); +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/uuid/CMakeLists.txt b/vendor/core/src/core/ip/CMakeLists.txt similarity index 51% rename from vendor/core/src/core/uuid/CMakeLists.txt rename to vendor/core/src/core/ip/CMakeLists.txt index 324b05a..3c2fdd7 100644 --- a/vendor/core/src/core/uuid/CMakeLists.txt +++ b/vendor/core/src/core/ip/CMakeLists.txt @@ -1,5 +1,6 @@ -sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME uuid SOURCES uuid.cc) +sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME ip + SOURCES ipv4.cc ipv6.cc) if(SOURCEMETA_CORE_INSTALL) - sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME uuid) + sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME ip) endif() diff --git a/vendor/core/src/core/ip/include/sourcemeta/core/ip.h b/vendor/core/src/core/ip/include/sourcemeta/core/ip.h new file mode 100644 index 0000000..72860f6 --- /dev/null +++ b/vendor/core/src/core/ip/include/sourcemeta/core/ip.h @@ -0,0 +1,54 @@ +#ifndef SOURCEMETA_CORE_IP_H_ +#define SOURCEMETA_CORE_IP_H_ + +#ifndef SOURCEMETA_CORE_IP_EXPORT +#include +#endif + +#include // std::string_view + 
namespace sourcemeta::core {

/// Whether the character is an ASCII decimal digit
static constexpr auto is_digit(const char character) -> bool {
  return character >= '0' && character <= '9';
}

/// Check whether the given string is a dotted-decimal IPv4 address: exactly
/// four octets in the 0-255 range separated by single dots, where an octet has
/// at most three digits and no leading zero, with nothing before or after
auto is_ipv4(const std::string_view address) -> bool {
  if (address.empty()) {
    return false;
  }

  std::string_view::size_type position{0};
  unsigned int octet_count{0};

  while (octet_count < 4) {
    // Every octet must start with a digit, which also rejects empty octets
    if (position >= address.size() || !is_digit(address[position])) {
      return false;
    }

    const auto octet_start{position};
    unsigned int value{0};
    while (position < address.size() && is_digit(address[position])) {
      // An octet has at most three digits. Stopping at the fourth one also
      // keeps `value` from wrapping around on arbitrarily long digit runs
      if (position - octet_start == 3) {
        return false;
      }

      value = value * 10 + static_cast<unsigned int>(address[position] - '0');
      position += 1;
    }

    const auto octet_length{position - octet_start};

    // Leading zeroes are not allowed, though a lone "0" is
    if (octet_length > 1 && address[octet_start] == '0') {
      return false;
    }

    if (value > 255) {
      return false;
    }

    octet_count += 1;

    // All octets but the last one must be followed by a dot
    if (octet_count < 4) {
      if (position >= address.size() || address[position] != '.') {
        return false;
      }

      position += 1;
    }
  }

  // Anything left after the fourth octet makes the address invalid
  return position == address.size();
}

/// Build a 256-entry lookup table that is true exactly at the character codes
/// of the ASCII hexadecimal digits
static constexpr auto make_hex_table() -> std::array<bool, 256> {
  std::array<bool, 256> table{};
  for (auto index{0u}; index < 256; index++) {
    table[index] = (index >= '0' && index <= '9') ||
                   (index >= 'a' && index <= 'f') ||
                   (index >= 'A' && index <= 'F');
  }
  return table;
}

static constexpr auto HEX_TABLE{make_hex_table()};

/// Whether the character is an ASCII hexadecimal digit
static constexpr auto is_hex_digit(const char character) -> bool {
  // Go through an unsigned byte so that the index is never negative
  return HEX_TABLE[static_cast<std::uint8_t>(character)];
}

/// Check whether the given string is an IPv6 address without surrounding
/// brackets: colon-separated groups of one to four hexadecimal digits, at
/// most one "::" compression, and optionally a trailing dotted-decimal IPv4
/// address that counts as two groups
auto is_ipv6(const std::string_view address) -> bool {
  if (address.empty()) {
    return false;
  }

  const auto size{address.size()};

  if (address.front() == '[' || address.back() == ']') {
    return false;
  }

  const auto double_colon{address.find("::")};
  const bool has_compression{double_colon != std::string_view::npos};

  // Only a single "::" is permitted
  if (has_compression &&
      address.find("::", double_colon + 2) != std::string_view::npos) {
    return false;
  }

  // A colon at either end is only acceptable as part of the "::"
  if (address.front() == ':' && (!has_compression || double_colon != 0)) {
    return false;
  }
  if (address.back() == ':' &&
      (!has_compression || double_colon + 1 != size - 1)) {
    return false;
  }

  unsigned int group_count{0};
  std::string_view::size_type position{0};

  while (position < size) {
    if (has_compression && position == double_colon) {
      position += 2;
      continue;
    }

    const auto group_start{position};
    unsigned int hex_count{0};
    bool found_dot{false};

    while (position < size) {
      const auto character{address[position]};
      if (character == ':') {
        break;
      }
      if (character == '.') {
        found_dot = true;
        break;
      }
      if (!is_hex_digit(character)) {
        return false;
      }
      hex_count += 1;
      position += 1;
    }

    // A dot means that the rest of the input has to be an embedded IPv4
    // address, which takes the place of the last two groups
    if (found_dot) {
      if (!is_ipv4(address.substr(group_start))) {
        return false;
      }
      group_count += 2;
      break;
    }

    if (hex_count == 0 || hex_count > 4) {
      return false;
    }

    group_count += 1;

    if (position < size && address[position] == ':') {
      // Leave the "::" for the top of the loop to consume
      if (has_compression && position == double_colon) {
        continue;
      }
      position += 1;
      if (position >= size) {
        return false;
      }
    }
  }

  // The "::" must stand for at least one group
  if (has_compression) {
    return group_count < 8;
  }

  return group_count == 8;
}

} // namespace sourcemeta::core
std::invalid_argument +#include // std::move +#include // std::vector + +namespace sourcemeta::core { + +namespace internal { + +inline auto unescape_string(const char *data, const std::uint32_t length) -> + typename JSON::String { + typename JSON::String result; + const char *cursor{data}; + const char *string_end{data + length}; + + if (!std::memchr(data, '\\', length)) { + result.append(data, length); + return result; + } + + result.reserve(length); + while (cursor < string_end) { + const char *scan{cursor}; + while (scan < string_end && *scan != '\\') { + scan++; + } + + if (scan > cursor) { + result.append(cursor, static_cast(scan - cursor)); + cursor = scan; + } + + if (cursor >= string_end) { + break; + } + + assert(*cursor == '\\'); + cursor++; + assert(cursor < string_end); + + switch (*cursor++) { + case '"': + result.push_back('"'); + break; + case '\\': + result.push_back('\\'); + break; + case '/': + result.push_back('/'); + break; + case 'b': + result.push_back('\b'); + break; + case 'f': + result.push_back('\f'); + break; + case 'n': + result.push_back('\n'); + break; + case 'r': + result.push_back('\r'); + break; + case 't': + result.push_back('\t'); + break; + case 'u': { + auto parse_hex4 = [](const char *&position) -> unsigned long { + unsigned long value{0}; + for (std::size_t index = 0; index < 4; index++) { + const char hex_char{*position++}; + unsigned long digit; + if (hex_char >= '0' && hex_char <= '9') { + digit = static_cast(hex_char - '0'); + } else if (hex_char >= 'a' && hex_char <= 'f') { + digit = static_cast(hex_char - 'a') + 10; + } else if (hex_char >= 'A' && hex_char <= 'F') { + digit = static_cast(hex_char - 'A') + 10; + } else { + digit = 0; + } + value = (value << 4) | digit; + } + return value; + }; + + auto code_point{parse_hex4(cursor)}; + if (code_point >= 0xD800 && code_point <= 0xDBFF) { + assert(cursor + 6 <= string_end); + cursor += 2; + const auto low{parse_hex4(cursor)}; + code_point = 0x10000 + ((code_point - 0xD800) 
<< 10) + (low - 0xDC00); + } + + sourcemeta::core::codepoint_to_utf8(static_cast(code_point), + result); + break; + } + default: + break; + } + } + + return result; +} + +inline auto construct_number(const char *data, const std::uint32_t length) + -> JSON { + const bool has_dot{std::memchr(data, '.', length) != nullptr}; + const bool has_exponent{std::memchr(data, 'e', length) != nullptr || + std::memchr(data, 'E', length) != nullptr}; + + if (has_exponent) { + try { + return JSON{Decimal{std::string_view{data, length}}}; + } catch (const DecimalParseError &) { + throw JSONParseError(1, 1); + } catch (const std::invalid_argument &) { + throw JSONParseError(1, 1); + } + } + + if (has_dot) { + std::size_t first_nonzero_position{JSON::String::npos}; + const auto decimal_position{static_cast( + static_cast(std::memchr(data, '.', length)) - data)}; + for (std::size_t index = 0; index < length; index++) { + if (index != decimal_position && data[index] != '0' && + data[index] != '-') { + first_nonzero_position = index; + break; + } + } + + if (first_nonzero_position == JSON::String::npos) { + first_nonzero_position = 0; + } + + const auto decimal_after_first_nonzero{decimal_position > + first_nonzero_position}; + const auto significant_digits{length - first_nonzero_position - + (decimal_after_first_nonzero ? 
1 : 0)}; + constexpr std::size_t MAX_SAFE_SIGNIFICANT_DIGITS{15}; + if (significant_digits > MAX_SAFE_SIGNIFICANT_DIGITS) { + try { + return JSON{Decimal{std::string_view{data, length}}}; + } catch (const DecimalParseError &) { + throw JSONParseError(1, 1); + } catch (const std::invalid_argument &) { + throw JSONParseError(1, 1); + } + } + + const typename JSON::String string_value{data, length}; + const auto double_result{sourcemeta::core::to_double(string_value)}; + if (double_result.has_value()) { + return JSON{double_result.value()}; + } + try { + return JSON{Decimal{string_value}}; + } catch (const DecimalParseError &) { + throw JSONParseError(1, 1); + } catch (const std::invalid_argument &) { + throw JSONParseError(1, 1); + } + } + + auto digit_length = length; + if (digit_length > 0 && data[0] == '-') { + digit_length--; + } + + if (digit_length <= 19) { + const typename JSON::String string_value{data, length}; + const auto int_result{sourcemeta::core::to_int64_t(string_value)}; + if (int_result.has_value()) { + return JSON{int_result.value()}; + } + try { + return JSON{Decimal{string_value}}; + } catch (const DecimalParseError &) { + throw JSONParseError(1, 1); + } catch (const std::invalid_argument &) { + throw JSONParseError(1, 1); + } + } + + try { + return JSON{Decimal{std::string_view{data, length}}}; + } catch (const DecimalParseError &) { + throw JSONParseError(1, 1); + } catch (const std::invalid_argument &) { + throw JSONParseError(1, 1); + } +} + +inline auto post_column_for(const TapeEntry &entry) -> std::uint64_t { + switch (entry.type) { + case TapeType::True: + return entry.column + 3; + case TapeType::False: + return entry.column + 4; + case TapeType::Null: + return entry.column + 3; + case TapeType::String: + case TapeType::Key: + return entry.column + entry.length + 1; + case TapeType::Number: + return entry.column + entry.length - 1; + default: + return entry.column; + } +} + +} // namespace internal + +// 
NOLINTBEGIN(cppcoreguidelines-avoid-goto,bugprone-use-after-move) + /* Fires the parse callback, when one was given, in the Pre phase at the line and column of the given tape entry. */ +#define CALLBACK_PRE(value_type, entry_ref, context, index, property) \ + if (callback) { \ + callback(JSON::ParsePhase::Pre, JSON::Type::value_type, (entry_ref).line, \ + (entry_ref).column, context, index, property); \ + } + /* Fires the parse callback, when one was given, in the Post phase. The context is always reported as Root with index 0 and the empty property. */ +#define CALLBACK_POST(value_type, post_line, post_column) \ + if (callback) { \ + callback(JSON::ParsePhase::Post, JSON::Type::value_type, post_line, \ + post_column, JSON::ParseContext::Root, 0, empty_property); \ + } + /* Builds `output` out of `tape`, whose entries point into `buffer` through their offset and length. A scalar at the root is produced directly. Arrays and objects are walked without recursion by jumping between the labels below, with `levels` and `frames` tracking the containers that are still open. An empty tape or an unexpected entry type raises JSONParseError(1, 1). */ +inline auto construct_json(const char *buffer, + const std::vector &tape, + const JSON::ParseCallback &callback, JSON &output) + -> void { + using Result = JSON; + enum class Container : std::uint8_t { Array, Object }; + std::vector levels; + std::vector> frames; + levels.reserve(32); + frames.reserve(32); + typename Result::String key; + typename Result::Object::hash_type key_hash; + std::uint64_t key_line{0}; + std::uint64_t key_column{0}; + std::size_t tape_index{0}; + static const JSON::String empty_property; + + if (tape.empty()) { + throw JSONParseError(1, 1); + } + + const auto &entry{tape[tape_index]}; + switch (entry.type) { + case TapeType::True: + CALLBACK_PRE(Boolean, entry, JSON::ParseContext::Root, 0, empty_property); + CALLBACK_POST(Boolean, entry.line, internal::post_column_for(entry)); + output = JSON{true}; + return; + case TapeType::False: + CALLBACK_PRE(Boolean, entry, JSON::ParseContext::Root, 0, empty_property); + CALLBACK_POST(Boolean, entry.line, internal::post_column_for(entry)); + output = JSON{false}; + return; + case TapeType::Null: + CALLBACK_PRE(Null, entry, JSON::ParseContext::Root, 0, empty_property); + CALLBACK_POST(Null, entry.line, internal::post_column_for(entry)); + output = JSON{nullptr}; + return; + case TapeType::String: { + CALLBACK_PRE(String, entry, JSON::ParseContext::Root, 0, empty_property); + auto value{Result{ + internal::unescape_string(buffer + entry.offset, entry.length)}}; + CALLBACK_POST(String, 
entry.line, internal::post_column_for(entry)); + output = std::move(value); + return; + } + case TapeType::Number: { + auto value = + internal::construct_number(buffer + entry.offset, entry.length); + if (value.is_integer()) { + CALLBACK_PRE(Integer, entry, JSON::ParseContext::Root, 0, + empty_property); + CALLBACK_POST(Integer, entry.line, internal::post_column_for(entry)); + } else if (value.is_decimal()) { + CALLBACK_PRE(Decimal, entry, JSON::ParseContext::Root, 0, + empty_property); + CALLBACK_POST(Decimal, entry.line, internal::post_column_for(entry)); + } else { + CALLBACK_PRE(Real, entry, JSON::ParseContext::Root, 0, empty_property); + CALLBACK_POST(Real, entry.line, internal::post_column_for(entry)); + } + output = std::move(value); + return; + } + case TapeType::ArrayStart: + CALLBACK_PRE(Array, entry, JSON::ParseContext::Root, 0, empty_property); + goto do_construct_array; + case TapeType::ObjectStart: + CALLBACK_PRE(Object, entry, JSON::ParseContext::Root, 0, empty_property); + goto do_construct_object; + default: + throw JSONParseError(1, 1); + } + + /* + * Construct an array + */ + +do_construct_array: { + const auto &array_entry{tape[tape_index]}; + assert(array_entry.type == TapeType::ArrayStart); + const auto child_count{array_entry.count}; + tape_index++; + + if (levels.empty()) { + levels.push_back(Container::Array); + output = Result::make_array(); + frames.emplace_back(output); + } else if (levels.back() == Container::Array) { + levels.push_back(Container::Array); + frames.back().get().push_back(Result::make_array()); + frames.emplace_back(frames.back().get().back()); + } else if (levels.back() == Container::Object) { + levels.push_back(Container::Array); + frames.back().get().assign(key, Result::make_array()); + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::Array, key_line, key_column, + JSON::ParseContext::Property, 0, + frames.back().get().as_object().back_key()); + } + frames.emplace_back(frames.back().get().at(key)); + } + + 
frames.back().get().as_array().reserve(child_count); + + if (child_count == 0) { + assert(tape[tape_index].type == TapeType::ArrayEnd); + const auto &end_entry{tape[tape_index]}; + tape_index++; + CALLBACK_POST(Array, end_entry.line, end_entry.column); + goto do_construct_container_end; + } + + goto do_construct_array_item; +} + +do_construct_array_item: { + assert(!levels.empty()); + assert(levels.back() == Container::Array); + const auto &item_entry{tape[tape_index]}; + + switch (item_entry.type) { + case TapeType::ArrayStart: + CALLBACK_PRE(Array, item_entry, JSON::ParseContext::Index, + frames.back().get().size(), empty_property); + goto do_construct_array; + case TapeType::ObjectStart: + CALLBACK_PRE(Object, item_entry, JSON::ParseContext::Index, + frames.back().get().size(), empty_property); + goto do_construct_object; + case TapeType::True: + CALLBACK_PRE(Boolean, item_entry, JSON::ParseContext::Index, + frames.back().get().size(), empty_property); + frames.back().get().push_back(JSON{true}); + tape_index++; + CALLBACK_POST(Boolean, item_entry.line, + internal::post_column_for(item_entry)); + goto do_construct_array_item_separator; + case TapeType::False: + CALLBACK_PRE(Boolean, item_entry, JSON::ParseContext::Index, + frames.back().get().size(), empty_property); + frames.back().get().push_back(JSON{false}); + tape_index++; + CALLBACK_POST(Boolean, item_entry.line, + internal::post_column_for(item_entry)); + goto do_construct_array_item_separator; + case TapeType::Null: + CALLBACK_PRE(Null, item_entry, JSON::ParseContext::Index, + frames.back().get().size(), empty_property); + frames.back().get().push_back(JSON{nullptr}); + tape_index++; + CALLBACK_POST(Null, item_entry.line, + internal::post_column_for(item_entry)); + goto do_construct_array_item_separator; + case TapeType::String: + CALLBACK_PRE(String, item_entry, JSON::ParseContext::Index, + frames.back().get().size(), empty_property); + frames.back().get().push_back(Result{internal::unescape_string( + 
buffer + item_entry.offset, item_entry.length)}); + tape_index++; + CALLBACK_POST(String, item_entry.line, + internal::post_column_for(item_entry)); + goto do_construct_array_item_separator; + case TapeType::Number: { + const auto current_index{frames.back().get().size()}; + auto value = internal::construct_number(buffer + item_entry.offset, + item_entry.length); + if (value.is_integer()) { + CALLBACK_PRE(Integer, item_entry, JSON::ParseContext::Index, + current_index, empty_property); + } else if (value.is_decimal()) { + CALLBACK_PRE(Decimal, item_entry, JSON::ParseContext::Index, + current_index, empty_property); + } else { + CALLBACK_PRE(Real, item_entry, JSON::ParseContext::Index, current_index, + empty_property); + } + const auto value_type{value.type()}; + frames.back().get().push_back(std::move(value)); + tape_index++; + if (value_type == JSON::Type::Integer) { + CALLBACK_POST(Integer, item_entry.line, + internal::post_column_for(item_entry)); + } else if (value_type == JSON::Type::Decimal) { + CALLBACK_POST(Decimal, item_entry.line, + internal::post_column_for(item_entry)); + } else { + CALLBACK_POST(Real, item_entry.line, + internal::post_column_for(item_entry)); + } + goto do_construct_array_item_separator; + } + default: + throw JSONParseError(1, 1); + } +} + +do_construct_array_item_separator: + if (tape[tape_index].type == TapeType::ArrayEnd) { + const auto &end_entry{tape[tape_index]}; + tape_index++; + CALLBACK_POST(Array, end_entry.line, end_entry.column); + goto do_construct_container_end; + } + + goto do_construct_array_item; + + /* + * Construct an object + */ + +do_construct_object: { + const auto &object_entry{tape[tape_index]}; + assert(object_entry.type == TapeType::ObjectStart); + const auto property_count{object_entry.count}; + tape_index++; + + if (levels.empty()) { + levels.push_back(Container::Object); + output = Result::make_object(); + frames.emplace_back(output); + } else if (levels.back() == Container::Array) { + 
levels.push_back(Container::Object); + frames.back().get().push_back(Result::make_object()); + frames.emplace_back(frames.back().get().back()); + } else if (levels.back() == Container::Object) { + levels.push_back(Container::Object); + frames.back().get().assign(key, Result::make_object()); + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::Object, key_line, key_column, + JSON::ParseContext::Property, 0, + frames.back().get().as_object().back_key()); + } + frames.emplace_back(frames.back().get().at(key)); + } + + frames.back().get().as_object().reserve(property_count); + + if (property_count == 0) { + assert(tape[tape_index].type == TapeType::ObjectEnd); + const auto &end_entry{tape[tape_index]}; + tape_index++; + CALLBACK_POST(Object, end_entry.line, end_entry.column); + goto do_construct_container_end; + } + + goto do_construct_object_key; +} + /* Decodes the next key into `key` and precomputes `key_hash`. Keys without a backslash are copied and hashed straight from the buffer. */ +do_construct_object_key: { + assert(!levels.empty()); + assert(levels.back() == Container::Object); + const auto &key_entry{tape[tape_index]}; + assert(key_entry.type == TapeType::Key); + const char *key_data{buffer + key_entry.offset}; + const auto key_length{key_entry.length}; + if (std::memchr(key_data, '\\', key_length)) { + key = internal::unescape_string(key_data, key_length); + key_hash = frames.back().get().as_object().hash(key); + } else { + key.assign(key_data, key_length); + key_hash = frames.back().get().as_object().hash(key_data, key_length); + } + key_line = key_entry.line; + key_column = key_entry.column; + tape_index++; + goto do_construct_object_value; +} + +do_construct_object_value: { + const auto &value_entry{tape[tape_index]}; + + switch (value_entry.type) { + case TapeType::ArrayStart: + goto do_construct_array; + case TapeType::ObjectStart: + goto do_construct_object; + case TapeType::True: + frames.back().get().assign_assume_new(std::move(key), JSON{true}, + key_hash); + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::Boolean, key_line, + key_column, 
JSON::ParseContext::Property, 0, + frames.back().get().as_object().back_key()); + } + tape_index++; + CALLBACK_POST(Boolean, value_entry.line, + internal::post_column_for(value_entry)); + goto do_construct_object_property_end; + case TapeType::False: + frames.back().get().assign_assume_new(std::move(key), JSON{false}, + key_hash); + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::Boolean, key_line, + key_column, JSON::ParseContext::Property, 0, + frames.back().get().as_object().back_key()); + } + tape_index++; + CALLBACK_POST(Boolean, value_entry.line, + internal::post_column_for(value_entry)); + goto do_construct_object_property_end; + case TapeType::Null: + frames.back().get().assign_assume_new(std::move(key), JSON{nullptr}, + key_hash); + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::Null, key_line, key_column, + JSON::ParseContext::Property, 0, + frames.back().get().as_object().back_key()); + } + tape_index++; + CALLBACK_POST(Null, value_entry.line, + internal::post_column_for(value_entry)); + goto do_construct_object_property_end; + case TapeType::String: + frames.back().get().assign_assume_new( + std::move(key), + Result{internal::unescape_string(buffer + value_entry.offset, + value_entry.length)}, + key_hash); + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::String, key_line, + key_column, JSON::ParseContext::Property, 0, + frames.back().get().as_object().back_key()); + } + tape_index++; + CALLBACK_POST(String, value_entry.line, + internal::post_column_for(value_entry)); + goto do_construct_object_property_end; + case TapeType::Number: { + auto value = internal::construct_number(buffer + value_entry.offset, + value_entry.length); + const auto value_type{value.type()}; + frames.back().get().assign_assume_new(std::move(key), std::move(value), + key_hash); + if (callback) { + if (value_type == JSON::Type::Integer) { + callback(JSON::ParsePhase::Pre, JSON::Type::Integer, key_line, + key_column, 
JSON::ParseContext::Property, 0, + frames.back().get().as_object().back_key()); + } else if (value_type == JSON::Type::Decimal) { + callback(JSON::ParsePhase::Pre, JSON::Type::Decimal, key_line, + key_column, JSON::ParseContext::Property, 0, + frames.back().get().as_object().back_key()); + } else { + callback(JSON::ParsePhase::Pre, JSON::Type::Real, key_line, + key_column, JSON::ParseContext::Property, 0, + frames.back().get().as_object().back_key()); + } + } + tape_index++; + if (value_type == JSON::Type::Integer) { + CALLBACK_POST(Integer, value_entry.line, + internal::post_column_for(value_entry)); + } else if (value_type == JSON::Type::Decimal) { + CALLBACK_POST(Decimal, value_entry.line, + internal::post_column_for(value_entry)); + } else { + CALLBACK_POST(Real, value_entry.line, + internal::post_column_for(value_entry)); + } + goto do_construct_object_property_end; + } + default: + throw JSONParseError(1, 1); + } +} + +do_construct_object_property_end: + if (tape[tape_index].type == TapeType::ObjectEnd) { + const auto &end_entry{tape[tape_index]}; + tape_index++; + CALLBACK_POST(Object, end_entry.line, end_entry.column); + goto do_construct_container_end; + } + + goto do_construct_object_key; + + /* + * Finish constructing a container + */ + +do_construct_container_end: /* With a single level left the root container is complete. Otherwise the finished container is dropped and the walk resumes in its parent. */ + assert(!levels.empty()); + if (levels.size() == 1) { + return; + } + + frames.pop_back(); + levels.pop_back(); + if (levels.back() == Container::Array) { + goto do_construct_array_item_separator; + } else { + goto do_construct_object_property_end; + } +} + +// NOLINTEND(cppcoreguidelines-avoid-goto,bugprone-use-after-move) + +#undef CALLBACK_PRE +#undef CALLBACK_POST + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/json/grammar.h b/vendor/core/src/core/json/grammar.h index 093df10..6b9338a 100644 --- a/vendor/core/src/core/json/grammar.h +++ b/vendor/core/src/core/json/grammar.h @@ -72,14 +72,17 @@ static constexpr CharT token_object_delimiter{'\u002C'}; //
https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf // Boolean +template static constexpr CharT token_true{'\u0074'}; template static constexpr std::basic_string_view constant_true{ "\u0074\u0072\u0075\u0065"}; +template static constexpr CharT token_false{'\u0066'}; template static constexpr std::basic_string_view constant_false{ "\u0066\u0061\u006C\u0073\u0065"}; // Null +template static constexpr CharT token_null{'\u006E'}; template static constexpr std::basic_string_view constant_null{ "\u006E\u0075\u006C\u006C"}; diff --git a/vendor/core/src/core/json/include/sourcemeta/core/json.h b/vendor/core/src/core/json/include/sourcemeta/core/json.h index 1052e7c..99148b1 100644 --- a/vendor/core/src/core/json/include/sourcemeta/core/json.h +++ b/vendor/core/src/core/json/include/sourcemeta/core/json.h @@ -11,12 +11,16 @@ #include // NOLINTEND(misc-include-cleaner) -#include // std::uint64_t -#include // std::filesystem +#include + +#include // std::uint64_t +#include // std::filesystem +#include // std::formatter, std::format_context, std::format_parse_context, std::format_to #include // std::basic_ifstream #include // std::initializer_list #include // std::basic_istream #include // std::basic_ostream +#include // std::ostringstream #include // std::basic_string /// @defgroup json JSON @@ -49,8 +53,8 @@ namespace sourcemeta::core { /// /// If parsing fails, sourcemeta::core::JSONParseError will be thrown. SOURCEMETA_CORE_JSON_EXPORT -auto parse_json(std::basic_istream &stream, - const JSON::ParseCallback &callback = nullptr) -> JSON; +auto parse_json(std::basic_istream &stream) + -> JSON; /// @ingroup json /// @@ -68,8 +72,8 @@ auto parse_json(std::basic_istream &stream, /// /// If parsing fails, sourcemeta::core::JSONParseError will be thrown. 
SOURCEMETA_CORE_JSON_EXPORT -auto parse_json(const std::basic_string &input, - const JSON::ParseCallback &callback = nullptr) -> JSON; +auto parse_json(const std::basic_string &input) + -> JSON; /// @ingroup json /// @@ -90,8 +94,7 @@ auto parse_json(const std::basic_string &input, /// ``` SOURCEMETA_CORE_JSON_EXPORT auto parse_json(std::basic_istream &stream, - std::uint64_t &line, std::uint64_t &column, - const JSON::ParseCallback &callback = nullptr) -> JSON; + std::uint64_t &line, std::uint64_t &column) -> JSON; /// @ingroup json /// @@ -110,8 +113,7 @@ auto parse_json(std::basic_istream &stream, /// ``` SOURCEMETA_CORE_JSON_EXPORT auto parse_json(const std::basic_string &input, - std::uint64_t &line, std::uint64_t &column, - const JSON::ParseCallback &callback = nullptr) -> JSON; + std::uint64_t &line, std::uint64_t &column) -> JSON; /// @ingroup json /// @@ -128,10 +130,74 @@ auto parse_json(const std::basic_string &input, /// std::cout << std::endl; /// ``` /// +/// If parsing fails, sourcemeta::core::JSONFileParseError will be thrown. +SOURCEMETA_CORE_JSON_EXPORT +auto read_json(const std::filesystem::path &path) -> JSON; + +/// @ingroup json +/// +/// Parse a JSON document from a C++ standard input stream into an existing +/// JSON value, invoking the given callback during parsing. The result is +/// constructed directly into the given reference rather than returned by value +/// to ensure that references passed through the parse callback (such as object +/// property names) remain valid after parsing completes. +/// +/// If parsing fails, sourcemeta::core::JSONParseError will be thrown. +SOURCEMETA_CORE_JSON_EXPORT +auto parse_json(std::basic_istream &stream, + JSON &output, const JSON::ParseCallback &callback) -> void; + +/// @ingroup json +/// +/// Parse a JSON document from a JSON string into an existing JSON value, +/// invoking the given callback during parsing. 
The result is constructed +/// directly into the given reference rather than returned by value to ensure +/// that references passed through the parse callback (such as object property +/// names) remain valid after parsing completes. +/// /// If parsing fails, sourcemeta::core::JSONParseError will be thrown. SOURCEMETA_CORE_JSON_EXPORT -auto read_json(const std::filesystem::path &path, - const JSON::ParseCallback &callback = nullptr) -> JSON; +auto parse_json(const std::basic_string &input, + JSON &output, const JSON::ParseCallback &callback) -> void; + +/// @ingroup json +/// +/// Parse a JSON document from a C++ standard input stream into an existing +/// JSON value, passing your own `line` and `column` read/write position +/// indicators and invoking the given callback during parsing. The result is +/// constructed directly into the given reference rather than returned by value +/// to ensure that references passed through the parse callback (such as object +/// property names) remain valid after parsing completes. +SOURCEMETA_CORE_JSON_EXPORT +auto parse_json(std::basic_istream &stream, + std::uint64_t &line, std::uint64_t &column, JSON &output, + const JSON::ParseCallback &callback) -> void; + +/// @ingroup json +/// +/// Parse a JSON document from a JSON string into an existing JSON value, +/// passing your own `line` and `column` read/write position indicators and +/// invoking the given callback during parsing. The result is constructed +/// directly into the given reference rather than returned by value to ensure +/// that references passed through the parse callback (such as object property +/// names) remain valid after parsing completes. 
+SOURCEMETA_CORE_JSON_EXPORT +auto parse_json(const std::basic_string &input, + std::uint64_t &line, std::uint64_t &column, JSON &output, + const JSON::ParseCallback &callback) -> void; + +/// @ingroup json +/// +/// A convenience function to parse a JSON document from a file into an existing +/// JSON value, invoking the given callback during parsing. The result is +/// constructed directly into the given reference rather than returned by value +/// to ensure that references passed through the parse callback (such as object +/// property names) remain valid after parsing completes. +/// +/// If parsing fails, sourcemeta::core::JSONFileParseError will be thrown. +SOURCEMETA_CORE_JSON_EXPORT +auto read_json(const std::filesystem::path &path, JSON &output, + const JSON::ParseCallback &callback) -> void; /// @ingroup json /// @@ -228,9 +294,43 @@ auto operator<<(std::basic_ostream &stream, /// {sourcemeta::core::JSON::Type::Object, /// sourcemeta::core::JSON::Type::Array}); /// ``` -SOURCEMETA_CORE_JSON_EXPORT -auto make_set(std::initializer_list types) -> JSON::TypeSet; +SOURCEMETA_FORCEINLINE inline auto +make_set(std::initializer_list types) -> JSON::TypeSet { + JSON::TypeSet result; + for (const auto type : types) { + result.set(static_cast(type)); + } + return result; +} } // namespace sourcemeta::core +template <> struct std::formatter { + constexpr auto parse(std::format_parse_context &context) + -> decltype(context.begin()) { + return context.begin(); + } + + auto format(const sourcemeta::core::JSON &value, + std::format_context &context) const -> decltype(context.out()) { + std::ostringstream stream; + stream << value; + return std::format_to(context.out(), "{}", stream.str()); + } +}; + +template <> struct std::formatter { + constexpr auto parse(std::format_parse_context &context) + -> decltype(context.begin()) { + return context.begin(); + } + + auto format(const sourcemeta::core::JSON::Type value, + std::format_context &context) const -> 
decltype(context.out()) { + std::ostringstream stream; + stream << value; + return std::format_to(context.out(), "{}", stream.str()); + } +}; + #endif diff --git a/vendor/core/src/core/json/include/sourcemeta/core/json_array.h b/vendor/core/src/core/json/include/sourcemeta/core/json_array.h index 808cafb..f719449 100644 --- a/vendor/core/src/core/json/include/sourcemeta/core/json_array.h +++ b/vendor/core/src/core/json/include/sourcemeta/core/json_array.h @@ -97,6 +97,11 @@ template class JSONArray { return this->data.size(); } + /// Reserve capacity for a given number of elements + auto reserve(const size_type capacity) -> void { + this->data.reserve(capacity); + } + private: friend Value; // Exporting symbols that depends on the standard C++ library is considered diff --git a/vendor/core/src/core/json/include/sourcemeta/core/json_auto.h b/vendor/core/src/core/json/include/sourcemeta/core/json_auto.h index 0597671..1e2bb61 100644 --- a/vendor/core/src/core/json/include/sourcemeta/core/json_auto.h +++ b/vendor/core/src/core/json/include/sourcemeta/core/json_auto.h @@ -3,27 +3,24 @@ #include -#include // std::sort -#include // std::bitset -#include // assert -#include // std::chrono -#include // std::same_as, std::constructible_from +#include // std::sort +#include // std::bitset +#include // assert +#include // std::chrono +#include // std::same_as, std::constructible_from, std::invocable, std::invocable #include // std::filesystem #include // std::function #include // std::optional, std::nullopt, std::bad_optional_access #include // std::tuple, std::apply, std::tuple_element_t, std::tuple_size, std::tuple_size_v -#include // std::false_type, std::true_type, std::void_t, std::is_enum_v, std::underlying_type_t, std::is_same_v, std::is_base_of_v, std::remove_cvref_t +#include // std::false_type, std::true_type, std::is_enum_v, std::underlying_type_t, std::is_same_v, std::is_base_of_v, std::remove_cvref_t #include // std::pair, std:::make_index_sequence, 
std::index_sequence #include // std::variant, std::variant_size_v, std::variant_alternative_t, std::visit namespace sourcemeta::core { /// @ingroup json -template -struct json_auto_has_mapped_type : std::false_type {}; template -struct json_auto_has_mapped_type> - : std::true_type {}; +concept json_auto_has_mapped_type = requires { typename T::mapped_type; }; /// @ingroup json template struct json_auto_is_basic_string : std::false_type {}; @@ -57,14 +54,10 @@ concept json_auto_has_method_to = requires(const T value) { /// @ingroup json /// Container-like classes can opt-out from automatic JSON /// serialisation by setting `using json_auto = std::false_type;` -template -struct json_auto_supports_auto_impl : std::true_type {}; -template -struct json_auto_supports_auto_impl> - : std::bool_constant< - !std::is_same_v> {}; template -concept json_auto_supports_auto = json_auto_supports_auto_impl::value; +concept json_auto_supports_auto = !requires { + typename T::json_auto; +} || !std::is_same_v; /// @ingroup json template @@ -74,7 +67,7 @@ concept json_auto_list_like = typename T::const_iterator; { type.cbegin() } -> std::same_as; { type.cend() } -> std::same_as; - } && json_auto_supports_auto && !json_auto_has_mapped_type::value && + } && json_auto_supports_auto && !json_auto_has_mapped_type && !json_auto_has_method_from && !json_auto_has_method_to && !json_auto_is_basic_string::value; @@ -87,19 +80,14 @@ concept json_auto_map_like = typename T::key_type; { type.cbegin() } -> std::same_as; { type.cend() } -> std::same_as; - } && json_auto_supports_auto && json_auto_has_mapped_type::value && + } && json_auto_supports_auto && json_auto_has_mapped_type && !json_auto_has_method_from && !json_auto_has_method_to && std::is_same_v; -/// @ingroup json -template -struct json_auto_has_reverse_iterator : std::false_type {}; - /// @ingroup json template -struct json_auto_has_reverse_iterator> - : std::true_type {}; +concept json_auto_has_reverse_iterator = + requires { typename 
T::reverse_iterator; }; /// @ingroup json template struct json_auto_is_pair : std::false_type {}; @@ -202,17 +190,10 @@ template requires std::is_same_v auto to_json(const T &hash) -> JSON { auto result{JSON::make_array()}; -#if defined(__SIZEOF_INT128__) result.push_back(JSON{static_cast(hash.a >> 64)}); result.push_back(JSON{static_cast(hash.a)}); result.push_back(JSON{static_cast(hash.b >> 64)}); result.push_back(JSON{static_cast(hash.b)}); -#else - result.push_back(JSON{static_cast(hash.a)}); - result.push_back(JSON{static_cast(hash.b)}); - result.push_back(JSON{static_cast(hash.c)}); - result.push_back(JSON{static_cast(hash.d)}); -#endif return result; } @@ -228,21 +209,17 @@ auto from_json(const JSON &value) -> std::optional { return std::nullopt; } -#if defined(__SIZEOF_INT128__) - return T{(static_cast<__uint128_t>( + using uint128_type = JSON::Object::hash_type::type; + return T{(static_cast( static_cast(value.at(0).to_integer())) << 64) | - static_cast(value.at(1).to_integer()), - (static_cast<__uint128_t>( + static_cast( + static_cast(value.at(1).to_integer())), + (static_cast( static_cast(value.at(2).to_integer())) << 64) | - static_cast(value.at(3).to_integer())}; -#else - return T{static_cast(value.at(0).to_integer()), - static_cast(value.at(1).to_integer()), - static_cast(value.at(2).to_integer()), - static_cast(value.at(3).to_integer())}; -#endif + static_cast( + static_cast(value.at(3).to_integer()))}; } /// @ingroup json @@ -404,7 +381,7 @@ auto to_json(typename T::const_iterator begin, typename T::const_iterator end) } // To guarantee ordering across implementations - if constexpr (!json_auto_has_reverse_iterator::value) { + if constexpr (!json_auto_has_reverse_iterator) { std::sort(result.as_array().begin(), result.as_array().end()); } @@ -412,11 +389,10 @@ auto to_json(typename T::const_iterator begin, typename T::const_iterator end) } /// @ingroup json -template -auto to_json( - typename T::const_iterator begin, typename T::const_iterator end, 
- const std::function &callback) - -> JSON { +template F> +auto to_json(typename T::const_iterator begin, typename T::const_iterator end, + const F &callback) -> JSON { // TODO: Extend `make_array` to optionally take iterators, etc auto result{JSON::make_array()}; for (auto iterator = begin; iterator != end; ++iterator) { @@ -424,7 +400,7 @@ auto to_json( } // To guarantee ordering across implementations - if constexpr (!json_auto_has_reverse_iterator::value) { + if constexpr (!json_auto_has_reverse_iterator) { std::sort(result.as_array().begin(), result.as_array().end()); } @@ -437,11 +413,9 @@ template auto to_json(const T &value) -> JSON { } /// @ingroup json -template -auto to_json( - const T &value, - const std::function &callback) - -> JSON { +template F> +auto to_json(const T &value, const F &callback) -> JSON { return to_json(value.cbegin(), value.cend(), callback); } @@ -524,11 +498,10 @@ template auto to_json(const T &value) -> JSON { } /// @ingroup json -template -auto to_json( - typename T::const_iterator begin, typename T::const_iterator end, - const std::function &callback) - -> JSON { +template F> +auto to_json(typename T::const_iterator begin, typename T::const_iterator end, + const F &callback) -> JSON { auto result{JSON::make_object()}; for (auto iterator = begin; iterator != end; ++iterator) { result.assign(iterator->first, callback(iterator->second)); @@ -581,11 +554,9 @@ auto from_json( } /// @ingroup json -template -auto to_json( - const T &value, - const std::function &callback) - -> JSON { +template F> +auto to_json(const T &value, const F &callback) -> JSON { return to_json(value.cbegin(), value.cend(), callback); } diff --git a/vendor/core/src/core/json/include/sourcemeta/core/json_hash.h b/vendor/core/src/core/json/include/sourcemeta/core/json_hash.h index 49ee106..728fdc9 100644 --- a/vendor/core/src/core/json/include/sourcemeta/core/json_hash.h +++ b/vendor/core/src/core/json/include/sourcemeta/core/json_hash.h @@ -1,8 +1,9 @@ #ifndef 
SOURCEMETA_CORE_JSON_HASH_H_ #define SOURCEMETA_CORE_JSON_HASH_H_ +#include + #include // assert -#include // std::uint64_t #include // std::memcpy #include // std::reference_wrapper @@ -29,111 +30,98 @@ template struct HashJSON { /// @ingroup json template struct PropertyHashJSON { struct hash_type { - // For performance when the platform allows it -#if defined(__SIZEOF_INT128__) - using type = __uint128_t; - type a{0}; - type b{0}; -#else - using type = std::uint64_t; + using type = sourcemeta::core::uint128_t; type a{0}; type b{0}; - type c{0}; - type d{0}; -#endif - inline auto operator==(const hash_type &other) const noexcept -> bool { -#if defined(__SIZEOF_INT128__) - return this->a == other.a && this->b == other.b; -#else - return this->a == other.a && this->b == other.b && this->c == other.c && - this->d == other.d; -#endif - } + auto operator==(const hash_type &) const noexcept -> bool = default; }; [[nodiscard]] - inline auto perfect(const T &value, const std::size_t size) const noexcept + inline auto perfect(const char *data, const std::size_t size) const noexcept -> hash_type { hash_type result; - assert(!value.empty()); - // Copy starting a byte 2 - std::memcpy(reinterpret_cast(&result) + 1, value.data(), size); + assert(size > 0); + std::memcpy(reinterpret_cast(&result) + 1, data, size); return result; } + // GCC does not optimise well across implicit type conversions such as + // std::string to std::string_view, so we provide separate overloads with + // duplicated logic instead of unifying on a single parameter type + inline auto operator()(const T &value) const noexcept -> hash_type { const auto size{value.size()}; switch (size) { case 0: return {}; case 1: - return this->perfect(value, 1); + return this->perfect(value.data(), 1); case 2: - return this->perfect(value, 2); + return this->perfect(value.data(), 2); case 3: - return this->perfect(value, 3); + return this->perfect(value.data(), 3); case 4: - return this->perfect(value, 4); + return 
this->perfect(value.data(), 4); case 5: - return this->perfect(value, 5); + return this->perfect(value.data(), 5); case 6: - return this->perfect(value, 6); + return this->perfect(value.data(), 6); case 7: - return this->perfect(value, 7); + return this->perfect(value.data(), 7); case 8: - return this->perfect(value, 8); + return this->perfect(value.data(), 8); case 9: - return this->perfect(value, 9); + return this->perfect(value.data(), 9); case 10: - return this->perfect(value, 10); + return this->perfect(value.data(), 10); case 11: - return this->perfect(value, 11); + return this->perfect(value.data(), 11); case 12: - return this->perfect(value, 12); + return this->perfect(value.data(), 12); case 13: - return this->perfect(value, 13); + return this->perfect(value.data(), 13); case 14: - return this->perfect(value, 14); + return this->perfect(value.data(), 14); case 15: - return this->perfect(value, 15); + return this->perfect(value.data(), 15); case 16: - return this->perfect(value, 16); + return this->perfect(value.data(), 16); case 17: - return this->perfect(value, 17); + return this->perfect(value.data(), 17); case 18: - return this->perfect(value, 18); + return this->perfect(value.data(), 18); case 19: - return this->perfect(value, 19); + return this->perfect(value.data(), 19); case 20: - return this->perfect(value, 20); + return this->perfect(value.data(), 20); case 21: - return this->perfect(value, 21); + return this->perfect(value.data(), 21); case 22: - return this->perfect(value, 22); + return this->perfect(value.data(), 22); case 23: - return this->perfect(value, 23); + return this->perfect(value.data(), 23); case 24: - return this->perfect(value, 24); + return this->perfect(value.data(), 24); case 25: - return this->perfect(value, 25); + return this->perfect(value.data(), 25); case 26: - return this->perfect(value, 26); + return this->perfect(value.data(), 26); case 27: - return this->perfect(value, 27); + return this->perfect(value.data(), 27); case 
28: - return this->perfect(value, 28); + return this->perfect(value.data(), 28); case 29: - return this->perfect(value, 29); + return this->perfect(value.data(), 29); case 30: - return this->perfect(value, 30); + return this->perfect(value.data(), 30); case 31: - return this->perfect(value, 31); + return this->perfect(value.data(), 31); default: // This case is specifically designed to be constant with regards to // string length, and to exploit the fact that most JSON objects don't // have a lot of entries, so hash collision is not as common - auto hash = this->perfect(value, 31); + auto hash = this->perfect(value.data(), 31); hash.a |= 1 + (size + static_cast(value.front()) + static_cast(value.back())) % @@ -143,6 +131,86 @@ template struct PropertyHashJSON { } } + inline auto operator()(const char *data, + const std::size_t size) const noexcept -> hash_type { + switch (size) { + case 0: + return {}; + case 1: + return this->perfect(data, 1); + case 2: + return this->perfect(data, 2); + case 3: + return this->perfect(data, 3); + case 4: + return this->perfect(data, 4); + case 5: + return this->perfect(data, 5); + case 6: + return this->perfect(data, 6); + case 7: + return this->perfect(data, 7); + case 8: + return this->perfect(data, 8); + case 9: + return this->perfect(data, 9); + case 10: + return this->perfect(data, 10); + case 11: + return this->perfect(data, 11); + case 12: + return this->perfect(data, 12); + case 13: + return this->perfect(data, 13); + case 14: + return this->perfect(data, 14); + case 15: + return this->perfect(data, 15); + case 16: + return this->perfect(data, 16); + case 17: + return this->perfect(data, 17); + case 18: + return this->perfect(data, 18); + case 19: + return this->perfect(data, 19); + case 20: + return this->perfect(data, 20); + case 21: + return this->perfect(data, 21); + case 22: + return this->perfect(data, 22); + case 23: + return this->perfect(data, 23); + case 24: + return this->perfect(data, 24); + case 25: + return 
this->perfect(data, 25); + case 26: + return this->perfect(data, 26); + case 27: + return this->perfect(data, 27); + case 28: + return this->perfect(data, 28); + case 29: + return this->perfect(data, 29); + case 30: + return this->perfect(data, 30); + case 31: + return this->perfect(data, 31); + default: + // This case is specifically designed to be constant with regards to + // string length, and to exploit the fact that most JSON objects don't + // have a lot of entries, so hash collision is not as common + auto hash = this->perfect(data, 31); + hash.a |= 1 + (size + static_cast(data[0]) + + static_cast(data[size - 1])) % + // Make sure the property hash can never exceed 8 bits + 255; + return hash; + } + } + [[nodiscard]] inline auto is_perfect(const hash_type &hash) const noexcept -> bool { // If there is anything written past the first byte, diff --git a/vendor/core/src/core/json/include/sourcemeta/core/json_object.h b/vendor/core/src/core/json/include/sourcemeta/core/json_object.h index 86da877..c364faa 100644 --- a/vendor/core/src/core/json/include/sourcemeta/core/json_object.h +++ b/vendor/core/src/core/json/include/sourcemeta/core/json_object.h @@ -6,7 +6,7 @@ #include // std::size_t #include // std::initializer_list #include // std::advance -#include // std::pair, std::move +#include // std::pair, std::move, std::unreachable #include // std::vector namespace sourcemeta::core { @@ -124,6 +124,13 @@ template class JSONObject { return this->hasher(key); } + /// Compute a hash from raw data + [[nodiscard]] inline auto hash(const char *raw_data, + const std::size_t raw_size) const noexcept + -> hash_type { + return hasher(raw_data, raw_size); + } + /// Attempt to find an entry by key [[nodiscard]] inline auto find(const Key &key) const -> const_iterator { const auto key_hash{this->hash(key)}; @@ -183,6 +190,11 @@ template class JSONObject { /// Check if the object is empty [[nodiscard]] inline auto empty() const -> bool { return this->data.empty(); } + /// 
Reserve capacity for a given number of entries + inline auto reserve(const size_type capacity) -> void { + this->data.reserve(capacity); + } + /// Access an object entry by its underlying positional index [[nodiscard]] inline auto at(const size_type index) const -> const Entry & { return this->data.at(index); @@ -208,12 +220,7 @@ template class JSONObject { } } -// See https://en.cppreference.com/w/cpp/utility/unreachable -#if defined(_MSC_VER) && !defined(__clang__) - __assume(false); -#else - __builtin_unreachable(); -#endif + std::unreachable(); } /// Access an object entry by its key name @@ -235,12 +242,7 @@ template class JSONObject { } } -// See https://en.cppreference.com/w/cpp/utility/unreachable -#if defined(_MSC_VER) && !defined(__clang__) - __assume(false); -#else - __builtin_unreachable(); -#endif + std::unreachable(); } /// Try to access an object entry by its underlying positional index @@ -365,6 +367,24 @@ template class JSONObject { return key_hash; } + /// Emplace an object property with a pre-computed hash + inline auto emplace_assume_new(Key &&key, mapped_type &&value, + const hash_type key_hash) -> void { + this->data.push_back({std::move(key), std::move(value), key_hash}); + } + + /// Emplace an object property with a pre-computed hash + inline auto emplace_assume_new(const Key &key, mapped_type &&value, + const hash_type key_hash) -> void { + this->data.push_back({key, std::move(value), key_hash}); + } + + /// Get the key of the last-inserted property + [[nodiscard]] inline auto back_key() const noexcept -> const Key & { + assert(!this->data.empty()); + return this->data.back().first; + } + /// Remove every property in the object inline auto clear() noexcept -> void { this->data.clear(); } @@ -437,7 +457,7 @@ template class JSONObject { #if defined(_MSC_VER) #pragma warning(disable : 4251) #endif - Hash hasher; + static constexpr Hash hasher{}; underlying_type data; #if defined(_MSC_VER) #pragma warning(default : 4251) diff --git 
a/vendor/core/src/core/json/include/sourcemeta/core/json_value.h b/vendor/core/src/core/json/include/sourcemeta/core/json_value.h index 92689ae..47ffa24 100644 --- a/vendor/core/src/core/json/include/sourcemeta/core/json_value.h +++ b/vendor/core/src/core/json/include/sourcemeta/core/json_value.h @@ -10,10 +10,12 @@ #include #include +#include #include // std::any_of #include // std::bitset #include // assert +#include // std::modf, std::trunc, std::isinf, std::isnan #include // std::size_t #include // std::int64_t, std::uint8_t #include // std::less, std::reference_wrapper, std::function @@ -23,7 +25,7 @@ #include // std::basic_istringstream #include // std::basic_string, std::char_traits #include // std::basic_string_view -#include // std::enable_if_t, std::is_same_v +#include // std::is_same_v #include // std::pair namespace sourcemeta::core { @@ -76,7 +78,7 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { using ParseCallback = std::function; + const std::size_t index, const String &property)>; /// A comparison function between object property keys. /// See https://en.cppreference.com/w/cpp/named_req/Compare @@ -372,7 +374,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON document{true}; /// assert(document.is_boolean()); /// ``` - [[nodiscard]] auto is_boolean() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto is_boolean() const noexcept + -> bool { + return this->current_type == Type::Boolean; + } /// Check if the input JSON document is null. For example: /// @@ -383,7 +388,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON document{nullptr}; /// assert(document.is_null()); /// ``` - [[nodiscard]] auto is_null() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto is_null() const noexcept + -> bool { + return this->current_type == Type::Null; + } /// Check if the input JSON document is an integer. 
For example: /// @@ -394,7 +402,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON document{5}; /// assert(document.is_integer()); /// ``` - [[nodiscard]] auto is_integer() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto is_integer() const noexcept + -> bool { + return this->current_type == Type::Integer; + } /// Check if the input JSON document is a real type. For example: /// @@ -405,7 +416,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON document{3.14}; /// assert(document.is_real()); /// ``` - [[nodiscard]] auto is_real() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto is_real() const noexcept + -> bool { + return this->current_type == Type::Real; + } /// Check if the input JSON document is an integer, a real number that /// represents an integer, or an integer decimal. For example: @@ -417,7 +431,21 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON document{5.0}; /// assert(document.is_integral()); /// ``` - [[nodiscard]] auto is_integral() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto is_integral() const noexcept + -> bool { + switch (this->type()) { + case Type::Integer: + return true; + case Type::Real: { + Real integral_part = 0.0; + return std::modf(this->to_real(), &integral_part) == 0.0; + } + case Type::Decimal: + return this->to_decimal().is_integral(); + default: + return false; + } + } /// Check if the input JSON document is either an integer or a real type. 
For /// example: @@ -431,7 +459,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(real.is_number()); /// assert(integer.is_number()); /// ``` - [[nodiscard]] auto is_number() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto is_number() const noexcept + -> bool { + return this->is_integer() || this->is_real() || this->is_decimal(); + } /// Check if the input JSON document is either a positive integer or a /// positive real number. Zero is considered to be positive. For example: @@ -456,7 +487,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON document{"foo"}; /// assert(document.is_string()); /// ``` - [[nodiscard]] auto is_string() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto is_string() const noexcept + -> bool { + return this->current_type == Type::String; + } /// Check if the input JSON document is an array. For example: /// @@ -468,7 +502,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// document=sourcemeta::core::parse_json("[ 1, 2, 3 ]"); /// assert(document.is_array()); /// ``` - [[nodiscard]] auto is_array() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto is_array() const noexcept + -> bool { + return this->current_type == Type::Array; + } /// Check if the input JSON document is an object. For example: /// @@ -480,7 +517,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// document=sourcemeta::core::parse_json("{ \"foo\": 1 }"); /// assert(document.is_object()); /// ``` - [[nodiscard]] auto is_object() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto is_object() const noexcept + -> bool { + return this->current_type == Type::Object; + } /// Check if the input JSON document is an arbitrary precision decimal value. 
/// For example: @@ -493,7 +533,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON document{value}; /// assert(document.is_decimal()); /// ``` - [[nodiscard]] auto is_decimal() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto is_decimal() const noexcept + -> bool { + return this->current_type == Type::Decimal; + } /// Get the type of the JSON document. For example: /// @@ -504,7 +547,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON document{true}; /// assert(document.type() == sourcemeta::core::JSON::Type::Boolean); /// ``` - [[nodiscard]] auto type() const noexcept -> Type; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto type() const noexcept + -> Type { + return this->current_type; + } /* * Type conversion @@ -521,7 +567,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(document.is_boolean()); /// assert(document.to_boolean()); /// ``` - [[nodiscard]] auto to_boolean() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto to_boolean() const noexcept + -> bool { + assert(this->is_boolean()); + return this->data_boolean; + } /// Convert a JSON instance into a signed integer value. The result of this /// method is undefined unless the JSON instance holds an integer value. For @@ -535,7 +585,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(document.is_integer()); /// assert(document.to_integer() == 5); /// ``` - [[nodiscard]] auto to_integer() const noexcept -> Integer; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto to_integer() const noexcept + -> Integer { + assert(this->is_integer()); + return this->data_integer; + } /// Convert a JSON instance into an IEEE 64-bit floating-point value. 
The /// result of this method is undefined unless the JSON instance holds a real @@ -549,7 +603,13 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(document.is_real()); /// assert(document.to_real() == 3.14); /// ``` - [[nodiscard]] auto to_real() const noexcept -> Real; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto to_real() const noexcept + -> Real { + assert(this->is_real()); + assert(!std::isinf(this->data_real)); + assert(!std::isnan(this->data_real)); + return this->data_real; + } /// Convert a JSON instance into a decimal value. The result of this method /// is undefined unless the JSON instance holds a decimal value. For example: @@ -563,7 +623,13 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(document.is_decimal()); /// assert(document.to_decimal().to_int64() == 1234567890); /// ``` - [[nodiscard]] auto to_decimal() const noexcept -> const Decimal &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto to_decimal() const noexcept + -> const Decimal & { + assert(this->is_decimal()); + assert(this->data_decimal->is_finite()); + assert(!this->data_decimal->is_nan()); + return *this->data_decimal; + } /// Convert a JSON instance into a standard string value. The result of this /// method is undefined unless the JSON instance holds a string value. For @@ -577,7 +643,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(document.is_string()); /// assert(document.to_string() == "foo"); /// ``` - [[nodiscard]] auto to_string() const noexcept -> const String &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto to_string() const noexcept + -> const String & { + assert(this->is_string()); + return this->data_string; + } /// Get a standard input string stream from a JSON string. The result of this /// method is undefined unless the JSON instance holds a string value. 
For @@ -613,7 +683,14 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// << "\n"; /// }); /// ``` - [[nodiscard]] auto as_array() const noexcept -> const Array &; + // TODO: Merge const/non-const overloads of as_array, as_object, at, front, + // back using deducing this once Apple Clang supports it + // (__cpp_explicit_this_parameter) + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto as_array() const noexcept + -> const Array & { + assert(this->is_array()); + return this->data_array; + } /// Get the JSON document as an array instance. This is convenient /// for using mutable iterators on the array. For example: @@ -627,7 +704,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("[ 1, 2, 3 ]"); /// std::sort(document.as_array().begin(), document.as_array().end()); /// ``` - [[nodiscard]] auto as_array() noexcept -> Array &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto as_array() noexcept + -> Array & { + assert(this->is_array()); + return this->data_array; + } /// Get the JSON document as an object instance. This is convenient /// for using constant iterators on the object. For example: @@ -651,7 +732,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// << "\n"; /// }); /// ``` - [[nodiscard]] auto as_object() noexcept -> Object &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto as_object() noexcept + -> Object & { + assert(this->is_object()); + return this->data_object; + } /// Get the JSON document as an object instance. This is convenient /// for using mutable iterators on the object. For example: @@ -671,7 +756,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// value += sourcemeta::core::JSON{1}; /// } /// ``` - [[nodiscard]] auto as_object() const noexcept -> const Object &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto as_object() const noexcept + -> const Object & { + assert(this->is_object()); + return this->data_object; + } /// Get the JSON numeric document as a real number if it is not one already. 
/// For example: @@ -683,7 +772,12 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON document{5}; /// assert(document.as_real() == 5.0); /// ``` - [[nodiscard]] auto as_real() const noexcept -> Real; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto as_real() const noexcept + -> Real { + assert(this->is_number()); + return this->is_real() ? this->to_real() + : static_cast(this->to_integer()); + } /// Get the JSON numeric document as an integer number if it is not one /// already. If the number is a real number, truncation will take place. For @@ -696,7 +790,15 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON document{5.3}; /// assert(document.as_integer() == 5); /// ``` - [[nodiscard]] auto as_integer() const noexcept -> Integer; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto as_integer() const noexcept + -> Integer { + assert(this->is_number()); + if (this->is_integer()) { + return this->to_integer(); + } else { + return static_cast(std::trunc(this->to_real())); + } + } /* * Getters @@ -720,8 +822,12 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("{ \"1\": "foo" }"); /// assert(my_array.at(1).to_string() == "foo"); /// ``` - [[nodiscard]] auto at(const typename Array::size_type index) const - -> const JSON &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto + at(const typename Array::size_type index) const -> const JSON & { + assert(this->is_array()); + assert(index < this->size()); + return this->data_array.data.at(index); + } /// This method retrieves a element by its index. 
If the input JSON instance /// is an object, a property that corresponds to the stringified integer will @@ -741,7 +847,12 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("{ \"1\": "foo" }"); /// assert(my_array.at(1).to_string() == "foo"); /// ``` - [[nodiscard]] auto at(const typename Array::size_type index) -> JSON &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto + at(const typename Array::size_type index) -> JSON & { + assert(this->is_array()); + assert(index < this->size()); + return this->data_array.data.at(index); + } /// This method retrieves an object element. /// @@ -755,7 +866,13 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("{ \"foo\": 1, \"bar\": 2 }"); /// assert(my_object.at("bar").to_integer() == 2); /// ``` - [[nodiscard]] auto at(const String &key) const -> const JSON &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto at(const String &key) const + -> const JSON & { + assert(this->is_object()); + assert(this->defines(key)); + const auto &object{this->data_object}; + return object.at(key, object.hash(key)); + } /// This method retrieves an object element given a pre-calculated property /// hash. @@ -771,9 +888,13 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(my_object.at("bar", /// my_object.as_object().hash("bar")).to_integer() == 2); /// ``` - [[nodiscard]] auto at(const String &key, - const typename Object::hash_type hash) const - -> const JSON &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto + at(const String &key, const typename Object::hash_type hash) const + -> const JSON & { + assert(this->is_object()); + assert(this->defines(key)); + return this->data_object.at(key, hash); + } /// This method retrieves an object element. 
/// @@ -787,7 +908,13 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("{ \"foo\": 1, \"bar\": 2 }"); /// assert(my_object.at("bar").to_integer() == 2); /// ``` - [[nodiscard]] auto at(const String &key) -> JSON &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto at(const String &key) + -> JSON & { + assert(this->is_object()); + assert(this->defines(key)); + auto &object{this->data_object}; + return object.at(key, object.hash(key)); + } /// This method retrieves an object element given a pre-calculated property /// hash. @@ -803,8 +930,12 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(my_object.at("bar", /// my_object.as_object().hash("bar")).to_integer() == 2); /// ``` - [[nodiscard]] auto at(const String &key, - const typename Object::hash_type hash) -> JSON &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto + at(const String &key, const typename Object::hash_type hash) -> JSON & { + assert(this->is_object()); + assert(this->defines(key)); + return this->data_object.at(key, hash); + } /// This method retrieves an object property or a user provided value if such /// property is not defined. @@ -866,7 +997,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("[ 1, 2, 3 ]"); /// assert(document.front().to_integer() == 1); /// ``` - [[nodiscard]] auto front() -> JSON &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto front() -> JSON & { + assert(this->is_array()); + assert(!this->empty()); + return this->data_array.data.front(); + } /// This method retrieves a reference to the first element of a JSON array. /// This method is undefined if the input JSON instance is an empty array. 
For @@ -880,7 +1015,12 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("[ 1, 2, 3 ]"); /// assert(document.front().to_integer() == 1); /// ``` - [[nodiscard]] auto front() const -> const JSON &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto front() const + -> const JSON & { + assert(this->is_array()); + assert(!this->empty()); + return this->data_array.data.front(); + } /// This method retrieves a reference to the last element of a JSON array. /// This method is undefined if the input JSON instance is an empty array. For @@ -894,7 +1034,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("[ 1, 2, 3 ]"); /// assert(document.back().to_integer() == 3); /// ``` - [[nodiscard]] auto back() -> JSON &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto back() -> JSON & { + assert(this->is_array()); + assert(!this->empty()); + return this->data_array.data.back(); + } /// This method retrieves a reference to the last element of a JSON array. /// This method is undefined if the input JSON instance is an empty array. 
For @@ -908,7 +1052,12 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("[ 1, 2, 3 ]"); /// assert(document.back().to_integer() == 3); /// ``` - [[nodiscard]] auto back() const -> const JSON &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto back() const + -> const JSON & { + assert(this->is_array()); + assert(!this->empty()); + return this->data_array.data.back(); + } /* * Read operations @@ -934,7 +1083,15 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(my_array.size() == 2); /// assert(my_string.size() == 3); /// ``` - [[nodiscard]] auto size() const -> std::size_t; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto size() const -> std::size_t { + if (this->is_object()) { + return this->object_size(); + } else if (this->is_array()) { + return this->array_size(); + } else { + return this->string_size(); + } + } /// If the input JSON instance is a string, return its logical length. /// @@ -947,7 +1104,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON my_string{"foo"}; /// assert(my_string.string_size() == 3); /// ``` - [[nodiscard]] auto string_size() const -> std::size_t; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto string_size() const + -> std::size_t { + assert(this->is_string()); + return JSON::size(this->data_string); + } /// If the input JSON instance is an array, return its number of elements. /// @@ -961,7 +1122,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("[ 1, 2 ]"); /// assert(my_array.array_size() == 2); /// ``` - [[nodiscard]] auto array_size() const -> std::size_t; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto array_size() const + -> std::size_t { + assert(this->is_array()); + return this->data_array.data.size(); + } /// If the input JSON instance is an object, return its number of pairs. 
/// @@ -975,7 +1140,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("{ \"foo\": 1 }"); /// assert(my_object.object_size() == 1); /// ``` - [[nodiscard]] auto object_size() const -> std::size_t; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto object_size() const + -> std::size_t { + assert(this->is_object()); + return this->data_object.size(); + } /// If the input JSON instance is string, input JSON instance is a string, /// return its number of bytes. For example: @@ -988,7 +1157,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("\"\\uD83D\\uDCA9\"")}; /// assert(my_string.size() == 2); /// ``` - [[nodiscard]] auto byte_size() const -> std::size_t; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto byte_size() const + -> std::size_t { + assert(this->is_string()); + return this->data_string.size(); + } /// Estimate the byte size occupied by the given parsed JSON instance (not its /// stringified representation). Keep in mind that as the method name implies, @@ -1056,7 +1229,15 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(my_array.empty()); /// assert(my_string.empty()); /// ``` - [[nodiscard]] auto empty() const -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto empty() const -> bool { + if (this->is_object()) { + return this->data_object.empty(); + } else if (this->is_array()) { + return this->data_array.data.empty(); + } else { + return this->data_string.empty(); + } + } /// This method checks whether an input JSON object defines a specific key /// and returns the value if it does. 
For example: @@ -1071,7 +1252,12 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const auto result = document.try_at("foo"); /// EXPECT_TRUE(result); /// EXPECT_EQ(result->to_integer(), 1); - [[nodiscard]] auto try_at(const String &key) const -> const JSON *; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto + try_at(const String &key) const -> const JSON * { + assert(this->is_object()); + const auto &object{this->data_object}; + return object.try_at(key, object.hash(key)); + } /// This method checks, given a pre-calculated hash, whether an input JSON /// object defines a specific key and returns the value if it does. For @@ -1088,9 +1274,13 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// document.as_object().hash("foo")); /// EXPECT_TRUE(result); /// EXPECT_EQ(result->to_integer(), 1); - [[nodiscard]] auto try_at(const String &key, - const typename Object::hash_type hash) const - -> const JSON *; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto + try_at(const String &key, const typename Object::hash_type hash) const + -> const JSON * { + assert(this->is_object()); + const auto &object{this->data_object}; + return object.try_at(key, hash); + } /// This method checks whether an input JSON object defines a specific key. /// For example: @@ -1104,7 +1294,12 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(document.defines("foo")); /// assert(!document.defines("bar")); /// ``` - [[nodiscard]] auto defines(const String &key) const -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto + defines(const String &key) const -> bool { + assert(this->is_object()); + const auto &object{this->data_object}; + return object.defines(key, object.hash(key)); + } /// This method checks whether an input JSON object defines a specific key /// given a pre-calculated property hash. 
For example: @@ -1120,9 +1315,12 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(document.defines("bar", /// document.as_object().hash("bar"))); /// ``` - [[nodiscard]] auto defines(const String &key, - const typename Object::hash_type hash) const - -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto + defines(const String &key, const typename Object::hash_type hash) const + -> bool { + assert(this->is_object()); + return this->data_object.defines(key, hash); + } /// This method checks whether an input JSON object defines a specific integer /// key. For example: @@ -1136,8 +1334,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(document.defines(0)); /// assert(!document.defines(1)); /// ``` - [[nodiscard]] auto defines(const typename Array::size_type index) const - -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto + defines(const typename Array::size_type index) const -> bool { + return this->defines(std::to_string(index)); + } /// This method checks whether an input JSON object defines at least one given /// key. @@ -1438,6 +1638,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// ``` auto assign_assume_new(String &&key, JSON &&value) -> void; + /// This method sets an object key with a pre-computed hash + auto assign_assume_new(String &&key, JSON &&value, Object::hash_type hash) + -> void; + /// This method deletes an object key. 
For example: /// /// ```cpp diff --git a/vendor/core/src/core/json/json.cc b/vendor/core/src/core/json/json.cc index 4122480..4a62973 100644 --- a/vendor/core/src/core/json/json.cc +++ b/vendor/core/src/core/json/json.cc @@ -4,6 +4,7 @@ #include #include +#include "construct.h" #include "parser.h" #include "stringify.h" @@ -13,43 +14,169 @@ #include // std::ifstream #include // std::basic_istream #include // std::basic_ostream +#include // std::basic_ostringstream #include // std::make_error_code, std::errc +#include // std::vector namespace sourcemeta::core { +static auto internal_parse_json(const char *&cursor, const char *end, + std::uint64_t &line, std::uint64_t &column, + const JSON::ParseCallback &callback, + const bool track_positions, JSON &output) + -> void { + const char *buffer_start{cursor}; + std::vector tape; + tape.reserve(static_cast(end - cursor) / 8); + if (callback || track_positions) { + scan_json(cursor, end, buffer_start, line, column, tape); + } else { + try { + scan_json(cursor, end, buffer_start, line, column, tape); + } catch (const JSONParseError &) { + cursor = buffer_start; + tape.clear(); + line = 1; + column = 0; + scan_json(cursor, end, buffer_start, line, column, tape); + } + } + construct_json(buffer_start, tape, callback, output); +} + +static auto internal_parse_json(const char *&cursor, const char *end, + std::uint64_t &line, std::uint64_t &column, + const bool track_positions) -> JSON { + JSON output{nullptr}; + internal_parse_json(cursor, end, line, column, nullptr, track_positions, + output); + return output; +} + // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) auto parse_json(std::basic_istream &stream, - std::uint64_t &line, std::uint64_t &column, - const JSON::ParseCallback &callback) -> JSON { - return internal_parse_json(stream, line, column, callback); + std::uint64_t &line, std::uint64_t &column) -> JSON { + const auto start_position{stream.tellg()}; + std::basic_ostringstream buffer; + buffer << 
stream.rdbuf(); + const auto input{buffer.str()}; + const char *cursor{input.data()}; + const char *end{input.data() + input.size()}; + auto result{internal_parse_json(cursor, end, line, column, true)}; + if (start_position != static_cast(-1)) { + const auto consumed{static_cast(cursor - input.data())}; + stream.clear(); + stream.seekg(start_position + consumed); + } + + return result; } auto parse_json(const std::basic_string &input, - std::uint64_t &line, std::uint64_t &column, - const JSON::ParseCallback &callback) -> JSON { - return internal_parse_json(input, line, column, callback); + std::uint64_t &line, std::uint64_t &column) -> JSON { + const char *cursor{input.data()}; + return internal_parse_json(cursor, input.data() + input.size(), line, column, + true); +} + +// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) +auto parse_json(std::basic_istream &stream) + -> JSON { + const auto start_position{stream.tellg()}; + std::basic_ostringstream buffer; + buffer << stream.rdbuf(); + const auto input{buffer.str()}; + const char *cursor{input.data()}; + const char *end{input.data() + input.size()}; + std::uint64_t line{1}; + std::uint64_t column{0}; + auto result{internal_parse_json(cursor, end, line, column, false)}; + if (start_position != static_cast(-1)) { + const auto consumed{static_cast(cursor - input.data())}; + stream.clear(); + stream.seekg(start_position + consumed); + } + return result; +} + +auto parse_json(const std::basic_string &input) + -> JSON { + std::uint64_t line{1}; + std::uint64_t column{0}; + const char *cursor{input.data()}; + return internal_parse_json(cursor, input.data() + input.size(), line, column, + false); +} + +auto read_json(const std::filesystem::path &path) -> JSON { + auto stream{read_file(path)}; + try { + return parse_json(stream); + } catch (const JSONParseError &error) { + // For producing better error messages + throw JSONFileParseError(path, error); + } +} + +// 
NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) +auto parse_json(std::basic_istream &stream, + std::uint64_t &line, std::uint64_t &column, JSON &output, + const JSON::ParseCallback &callback) -> void { + const auto start_position{stream.tellg()}; + std::basic_ostringstream buffer; + buffer << stream.rdbuf(); + const auto input{buffer.str()}; + const char *cursor{input.data()}; + const char *end{input.data() + input.size()}; + internal_parse_json(cursor, end, line, column, callback, true, output); + if (start_position != static_cast(-1)) { + const auto consumed{static_cast(cursor - input.data())}; + stream.clear(); + stream.seekg(start_position + consumed); + } +} + +auto parse_json(const std::basic_string &input, + std::uint64_t &line, std::uint64_t &column, JSON &output, + const JSON::ParseCallback &callback) -> void { + const char *cursor{input.data()}; + internal_parse_json(cursor, input.data() + input.size(), line, column, + callback, true, output); } // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) auto parse_json(std::basic_istream &stream, - const JSON::ParseCallback &callback) -> JSON { + JSON &output, const JSON::ParseCallback &callback) -> void { + const auto start_position{stream.tellg()}; + std::basic_ostringstream buffer; + buffer << stream.rdbuf(); + const auto input{buffer.str()}; + const char *cursor{input.data()}; + const char *end{input.data() + input.size()}; std::uint64_t line{1}; std::uint64_t column{0}; - return parse_json(stream, line, column, callback); + internal_parse_json(cursor, end, line, column, callback, false, output); + if (start_position != static_cast(-1)) { + const auto consumed{static_cast(cursor - input.data())}; + stream.clear(); + stream.seekg(start_position + consumed); + } } auto parse_json(const std::basic_string &input, - const JSON::ParseCallback &callback) -> JSON { + JSON &output, const JSON::ParseCallback &callback) -> void { std::uint64_t line{1}; std::uint64_t column{0}; - 
return parse_json(input, line, column, callback); + const char *cursor{input.data()}; + internal_parse_json(cursor, input.data() + input.size(), line, column, + callback, false, output); } -auto read_json(const std::filesystem::path &path, - const JSON::ParseCallback &callback) -> JSON { +auto read_json(const std::filesystem::path &path, JSON &output, + const JSON::ParseCallback &callback) -> void { auto stream{read_file(path)}; try { - return parse_json(stream, callback); + parse_json(stream, output, callback); } catch (const JSONParseError &error) { // For producing better error messages throw JSONFileParseError(path, error); @@ -106,12 +233,4 @@ auto operator<<(std::basic_ostream &stream, } } -auto make_set(std::initializer_list types) -> JSON::TypeSet { - JSON::TypeSet result; - for (const auto type : types) { - result.set(static_cast(type)); - } - return result; -} - } // namespace sourcemeta::core diff --git a/vendor/core/src/core/json/json_value.cc b/vendor/core/src/core/json/json_value.cc index 6140dc0..0feab58 100644 --- a/vendor/core/src/core/json/json_value.cc +++ b/vendor/core/src/core/json/json_value.cc @@ -1,19 +1,19 @@ #include #include -#include // std::find +#include // std::ranges::contains, std::ranges::fold_left #include // assert -#include // std::isinf, std::isnan, std::modf, std::trunc +#include // std::isinf, std::isnan, std::modf #include // std::size_t #include // std::int64_t #include // std::reference_wrapper #include // std::initializer_list -#include // std::transform +#include // std::construct_at #include // std::basic_istringstream #include // std::invalid_argument #include // std::to_string #include // std::basic_string_view -#include // std::move +#include // std::exchange, std::move #include // std::vector namespace sourcemeta::core { @@ -52,46 +52,45 @@ JSON::JSON(const bool value) : current_type{Type::Boolean} { JSON::JSON(const std::nullptr_t) {} JSON::JSON(const String &value) : current_type{Type::String} { - new 
(&this->data_string) String{value}; + std::construct_at(&this->data_string, value); } JSON::JSON(const std::basic_string_view &value) : current_type{Type::String} { - new (&this->data_string) String{value}; + std::construct_at(&this->data_string, value); } JSON::JSON(const Char *const value) : current_type{Type::String} { - new (&this->data_string) String{value}; + std::construct_at(&this->data_string, value); } JSON::JSON(std::initializer_list values) : current_type{Type::Array} { - new (&this->data_array) Array{values}; - -// For some reason, if we construct a JSON by passing a single -// JSON as argument, GCC and MSVC, in some circumstances will -// prefer this initializer list constructor over the default copy constructor, -// effectively creating an array of a single element. We couldn't find a nicer -// way to force them to pick the correct constructor. This is a hacky (and -// potentially inefficient?) way to "fix it up" to get consistent behavior -// across compilers. +// For direct-list-initialization (e.g. JSON x{other_json}), the C++ standard +// mandates that initializer_list constructors are preferred over copy/move +// constructors. GCC and MSVC follow this strictly, so a single-element brace +// init ends up here instead of the copy constructor. Handle this case before +// constructing the array to avoid an unnecessary heap allocation. 
#if defined(__GNUC__) || defined(_MSC_VER) if (values.size() == 1) { + this->current_type = Type::Null; this->operator=(*values.begin()); + return; } #endif + std::construct_at(&this->data_array, values); } JSON::JSON(const Array &value) : current_type{Type::Array} { - new (&this->data_array) Array{value}; + std::construct_at(&this->data_array, value); } JSON::JSON(std::initializer_list values) : current_type{Type::Object} { - new (&this->data_object) Object{values}; + std::construct_at(&this->data_object, values); } JSON::JSON(const Object &value) : current_type{Type::Object} { - new (&this->data_object) Object{value}; + std::construct_at(&this->data_object, value); } JSON::JSON(const Decimal &value) : current_type{Type::Decimal} { @@ -122,13 +121,13 @@ JSON::JSON(const JSON &other) : current_type{other.current_type} { this->data_real = other.data_real; break; case Type::String: - new (&this->data_string) String{other.data_string}; + std::construct_at(&this->data_string, other.data_string); break; case Type::Array: - new (&this->data_array) Array{other.data_array}; + std::construct_at(&this->data_array, other.data_array); break; case Type::Object: - new (&this->data_object) Object{other.data_object}; + std::construct_at(&this->data_object, other.data_object); break; case Type::Decimal: this->data_decimal = new Decimal{*other.data_decimal}; @@ -150,20 +149,19 @@ JSON::JSON(JSON &&other) noexcept : current_type{other.current_type} { this->data_real = other.data_real; break; case Type::String: - new (&this->data_string) String{std::move(other.data_string)}; + std::construct_at(&this->data_string, std::move(other.data_string)); other.current_type = Type::Null; break; case Type::Array: - new (&this->data_array) Array{std::move(other.data_array)}; + std::construct_at(&this->data_array, std::move(other.data_array)); other.current_type = Type::Null; break; case Type::Object: - new (&this->data_object) Object{std::move(other.data_object)}; + 
std::construct_at(&this->data_object, std::move(other.data_object)); other.current_type = Type::Null; break; case Type::Decimal: - this->data_decimal = other.data_decimal; - other.data_decimal = nullptr; + this->data_decimal = std::exchange(other.data_decimal, nullptr); other.current_type = Type::Null; break; default: @@ -185,13 +183,13 @@ auto JSON::operator=(const JSON &other) -> JSON & { this->data_real = other.data_real; break; case Type::String: - new (&this->data_string) String{other.data_string}; + std::construct_at(&this->data_string, other.data_string); break; case Type::Array: - new (&this->data_array) Array{other.data_array}; + std::construct_at(&this->data_array, other.data_array); break; case Type::Object: - new (&this->data_object) Object{other.data_object}; + std::construct_at(&this->data_object, other.data_object); break; case Type::Decimal: this->data_decimal = new Decimal{*other.data_decimal}; @@ -217,20 +215,19 @@ auto JSON::operator=(JSON &&other) noexcept -> JSON & { this->data_real = other.data_real; break; case Type::String: - new (&this->data_string) String{std::move(other.data_string)}; + std::construct_at(&this->data_string, std::move(other.data_string)); other.current_type = Type::Null; break; case Type::Array: - new (&this->data_array) Array{std::move(other.data_array)}; + std::construct_at(&this->data_array, std::move(other.data_array)); other.current_type = Type::Null; break; case Type::Object: - new (&this->data_object) Object{std::move(other.data_object)}; + std::construct_at(&this->data_object, std::move(other.data_object)); other.current_type = Type::Null; break; case Type::Decimal: - this->data_decimal = other.data_decimal; - other.data_decimal = nullptr; + this->data_decimal = std::exchange(other.data_decimal, nullptr); other.current_type = Type::Null; break; default: @@ -418,41 +415,6 @@ auto JSON::operator-=(const JSON &substractive) -> JSON & { return *this = *this - substractive; } -[[nodiscard]] auto JSON::is_boolean() const 
noexcept -> bool { - return this->current_type == Type::Boolean; -} - -[[nodiscard]] auto JSON::is_null() const noexcept -> bool { - return this->current_type == Type::Null; -} - -[[nodiscard]] auto JSON::is_integer() const noexcept -> bool { - return this->current_type == Type::Integer; -} - -[[nodiscard]] auto JSON::is_real() const noexcept -> bool { - return this->current_type == Type::Real; -} - -[[nodiscard]] auto JSON::is_integral() const noexcept -> bool { - switch (this->type()) { - case Type::Integer: - return true; - case Type::Real: { - Real integral = 0.0; - return std::modf(this->to_real(), &integral) == 0.0; - } - case Type::Decimal: - return this->to_decimal().is_integral(); - default: - return false; - } -} - -[[nodiscard]] auto JSON::is_number() const noexcept -> bool { - return this->is_integer() || this->is_real() || this->is_decimal(); -} - [[nodiscard]] auto JSON::is_positive() const noexcept -> bool { switch (this->type()) { case Type::Integer: @@ -466,141 +428,12 @@ auto JSON::operator-=(const JSON &substractive) -> JSON & { } } -[[nodiscard]] auto JSON::is_string() const noexcept -> bool { - return this->current_type == Type::String; -} - -[[nodiscard]] auto JSON::is_array() const noexcept -> bool { - return this->current_type == Type::Array; -} - -[[nodiscard]] auto JSON::is_object() const noexcept -> bool { - return this->current_type == Type::Object; -} - -[[nodiscard]] auto JSON::is_decimal() const noexcept -> bool { - return this->current_type == Type::Decimal; -} - -[[nodiscard]] auto JSON::type() const noexcept -> Type { - return this->current_type; -} - -[[nodiscard]] auto JSON::to_boolean() const noexcept -> bool { - assert(this->is_boolean()); - return this->data_boolean; -} - -[[nodiscard]] auto JSON::to_integer() const noexcept -> Integer { - assert(this->is_integer()); - return this->data_integer; -} - -[[nodiscard]] auto JSON::to_real() const noexcept -> Real { - assert(this->is_real()); - // This MUST not happen - 
assert(!std::isinf(this->data_real)); - assert(!std::isnan(this->data_real)); - return this->data_real; -} - -[[nodiscard]] auto JSON::to_decimal() const noexcept -> const Decimal & { - assert(this->is_decimal()); - // This MUST not happen - assert(this->data_decimal->is_finite()); - assert(!this->data_decimal->is_nan()); - return *this->data_decimal; -} - -[[nodiscard]] auto JSON::to_string() const noexcept -> const JSON::String & { - assert(this->is_string()); - return this->data_string; -} - [[nodiscard]] auto JSON::to_stringstream() const -> std::basic_istringstream> { return std::basic_istringstream>{ this->data_string}; } -[[nodiscard]] auto JSON::as_array() const noexcept -> const JSON::Array & { - assert(this->is_array()); - return this->data_array; -} - -[[nodiscard]] auto JSON::as_array() noexcept -> JSON::Array & { - assert(this->is_array()); - return this->data_array; -} - -[[nodiscard]] auto JSON::as_object() noexcept -> Object & { - assert(this->is_object()); - return this->data_object; -} - -[[nodiscard]] auto JSON::as_object() const noexcept -> const Object & { - assert(this->is_object()); - return this->data_object; -} - -[[nodiscard]] auto JSON::as_real() const noexcept -> Real { - assert(this->is_number()); - return this->is_real() ? 
this->to_real() - : static_cast(this->to_integer()); -} - -[[nodiscard]] auto JSON::as_integer() const noexcept -> Integer { - assert(this->is_number()); - if (this->is_integer()) { - return this->to_integer(); - } else { - return static_cast(std::trunc(this->to_real())); - } -} - -[[nodiscard]] auto JSON::at(const typename JSON::Array::size_type index) const - -> const JSON & { - assert(this->is_array()); - assert(index < this->size()); - return data_array.data.at(index); -} - -[[nodiscard]] auto JSON::at(const typename JSON::Array::size_type index) - -> JSON & { - assert(this->is_array()); - assert(index < this->size()); - return this->data_array.data.at(index); -} - -[[nodiscard]] auto JSON::at(const JSON::String &key) const -> const JSON & { - assert(this->is_object()); - assert(this->defines(key)); - const auto &object{this->data_object}; - return object.at(key, object.hash(key)); -} - -[[nodiscard]] auto JSON::at(const String &key, - const typename Object::hash_type hash) const - -> const JSON & { - assert(this->is_object()); - assert(this->defines(key)); - return this->data_object.at(key, hash); -} - -[[nodiscard]] auto JSON::at(const JSON::String &key) -> JSON & { - assert(this->is_object()); - assert(this->defines(key)); - auto &object{this->data_object}; - return object.at(key, object.hash(key)); -} - -[[nodiscard]] auto JSON::at(const String &key, - const typename Object::hash_type hash) -> JSON & { - assert(this->is_object()); - assert(this->defines(key)); - return this->data_object.at(key, hash); -} - [[nodiscard]] auto JSON::at_or(const String &key, const typename Object::hash_type hash, const JSON &otherwise) const -> const JSON & { @@ -615,77 +448,22 @@ auto JSON::operator-=(const JSON &substractive) -> JSON & { return this->at_or(key, this->data_object.hash(key), otherwise); } -[[nodiscard]] auto JSON::front() -> JSON & { - assert(this->is_array()); - assert(!this->empty()); - return this->data_array.data.front(); -} - -[[nodiscard]] auto 
JSON::front() const -> const JSON & { - assert(this->is_array()); - assert(!this->empty()); - return this->data_array.data.front(); -} - -[[nodiscard]] auto JSON::back() -> JSON & { - assert(this->is_array()); - assert(!this->empty()); - return this->data_array.data.back(); -} - -[[nodiscard]] auto JSON::back() const -> const JSON & { - assert(this->is_array()); - assert(!this->empty()); - return this->data_array.data.back(); -} - -[[nodiscard]] auto JSON::size() const -> std::size_t { - if (this->is_object()) { - return this->object_size(); - } else if (this->is_array()) { - return this->array_size(); - } else { - return this->string_size(); - } -} - -[[nodiscard]] auto JSON::string_size() const -> std::size_t { - assert(this->is_string()); - return JSON::size(this->data_string); -} - -[[nodiscard]] auto JSON::array_size() const -> std::size_t { - assert(this->is_array()); - return this->data_array.data.size(); -} - -[[nodiscard]] auto JSON::object_size() const -> std::size_t { - assert(this->is_object()); - return this->data_object.size(); -} - -[[nodiscard]] auto JSON::byte_size() const -> std::size_t { - assert(this->is_string()); - return this->data_string.size(); -} - [[nodiscard]] auto JSON::estimated_byte_size() const -> std::uint64_t { // Of course, container have some overhead of their own // which we are not taking into account here, as its typically // implementation dependent. This function is just a rough estimate. 
if (this->is_object()) { - return std::accumulate(this->as_object().cbegin(), this->as_object().cend(), - static_cast(0), - [](const std::uint64_t accumulator, - const typename Object::value_type &pair) { - return accumulator + - (pair.first.size() * sizeof(Char)) + - pair.second.estimated_byte_size(); - }); + return std::ranges::fold_left(this->as_object(), + static_cast(0), + [](const std::uint64_t accumulator, + const typename Object::value_type &pair) { + return accumulator + + (pair.first.size() * sizeof(Char)) + + pair.second.estimated_byte_size(); + }); } else if (this->is_array()) { - return std::accumulate( - this->as_array().cbegin(), this->as_array().cend(), - static_cast(0), + return std::ranges::fold_left( + this->as_array(), static_cast(0), [](const std::uint64_t accumulator, const JSON &item) { return accumulator + item.estimated_byte_size(); }); @@ -718,21 +496,19 @@ auto JSON::operator-=(const JSON &substractive) -> JSON & { case Type::String: return 3 + this->byte_size(); case Type::Array: - return std::accumulate( - this->as_array().cbegin(), this->as_array().cend(), - static_cast(6), + return std::ranges::fold_left( + this->as_array(), static_cast(6), [](const std::uint64_t accumulator, const JSON &item) { return accumulator + 1 + item.fast_hash(); }); case Type::Object: - return std::accumulate(this->as_object().cbegin(), - this->as_object().cend(), - static_cast(7), - [](const std::uint64_t accumulator, - const typename Object::value_type &pair) { - return accumulator + 1 + pair.first.size() + - pair.second.fast_hash(); - }); + return std::ranges::fold_left( + this->as_object(), static_cast(7), + [](const std::uint64_t accumulator, + const typename Object::value_type &pair) { + return accumulator + 1 + pair.first.size() + + pair.second.fast_hash(); + }); case Type::Decimal: return 8; default: @@ -794,48 +570,6 @@ auto JSON::operator-=(const JSON &substractive) -> JSON & { return dividend_decimal.divisible_by(divisor.to_decimal()); } 
-[[nodiscard]] auto JSON::empty() const -> bool { - if (this->is_object()) { - return this->data_object.empty(); - } else if (this->is_array()) { - return this->data_array.data.empty(); - } else { - return this->data_string.empty(); - } -} - -[[nodiscard]] auto JSON::try_at(const JSON::String &key) const -> const JSON * { - assert(this->is_object()); - const auto &object{this->data_object}; - return object.try_at(key, object.hash(key)); -} - -[[nodiscard]] auto JSON::try_at(const String &key, - const typename Object::hash_type hash) const - -> const JSON * { - assert(this->is_object()); - const auto &object{this->data_object}; - return object.try_at(key, hash); -} - -[[nodiscard]] auto JSON::defines(const JSON::String &key) const -> bool { - assert(this->is_object()); - const auto &object{this->data_object}; - return object.defines(key, object.hash(key)); -} - -[[nodiscard]] auto -JSON::defines(const JSON::String &key, - const typename JSON::Object::hash_type hash) const -> bool { - assert(this->is_object()); - return this->data_object.defines(key, hash); -} - -[[nodiscard]] auto -JSON::defines(const typename JSON::Array::size_type index) const -> bool { - return this->defines(std::to_string(index)); -} - [[nodiscard]] auto JSON::defines_any(std::initializer_list keys) const -> bool { return this->defines_any(keys.begin(), keys.end()); @@ -843,8 +577,7 @@ JSON::defines_any(std::initializer_list keys) const -> bool { [[nodiscard]] auto JSON::contains(const JSON &element) const -> bool { assert(this->is_array()); - return std::find(this->as_array().cbegin(), this->as_array().cend(), - element) != this->as_array().cend(); + return std::ranges::contains(this->as_array(), element); } [[nodiscard]] auto JSON::contains(const JSON::StringView element) const @@ -861,13 +594,13 @@ JSON::defines_any(std::initializer_list keys) const -> bool { [[nodiscard]] auto JSON::includes(const JSON::String &input) const -> bool { assert(this->is_string()); - return 
this->to_string().find(input) != JSON::String::npos; + return this->to_string().contains(input); } [[nodiscard]] auto JSON::includes(const JSON::String::value_type input) const -> bool { assert(this->is_string()); - return this->to_string().find(input) != JSON::String::npos; + return this->to_string().contains(input); } [[nodiscard]] auto JSON::unique() const -> bool { @@ -914,12 +647,11 @@ auto JSON::push_back_if_unique(const JSON &value) -> std::pair, bool> { assert(this->is_array()); auto &array_data{this->as_array().data}; - const auto match{std::ranges::find(array_data, value)}; - if (match == array_data.cend()) { + if (!std::ranges::contains(array_data, value)) { array_data.push_back(value); return {array_data.back(), true}; } else { - return {*match, false}; + return {*std::ranges::find(array_data, value), false}; } } @@ -927,12 +659,11 @@ auto JSON::push_back_if_unique(JSON &&value) -> std::pair, bool> { assert(this->is_array()); auto &array_data{this->as_array().data}; - const auto match{std::ranges::find(array_data, value)}; - if (match == array_data.cend()) { + if (!std::ranges::contains(array_data, value)) { array_data.push_back(std::move(value)); return {array_data.back(), true}; } else { - return {*match, false}; + return {*std::ranges::find(array_data, value), false}; } } @@ -977,6 +708,12 @@ auto JSON::assign_assume_new(JSON::String &&key, JSON &&value) -> void { this->data_object.emplace_assume_new(std::move(key), std::move(value)); } +auto JSON::assign_assume_new(JSON::String &&key, JSON &&value, + Object::hash_type hash) -> void { + assert(this->is_object()); + this->data_object.emplace_assume_new(std::move(key), std::move(value), hash); +} + auto JSON::erase(const JSON::String &key) -> typename Object::size_type { assert(this->is_object()); return this->data_object.erase(key); diff --git a/vendor/core/src/core/json/parser.h b/vendor/core/src/core/json/parser.h index 7341549..b5f8eed 100644 --- a/vendor/core/src/core/json/parser.h +++ 
b/vendor/core/src/core/json/parser.h @@ -4,1325 +4,768 @@ #include #include -#include - #include "grammar.h" -#include // assert -#include // std::isxdigit -#include // std::isinf, std::isnan -#include // std::size_t -#include // std::uint64_t -#include // std::reference_wrapper -#include // std::basic_istream -#include // std::optional -#include // std::basic_ostringstream, std::basic_istringstream -#include // std::stack -#include // std::out_of_range -#include // std::basic_string, std::stoul +#include // assert +#include // std::uint64_t, std::uint32_t +#include // std::vector + +namespace sourcemeta::core { -namespace sourcemeta::core::internal { +enum class TapeType : std::uint8_t { + ObjectStart, + ObjectEnd, + ArrayStart, + ArrayEnd, + Key, + String, + Number, + Null, + True, + False +}; + +struct TapeEntry { + TapeType type; + std::uint32_t offset; + std::uint32_t length; + std::uint32_t count; + std::uint64_t line; + std::uint64_t column; +}; + +namespace internal { + +template +inline auto skip_whitespace(const char *&cursor, const char *end, + std::uint64_t &line, std::uint64_t &column) + -> void { + while (cursor < end) { + switch (*cursor) { + case internal::token_whitespace_space: + case internal::token_whitespace_tabulation: + case internal::token_whitespace_carriage_return: + if constexpr (TrackPositions) { + column += 1; + } + cursor++; + continue; + case internal::token_whitespace_line_feed: + if constexpr (TrackPositions) { + line += 1; + column = 0; + } + cursor++; + continue; + default: + return; + } + } +} -inline auto parse_null( - const std::uint64_t line, std::uint64_t &column, - std::basic_istream &stream) - -> JSON { +template +inline auto scan_null(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end) -> void { for ( const auto character : internal::constant_null.substr( 1)) { - column += 1; - if (stream.get() != character) { + if constexpr (TrackPositions) { + column += 1; + } + if (cursor >= end) 
[[unlikely]] { throw JSONParseError(line, column); } + if (*cursor != character) [[unlikely]] { + throw JSONParseError(line, column); + } + cursor++; } - - return JSON{nullptr}; } -inline auto parse_boolean_true( - const std::uint64_t line, std::uint64_t &column, - std::basic_istream &stream) - -> JSON { +template +inline auto scan_true(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end) -> void { for ( const auto character : internal::constant_true.substr( 1)) { - column += 1; - if (stream.get() != character) { + if constexpr (TrackPositions) { + column += 1; + } + if (cursor >= end) [[unlikely]] { + throw JSONParseError(line, column); + } + if (*cursor != character) [[unlikely]] { throw JSONParseError(line, column); } + cursor++; } - - return JSON{true}; } -inline auto parse_boolean_false( - const std::uint64_t line, std::uint64_t &column, - std::basic_istream &stream) - -> JSON { +template +inline auto scan_false(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end) -> void { for ( const auto character : internal::constant_false.substr( 1)) { - column += 1; - if (stream.get() != character) { + if constexpr (TrackPositions) { + column += 1; + } + if (cursor >= end) [[unlikely]] { throw JSONParseError(line, column); } + if (*cursor != character) [[unlikely]] { + throw JSONParseError(line, column); + } + cursor++; } - - return JSON{false}; } -auto parse_string_unicode_code_point( - const std::uint64_t line, std::uint64_t &column, - std::basic_istream &stream) +template +inline auto scan_string_unicode_code_point(const std::uint64_t line, + std::uint64_t &column, + const char *&cursor, const char *end) -> unsigned long { - std::basic_string> - code_point; - code_point.resize(4); - std::size_t code_point_size{0}; - - // Any code point may be represented as a hexadecimal escape sequence. - // The meaning of such a hexadecimal number is determined by ISO/IEC - // 10646. 
If the code point is in the Basic Multilingual Plane (U+0000 - // through U+FFFF), then it may be represented as a six-character - // sequence: a reverse solidus, followed by the lowercase letter u, - // followed by four hexadecimal digits that encode the code point. - // Hexadecimal digits can be digits (U+0030 through U+0039) or the - // hexadecimal letters A through F in uppercase (U+0041 through U+0046) - // or lowercase (U+0061 through U+0066). - // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - while (code_point_size < 4) { - column += 1; - code_point[code_point_size] = - static_cast(stream.get()); - if (std::isxdigit(code_point[code_point_size])) { - code_point_size += 1; - } else { + unsigned long result{0}; + for (std::size_t index = 0; index < 4; index++) { + if constexpr (TrackPositions) { + column += 1; + } + if (cursor >= end) [[unlikely]] { throw JSONParseError(line, column); } + const char hex_char{*cursor++}; + unsigned long digit; + if (hex_char >= '0' && hex_char <= '9') { + digit = static_cast(hex_char - '0'); + } else if (hex_char >= 'a' && hex_char <= 'f') { + digit = static_cast(hex_char - 'a') + 10; + } else if (hex_char >= 'A' && hex_char <= 'F') { + digit = static_cast(hex_char - 'A') + 10; + } else [[unlikely]] { + throw JSONParseError(line, column); + } + result = (result << 4) | digit; } - // We don't need to perform any further validation here. - // According to ECMA 404, \u can be followed by "any" - // sequence of 4 hexadecimal digits. 
- constexpr auto unicode_base{16}; - const auto result{std::stoul(code_point, nullptr, unicode_base)}; - // The largest possible valid unicode code point assert(result <= 0xFFFF); return result; } -auto parse_string_unicode( - const std::uint64_t line, std::uint64_t &column, - std::basic_istream &stream, - std::basic_ostringstream> - &result) -> void { - auto code_point{parse_string_unicode_code_point(line, column, stream)}; +template +inline auto scan_string_unicode(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end) -> void { + auto code_point{scan_string_unicode_code_point(line, column, + cursor, end)}; using CharT = typename JSON::Char; - // This means we are at the beginning of a UTF-16 surrogate pair high code - // point See - // https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF + if (code_point >= 0xDC00 && code_point <= 0xDFFF) [[unlikely]] { + throw JSONParseError(line, column); + } + if (code_point >= 0xD800 && code_point <= 0xDBFF) { - // Next, we expect "\" - column += 1; - if (stream.get() != internal::token_string_escape) { + if constexpr (TrackPositions) { + column += 1; + } + if (cursor >= end) [[unlikely]] { + throw JSONParseError(line, column); + } + if (*cursor != internal::token_string_escape) [[unlikely]] { throw JSONParseError(line, column); } + cursor++; - // Next, we expect "u" - column += 1; - if (stream.get() != internal::token_string_escape_unicode) { + if constexpr (TrackPositions) { + column += 1; + } + if (cursor >= end) [[unlikely]] { + throw JSONParseError(line, column); + } + if (*cursor != internal::token_string_escape_unicode) [[unlikely]] { throw JSONParseError(line, column); } + cursor++; - // Finally, get the low code point of the surrogate and calculate - // the real final code point - const auto low_code_point{ - parse_string_unicode_code_point(line, column, stream)}; + const auto low_code_point{scan_string_unicode_code_point( + line, column, cursor, end)}; // See 
// https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF - if (low_code_point >= 0xDC00 && low_code_point <= 0xDFFF) { - code_point = - 0x10000 + ((code_point - 0xD800) << 10) + (low_code_point - 0xDC00); - } else { + if (low_code_point < 0xDC00 || low_code_point > 0xDFFF) [[unlikely]] { throw JSONParseError(line, column); } } - - // Convert a Unicode codepoint into UTF-8 - // See https://en.wikipedia.org/wiki/UTF-8#Description - - if (code_point <= 0x7F) { - // UTF-8 - result.put(static_cast(code_point)); - } else if (code_point <= 0x7FF) { - // UTF-16 - result.put(static_cast(0xC0 | ((code_point >> 6) & 0x1F))); - result.put(static_cast(0x80 | (code_point & 0x3F))); - } else { - // UTF-32 - result.put(static_cast(0xE0 | ((code_point >> 12) & 0x0F))); - result.put(static_cast(0x80 | ((code_point >> 6) & 0x3F))); - result.put(static_cast(0x80 | (code_point & 0x3F))); - } } -auto parse_string_escape( - const std::uint64_t line, std::uint64_t &column, - std::basic_istream &stream, - std::basic_ostringstream> - &result) -> void { - column += 1; - switch (stream.get()) { +template +inline auto scan_string_escape(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end) -> void { + if constexpr (TrackPositions) { + column += 1; + } + if (cursor >= end) [[unlikely]] { + throw JSONParseError(line, column); + } + switch (*cursor++) { case internal::token_string_quote: - result.put(internal::token_string_quote); - return; case internal::token_string_escape: - result.put(internal::token_string_escape); - return; case internal::token_string_solidus: - result.put(internal::token_string_solidus); - return; case internal::token_string_escape_backspace: - result.put('\b'); - return; case internal::token_string_escape_form_feed: - result.put('\f'); - return; case internal::token_string_escape_line_feed: - result.put('\n'); - return; case internal::token_string_escape_carriage_return: - result.put('\r'); - return; case 
internal::token_string_escape_tabulation: - result.put('\t'); return; - - // Any code point may be represented as a hexadecimal escape sequence. - // The meaning of such a hexadecimal number is determined by ISO/IEC - // 10646. If the code point is in the Basic Multilingual Plane (U+0000 - // through U+FFFF), then it may be represented as a six-character - // sequence: a reverse solidus, followed by the lowercase letter u, - // followed by four hexadecimal digits that encode the code point. - // Hexadecimal digits can be digits (U+0030 through U+0039) or the - // hexadecimal letters A through F in uppercase (U+0041 through U+0046) - // or lowercase (U+0061 through U+0066). - // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf case internal::token_string_escape_unicode: - parse_string_unicode(line, column, stream, result); + scan_string_unicode(line, column, cursor, end); return; - default: - throw JSONParseError(line, column); + [[unlikely]] throw JSONParseError(line, column); } } -auto parse_string( - const std::uint64_t line, std::uint64_t &column, - std::basic_istream &stream) - -> typename JSON::String { - std::basic_ostringstream> - result; - while (!stream.eof()) { - column += 1; - const typename JSON::Char character{ - static_cast(stream.get())}; - switch (character) { - // A string is a sequence of Unicode code points wrapped with quotation - // marks (U+0022). 
See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_string_quote: - return result.str(); - case internal::token_string_escape: - parse_string_escape(line, column, stream, result); - break; - // These are always disallowed - case '\u0000': - case '\u0001': - case '\u0002': - case '\u0003': - case '\u0004': - case '\u0005': - case '\u0006': - case '\u0007': - case '\u0008': - case '\u0009': - case '\u000A': - case '\u000B': - case '\u000C': - case '\u000D': - case '\u000E': - case '\u000F': - case '\u0010': - case '\u0011': - case '\u0012': - case '\u0013': - case '\u0014': - case '\u0015': - case '\u0016': - case '\u0017': - case '\u0018': - case '\u0019': - case '\u001A': - case '\u001B': - case '\u001C': - case '\u001D': - case '\u001E': - case '\u001F': - case static_cast(JSON::CharTraits::eof()): - throw JSONParseError(line, column); - default: - result.put(character); - break; +template +inline auto scan_string(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end) -> void { + using CharT = typename JSON::Char; + while (cursor < end) { + const char *scan{cursor}; + while (scan < end && *scan != internal::token_string_quote && + *scan != internal::token_string_escape && + static_cast(*scan) >= 0x20) { + scan++; } - } - - throw JSONParseError(line, column); -} - -template -auto parse_number_decimal(const std::uint64_t line, const std::uint64_t column, - const std::basic_string &string) - -> JSON { - try { - return JSON{Decimal{string}}; - } catch (const DecimalParseError &) { - throw JSONParseError(line, column); - } catch (const std::invalid_argument &) { - throw JSONParseError(line, column); - } -} - -template -auto parse_number_integer_maybe_decimal( - const std::uint64_t line, const std::uint64_t column, - const std::basic_string &string) -> JSON { - const auto result{sourcemeta::core::to_int64_t(string)}; - return result.has_value() ? 
JSON{result.value()} - : parse_number_decimal(line, column, string); -} -template -auto parse_number_real_maybe_decimal( - const std::uint64_t line, const std::uint64_t column, - const std::basic_string &string, - const std::size_t first_nonzero_position, - const std::size_t decimal_position) -> JSON { - // We are guaranteed to not be dealing with exponential numbers here - assert((string.find('e') == std::basic_string::npos)); - assert((string.find('E') == std::basic_string::npos)); + if (scan > cursor) { + if constexpr (TrackPositions) { + column += static_cast(scan - cursor); + } + cursor = scan; + } - // If the number has enough significant digits, then we risk completely losing - // precision of the fractional component, and thus incorrectly interpreting a - // fractional number as an integral value - const auto decimal_after_first_nonzero{ - decimal_position != std::basic_string::npos && - decimal_position > first_nonzero_position}; - const auto significant_digits{string.length() - first_nonzero_position - - (decimal_after_first_nonzero ? 1 : 0)}; - constexpr std::size_t MAX_SAFE_SIGNIFICANT_DIGITS{15}; - if (significant_digits > MAX_SAFE_SIGNIFICANT_DIGITS) { - return parse_number_decimal(line, column, string); - } + if (cursor >= end) [[unlikely]] { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); + } - const auto result{sourcemeta::core::to_double(string)}; - return result.has_value() ? 
JSON{result.value()} - : parse_number_decimal(line, column, string); -} + if constexpr (TrackPositions) { + column += 1; + } + const char character{*cursor++}; -auto parse_number_exponent_rest( - const std::uint64_t line, std::uint64_t &column, - const std::uint64_t original_column, - std::basic_istream &stream, - std::basic_ostringstream> - &result) -> JSON { - while (!stream.eof()) { - const typename JSON::Char character{ - static_cast(stream.peek())}; switch (character) { - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - result.put(character); - stream.ignore(1); - column += 1; + case internal::token_string_quote: + return; + case internal::token_string_escape: + scan_string_escape(line, column, cursor, end); break; default: - // As a heuristic, if a number has exponential notation, it is almost - // always a big number for which `double` is typically a poor - // representation. 
If an exponent is encountered, we just always parse - // as a high-precision decimal - return parse_number_decimal(line, original_column, result.str()); + [[unlikely]] throw JSONParseError(line, column); } } + if constexpr (TrackPositions) { + column += 1; + } throw JSONParseError(line, column); } -auto parse_number_exponent( - const std::uint64_t line, std::uint64_t &column, - const std::uint64_t original_column, - std::basic_istream &stream, - std::basic_ostringstream> - &result) -> JSON { - const typename JSON::Char character{ - static_cast(stream.get())}; - column += 1; - switch (character) { - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - result.put(character); - return parse_number_exponent_rest(line, column, original_column, stream, - result); - default: - throw JSONParseError(line, column); +template +inline auto scan_digits(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end, + const bool at_least_one) -> void { + using CharT = typename JSON::Char; + bool found{false}; + while (cursor < end && *cursor >= internal::token_number_zero && + *cursor <= internal::token_number_nine) { + found = true; + if constexpr (TrackPositions) { + column += 1; + } + cursor++; } -} - -auto parse_number_exponent_first( - const std::uint64_t line, std::uint64_t &column, - const std::uint64_t original_column, - std::basic_istream &stream, - std::basic_ostringstream> - &result) -> JSON { - const typename JSON::Char character{ - static_cast(stream.get())}; - column += 1; - switch (character) { - case internal::token_number_plus: - // Exponents are positive by default, - // so no need to write the plus sign. 
- return parse_number_exponent(line, column, original_column, stream, - result); - case internal::token_number_minus: - result.put(character); - return parse_number_exponent(line, column, original_column, stream, - result); - - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - result.put(character); - return parse_number_exponent_rest(line, column, original_column, stream, - result); - default: - throw JSONParseError(line, column); + if (at_least_one && !found) [[unlikely]] { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); } } -auto parse_number_fractional( - const std::uint64_t line, std::uint64_t &column, - const std::uint64_t original_column, - std::basic_istream &stream, - std::basic_ostringstream> - &result, - std::size_t &first_nonzero_position, const std::size_t decimal_position) - -> JSON { - while (!stream.eof()) { - const typename JSON::Char character{ - static_cast(stream.peek())}; - switch (character) { - // [A number] may have an exponent, prefixed by e (U+0065) or E (U+0045) - // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_number_exponent_uppercase: - case internal::token_number_exponent_lowercase: - result.put(character); - stream.ignore(1); - column += 1; - return parse_number_exponent_first(line, column, original_column, - stream, result); - - case internal::token_number_zero: - result.put(character); - stream.ignore(1); - column += 1; - break; - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - 
case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - if (first_nonzero_position == - std::basic_string::npos) { - first_nonzero_position = result.str().size(); - } - result.put(character); - stream.ignore(1); +template +inline auto scan_number(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end, const char first) + -> void { + using CharT = typename JSON::Char; + if (first == internal::token_number_minus) { + if (cursor >= end || *cursor < internal::token_number_zero || + *cursor > internal::token_number_nine) [[unlikely]] { + if constexpr (TrackPositions) { column += 1; - break; - default: - return parse_number_real_maybe_decimal( - line, original_column, result.str(), first_nonzero_position, - decimal_position); + } + throw JSONParseError(line, column); } } - throw JSONParseError(line, column); -} - -auto parse_number_fractional_first( - const std::uint64_t line, std::uint64_t &column, - const std::uint64_t original_column, - std::basic_istream &stream, - std::basic_ostringstream> - &result, - std::size_t &first_nonzero_position, const std::size_t decimal_position) - -> JSON { - const typename JSON::Char character{ - static_cast(stream.peek())}; - switch (character) { - // [A number] may have a fractional part prefixed by a decimal point - // (U+002E). See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_number_decimal_point: - case static_cast(JSON::CharTraits::eof()): + const char int_start{first == internal::token_number_minus ? 
*cursor + : first}; + if (first == internal::token_number_minus) { + if constexpr (TrackPositions) { column += 1; - throw JSONParseError(line, column); - case internal::token_number_zero: - result.put(character); - stream.ignore(1); - column += 1; - return parse_number_fractional(line, column, original_column, stream, - result, first_nonzero_position, - decimal_position); - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - if (first_nonzero_position == - std::basic_string::npos) { - first_nonzero_position = result.str().size(); + } + cursor++; + } + + if (int_start == internal::token_number_zero) { + if (cursor < end && *cursor >= internal::token_number_zero && + *cursor <= internal::token_number_nine) [[unlikely]] { + if constexpr (TrackPositions) { + column += 1; } - result.put(character); - stream.ignore(1); - column += 1; - return parse_number_fractional(line, column, original_column, stream, - result, first_nonzero_position, - decimal_position); - default: - return parse_number_real_maybe_decimal( - line, original_column, result.str(), first_nonzero_position, - decimal_position); + throw JSONParseError(line, column); + } + } else { + scan_digits(line, column, cursor, end, false); } -} -auto parse_number_maybe_fractional( - const std::uint64_t line, std::uint64_t &column, - const std::uint64_t original_column, - std::basic_istream &stream, - std::basic_ostringstream> - &result, - std::size_t &first_nonzero_position) -> JSON { - const typename JSON::Char character{ - static_cast(stream.peek())}; - switch (character) { - // [A number] may have a fractional part prefixed by a decimal point - // (U+002E). 
See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_number_decimal_point: { - const std::size_t decimal_position{result.str().size()}; - result.put(character); - stream.ignore(1); + if (cursor < end && *cursor == internal::token_number_decimal_point) { + if constexpr (TrackPositions) { column += 1; - return JSON{parse_number_fractional_first( - line, column, original_column, stream, result, first_nonzero_position, - decimal_position)}; } - case internal::token_number_exponent_uppercase: - case internal::token_number_exponent_lowercase: - result.put(character); - stream.ignore(1); - column += 1; - return JSON{parse_number_exponent_first(line, column, original_column, - stream, result)}; - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - column += 1; - throw JSONParseError(line, column); - default: - return JSON{parse_number_integer_maybe_decimal(line, original_column, - result.str())}; + cursor++; + scan_digits(line, column, cursor, end, true); } -} -auto parse_number_any_rest( - const std::uint64_t line, std::uint64_t &column, - const std::uint64_t original_column, - std::basic_istream &stream, - std::basic_ostringstream> - &result, - std::size_t &first_nonzero_position) -> JSON { - while (!stream.eof()) { - const typename JSON::Char character{ - static_cast(stream.peek())}; - switch (character) { - // [A number] may have a fractional part prefixed by a decimal point - // (U+002E). 
See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_number_decimal_point: { - const std::size_t decimal_position{result.str().size()}; - result.put(character); - stream.ignore(1); + if (cursor < end && + (*cursor == internal::token_number_exponent_lowercase || + *cursor == internal::token_number_exponent_uppercase)) { + if constexpr (TrackPositions) { + column += 1; + } + cursor++; + if (cursor < end && (*cursor == internal::token_number_plus || + *cursor == internal::token_number_minus)) { + if constexpr (TrackPositions) { column += 1; - return JSON{parse_number_fractional_first( - line, column, original_column, stream, result, - first_nonzero_position, decimal_position)}; } - case internal::token_number_exponent_uppercase: - case internal::token_number_exponent_lowercase: - result.put(character); - stream.ignore(1); - column += 1; - return JSON{parse_number_exponent_first(line, column, original_column, - stream, result)}; - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - result.put(character); - stream.ignore(1); - column += 1; - break; - default: - return JSON{parse_number_integer_maybe_decimal(line, original_column, - result.str())}; + cursor++; } - } - - throw JSONParseError(line, column); -} - -auto parse_number_any_negative_first( - const std::uint64_t line, std::uint64_t &column, - const std::uint64_t original_column, - std::basic_istream &stream, - std::basic_ostringstream> - &result, - std::size_t &first_nonzero_position) -> JSON { - const typename JSON::Char character{ - static_cast(stream.get())}; - column += 1; - switch (character) { - // A number is a sequence of decimal 
digits with no superfluous leading - // zero. See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_number_zero: - result.put(character); - return parse_number_maybe_fractional(line, column, original_column, - stream, result, - first_nonzero_position); - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - first_nonzero_position = result.str().size(); - result.put(character); - return parse_number_any_rest(line, column, original_column, stream, - result, first_nonzero_position); - default: - throw JSONParseError(line, column); + scan_digits(line, column, cursor, end, true); } } -auto parse_number( - const std::uint64_t line, std::uint64_t &column, - std::basic_istream &stream, - const typename JSON::Char first) -> JSON { - std::basic_ostringstream> - result; - result.put(first); +} // namespace internal - std::size_t first_nonzero_position{ - std::basic_string::npos}; - - // A number is a sequence of decimal digits with no superfluous leading zero. - // It may have a preceding minus sign (U+002D). 
See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - switch (first) { - case internal::token_number_minus: - return parse_number_any_negative_first(line, column, column, stream, - result, first_nonzero_position); - case internal::token_number_zero: - return parse_number_maybe_fractional(line, column, column, stream, result, - first_nonzero_position); - // Any other digit - default: - first_nonzero_position = 0; - return parse_number_any_rest(line, column, column, stream, result, - first_nonzero_position); - } -} +// NOLINTBEGIN(cppcoreguidelines-avoid-goto) -} // namespace sourcemeta::core::internal +template +inline auto scan_json(const char *&cursor, const char *end, + const char *buffer_start, std::uint64_t &line, + std::uint64_t &column, std::vector &tape) + -> void { + struct ContainerFrame { + std::size_t tape_index; + std::uint32_t child_count; + }; -// We use "goto" to avoid recursion -// NOLINTBEGIN(cppcoreguidelines-avoid-goto) + using CharT = typename JSON::Char; + char character = 0; + std::vector container_stack; + container_stack.reserve(32); -#define CALLBACK_PRE(value_type, context, index, property) \ - if (callback) { \ - callback(JSON::ParsePhase::Pre, JSON::Type::value_type, line, column, \ - context, index, property); \ + internal::skip_whitespace(cursor, end, line, column); + if (cursor >= end) [[unlikely]] { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); } - -#define CALLBACK_PRE_WITH_POSITION(value_type, line, column, context, index, \ - property) \ - if (callback) { \ - callback(JSON::ParsePhase::Pre, JSON::Type::value_type, line, column, \ - context, index, property); \ + if constexpr (TrackPositions) { + column += 1; } + character = *cursor++; -#define CALLBACK_POST(value_type) \ - if (callback) { \ - callback(JSON::ParsePhase::Post, JSON::Type::value_type, line, column, \ - JSON::ParseContext::Root, 0, JSON::StringView{}); \ + { + const auto 
value_line{line}; + const auto value_column{column}; + switch (character) { + case internal::token_true: + internal::scan_true(line, column, cursor, end); + tape.push_back({TapeType::True, 0, 0, 0, value_line, value_column}); + return; + case internal::token_false: + internal::scan_false(line, column, cursor, end); + tape.push_back({TapeType::False, 0, 0, 0, value_line, value_column}); + return; + case internal::token_null: + internal::scan_null(line, column, cursor, end); + tape.push_back({TapeType::Null, 0, 0, 0, value_line, value_column}); + return; + case internal::token_string_quote: { + const auto string_start{ + static_cast(cursor - buffer_start)}; + internal::scan_string(line, column, cursor, end); + const auto string_length{static_cast( + cursor - buffer_start - string_start - 1)}; + tape.push_back({TapeType::String, string_start, string_length, 0, + value_line, value_column}); + return; + } + case internal::token_array_begin: + goto do_scan_array; + case internal::token_object_begin: + goto do_scan_object; + case internal::token_number_minus: + case internal::token_number_zero: + case internal::token_number_one: + case internal::token_number_two: + case internal::token_number_three: + case internal::token_number_four: + case internal::token_number_five: + case internal::token_number_six: + case internal::token_number_seven: + case internal::token_number_eight: + case internal::token_number_nine: { + const auto number_start{ + static_cast(cursor - buffer_start - 1)}; + internal::scan_number(line, column, cursor, end, + character); + const auto number_length{ + static_cast(cursor - buffer_start - number_start)}; + tape.push_back({TapeType::Number, number_start, number_length, 0, + value_line, value_column}); + return; + } + default: + [[unlikely]] throw JSONParseError(line, column); + } } -namespace sourcemeta::core { -auto internal_parse_json( - std::basic_istream &stream, - std::uint64_t &line, std::uint64_t &column, - const JSON::ParseCallback &callback) 
-> JSON { - // Globals - using Result = JSON; - enum class Container : std::uint8_t { Array, Object }; - std::stack levels; - std::stack> frames; - std::optional result; - typename Result::String key{""}; - std::uint64_t key_line{0}; - std::uint64_t key_column{0}; - typename JSON::Char character = 0; - /* - * Parse any JSON document + * Scan an array */ -do_parse: - column += 1; - character = static_cast(stream.get()); +do_scan_array: { + const auto start_index{tape.size()}; + tape.push_back({TapeType::ArrayStart, 0, 0, 0, line, column}); + container_stack.push_back({start_index, 0}); - // A JSON value can be an object, array, number, string, true, false, or null. - // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - switch (character) { - case internal::constant_true.front(): - if (callback) { - CALLBACK_PRE(Boolean, JSON::ParseContext::Root, 0, JSON::StringView{}); - const auto value{internal::parse_boolean_true(line, column, stream)}; - CALLBACK_POST(Boolean); - return value; - } else { - return internal::parse_boolean_true(line, column, stream); - } - case internal::constant_false.front(): - if (callback) { - CALLBACK_PRE(Boolean, JSON::ParseContext::Root, 0, JSON::StringView{}); - const auto value{internal::parse_boolean_false(line, column, stream)}; - CALLBACK_POST(Boolean); - return value; - } else { - return internal::parse_boolean_false(line, column, stream); - } - case internal::constant_null.front(): - if (callback) { - CALLBACK_PRE(Null, JSON::ParseContext::Root, 0, JSON::StringView{}); - const auto value{internal::parse_null(line, column, stream)}; - CALLBACK_POST(Null); - return value; - } else { - return internal::parse_null(line, column, stream); - } - - // A string is a sequence of Unicode code points wrapped with quotation - // marks (U+0022). 
See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_string_quote: - if (callback) { - CALLBACK_PRE(String, JSON::ParseContext::Root, 0, JSON::StringView{}); - const Result value{internal::parse_string(line, column, stream)}; - CALLBACK_POST(String); - return value; - } else { - return Result{internal::parse_string(line, column, stream)}; - } - case internal::token_array_begin: - CALLBACK_PRE(Array, JSON::ParseContext::Root, 0, JSON::StringView{}); - goto do_parse_array; - case internal::token_object_begin: - CALLBACK_PRE(Object, JSON::ParseContext::Root, 0, JSON::StringView{}); - goto do_parse_object; + internal::skip_whitespace(cursor, end, line, column); + if (cursor >= end) [[unlikely]] { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); + } - case internal::token_number_minus: - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - if (callback) { - const auto current_line{line}; - const auto current_column{column}; - const auto value{ - internal::parse_number(line, column, stream, character)}; - if (value.is_integer()) { - CALLBACK_PRE_WITH_POSITION(Integer, current_line, current_column, - JSON::ParseContext::Root, 0, - JSON::StringView{}); - CALLBACK_POST(Integer); - } else if (value.is_decimal()) { - CALLBACK_PRE_WITH_POSITION(Decimal, current_line, current_column, - JSON::ParseContext::Root, 0, - JSON::StringView{}); - CALLBACK_POST(Decimal); - } else { - CALLBACK_PRE_WITH_POSITION(Real, current_line, current_column, - JSON::ParseContext::Root, 0, - JSON::StringView{}); - CALLBACK_POST(Real); - } + if (*cursor == internal::token_array_end) { + 
if constexpr (TrackPositions) { + column += 1; + } + cursor++; + tape[start_index].count = 0; + tape.push_back({TapeType::ArrayEnd, 0, 0, 0, line, column}); + container_stack.pop_back(); + goto do_scan_container_end; + } - return value; - } + goto do_scan_array_item; +} - return internal::parse_number(line, column, stream, character); +do_scan_array_item: + assert(!container_stack.empty()); + container_stack.back().child_count++; - // Insignificant whitespace is allowed before or after any token. - // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_whitespace_line_feed: - column = 0; - line += 1; - goto do_parse; - case internal::token_whitespace_tabulation: - case internal::token_whitespace_carriage_return: - case internal::token_whitespace_space: - goto do_parse; - default: - throw JSONParseError(line, column); + internal::skip_whitespace(cursor, end, line, column); + if (cursor >= end) [[unlikely]] { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); } - - /* - * Parse an array - */ - -do_parse_array: - if (levels.empty()) { - assert(!result.has_value()); - levels.emplace(Container::Array); - result = std::make_optional(Result::make_array()); - frames.emplace(result.value()); - } else if (levels.top() == Container::Array) { - assert(result.has_value()); - levels.emplace(Container::Array); - assert(!frames.empty()); - assert(frames.top().get().is_array()); - frames.top().get().push_back(Result::make_array()); - frames.emplace(frames.top().get().back()); - } else if (levels.top() == Container::Object) { - assert(result.has_value()); - levels.emplace(Container::Array); - assert(!frames.empty()); - assert(frames.top().get().is_object()); - frames.top().get().assign(key, Result::make_array()); - frames.emplace(frames.top().get().at(key)); + if constexpr (TrackPositions) { + column += 1; } + character = *cursor++; - // An array structure is a pair of 
square bracket tokens surrounding zero or - // more values. The values are separated by commas. - // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - -do_parse_array_item: - assert(levels.top() == Container::Array); - column += 1; - character = static_cast(stream.get()); - switch (character) { - // Positional - case internal::token_array_end: - if (frames.top().get().empty()) { - CALLBACK_POST(Array); - goto do_parse_container_end; - } else { - throw JSONParseError(line, column); + { + const auto value_line{line}; + const auto value_column{column}; + switch (character) { + case internal::token_array_begin: + goto do_scan_array; + case internal::token_object_begin: + goto do_scan_object; + case internal::token_true: + internal::scan_true(line, column, cursor, end); + tape.push_back({TapeType::True, 0, 0, 0, value_line, value_column}); + goto do_scan_array_item_separator; + case internal::token_false: + internal::scan_false(line, column, cursor, end); + tape.push_back({TapeType::False, 0, 0, 0, value_line, value_column}); + goto do_scan_array_item_separator; + case internal::token_null: + internal::scan_null(line, column, cursor, end); + tape.push_back({TapeType::Null, 0, 0, 0, value_line, value_column}); + goto do_scan_array_item_separator; + case internal::token_string_quote: { + const auto string_start{ + static_cast(cursor - buffer_start)}; + internal::scan_string(line, column, cursor, end); + const auto string_length{static_cast( + cursor - buffer_start - string_start - 1)}; + tape.push_back({TapeType::String, string_start, string_length, 0, + value_line, value_column}); + goto do_scan_array_item_separator; } - - // Values - case internal::token_array_begin: - CALLBACK_PRE(Array, JSON::ParseContext::Index, frames.top().get().size(), - JSON::StringView{}); - goto do_parse_array; - case internal::token_object_begin: - CALLBACK_PRE(Object, JSON::ParseContext::Index, frames.top().get().size(), - 
JSON::StringView{}); - goto do_parse_object; - case internal::constant_true.front(): - CALLBACK_PRE(Boolean, JSON::ParseContext::Index, - frames.top().get().size(), JSON::StringView{}); - frames.top().get().push_back( - internal::parse_boolean_true(line, column, stream)); - CALLBACK_POST(Boolean); - goto do_parse_array_item_separator; - case internal::constant_false.front(): - CALLBACK_PRE(Boolean, JSON::ParseContext::Index, - frames.top().get().size(), JSON::StringView{}); - frames.top().get().push_back( - internal::parse_boolean_false(line, column, stream)); - CALLBACK_POST(Boolean); - goto do_parse_array_item_separator; - case internal::constant_null.front(): - CALLBACK_PRE(Null, JSON::ParseContext::Index, frames.top().get().size(), - JSON::StringView{}); - frames.top().get().push_back(internal::parse_null(line, column, stream)); - CALLBACK_POST(Null); - goto do_parse_array_item_separator; - - // A string is a sequence of Unicode code points wrapped with quotation - // marks (U+0022). 
See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_string_quote: - CALLBACK_PRE(String, JSON::ParseContext::Index, frames.top().get().size(), - JSON::StringView{}); - frames.top().get().push_back( - Result{internal::parse_string(line, column, stream)}); - CALLBACK_POST(String); - goto do_parse_array_item_separator; - - case internal::token_number_minus: - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - if (callback) { - const auto current_line{line}; - const auto current_column{column}; - const auto current_index{frames.top().get().size()}; - const auto value{ - internal::parse_number(line, column, stream, character)}; - if (value.is_integer()) { - CALLBACK_PRE_WITH_POSITION(Integer, current_line, current_column, - JSON::ParseContext::Index, current_index, - JSON::StringView{}); - } else if (value.is_decimal()) { - CALLBACK_PRE_WITH_POSITION(Decimal, current_line, current_column, - JSON::ParseContext::Index, current_index, - JSON::StringView{}); - } else { - CALLBACK_PRE_WITH_POSITION(Real, current_line, current_column, - JSON::ParseContext::Index, current_index, - JSON::StringView{}); - } - - frames.top().get().push_back(value); - - if (value.is_integer()) { - CALLBACK_POST(Integer); - } else if (value.is_decimal()) { - CALLBACK_POST(Decimal); - } else { - CALLBACK_POST(Real); - } - } else { - frames.top().get().push_back( - internal::parse_number(line, column, stream, character)); + case internal::token_number_minus: + case internal::token_number_zero: + case internal::token_number_one: + case internal::token_number_two: + case internal::token_number_three: + case 
internal::token_number_four: + case internal::token_number_five: + case internal::token_number_six: + case internal::token_number_seven: + case internal::token_number_eight: + case internal::token_number_nine: { + const auto number_start{ + static_cast(cursor - buffer_start - 1)}; + internal::scan_number(line, column, cursor, end, + character); + const auto number_length{ + static_cast(cursor - buffer_start - number_start)}; + tape.push_back({TapeType::Number, number_start, number_length, 0, + value_line, value_column}); + goto do_scan_array_item_separator; } - - goto do_parse_array_item_separator; - - // Insignificant whitespace is allowed before or after any token. - // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_whitespace_line_feed: - column = 0; - line += 1; - goto do_parse_array_item; - case internal::token_whitespace_tabulation: - case internal::token_whitespace_carriage_return: - case internal::token_whitespace_space: - goto do_parse_array_item; - default: - goto error; + default: + [[unlikely]] throw JSONParseError(line, column); + } } -do_parse_array_item_separator: - assert(levels.top() == Container::Array); - column += 1; - character = static_cast(stream.get()); +do_scan_array_item_separator: + internal::skip_whitespace(cursor, end, line, column); + if (cursor >= end) [[unlikely]] { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); + } + if constexpr (TrackPositions) { + column += 1; + } + character = *cursor++; switch (character) { - // Positional - case internal::token_array_delimiter: - goto do_parse_array_item; - case internal::token_array_end: - CALLBACK_POST(Array); - goto do_parse_container_end; - - // Insignificant whitespace is allowed before or after any token. 
- // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_whitespace_line_feed: - column = 0; - line += 1; - goto do_parse_array_item_separator; - case internal::token_whitespace_tabulation: - case internal::token_whitespace_carriage_return: - case internal::token_whitespace_space: - goto do_parse_array_item_separator; + case internal::token_array_delimiter: + goto do_scan_array_item; + case internal::token_array_end: { + assert(!container_stack.empty()); + auto &frame{container_stack.back()}; + tape[frame.tape_index].count = frame.child_count; + tape.push_back({TapeType::ArrayEnd, 0, 0, 0, line, column}); + container_stack.pop_back(); + goto do_scan_container_end; + } default: - goto error; + [[unlikely]] throw JSONParseError(line, column); } /* - * Parse an object + * Scan an object */ -do_parse_object: - if (levels.empty()) { - assert(levels.empty()); - assert(!result.has_value()); - levels.emplace(Container::Object); - result = std::make_optional(Result::make_object()); - frames.emplace(result.value()); - } else if (levels.top() == Container::Array) { - assert(result.has_value()); - levels.emplace(Container::Object); - assert(!frames.empty()); - assert(frames.top().get().is_array()); - frames.top().get().push_back(Result::make_object()); - frames.emplace(frames.top().get().back()); - } else if (levels.top() == Container::Object) { - assert(result.has_value()); - levels.emplace(Container::Object); - assert(!frames.empty()); - assert(frames.top().get().is_object()); - frames.top().get().assign(key, Result::make_object()); - frames.emplace(frames.top().get().at(key)); +do_scan_object: { + const auto start_index{tape.size()}; + tape.push_back({TapeType::ObjectStart, 0, 0, 0, line, column}); + container_stack.push_back({start_index, 0}); + + internal::skip_whitespace(cursor, end, line, column); + if (cursor >= end) [[unlikely]] { + if constexpr (TrackPositions) { + column += 1; + } + throw 
JSONParseError(line, column); } - // An object structure is represented as a pair of curly bracket tokens - // surrounding zero or more name/value pairs. A name is a string. A single - // colon token follows each name, separating the name from the value. A - // single comma token separates a value from a following name. See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf + if (*cursor == internal::token_object_end) { + if constexpr (TrackPositions) { + column += 1; + } + cursor++; + tape[start_index].count = 0; + tape.push_back({TapeType::ObjectEnd, 0, 0, 0, line, column}); + container_stack.pop_back(); + goto do_scan_container_end; + } -do_parse_object_property_key: - assert(levels.top() == Container::Object); - column += 1; - character = static_cast(stream.get()); - switch (character) { - case internal::token_object_end: - if (frames.top().get().empty()) { - CALLBACK_POST(Object); - goto do_parse_container_end; - } else { - goto error; - } + goto do_scan_object_key; +} - // A string is a sequence of Unicode code points wrapped with quotation - // marks (U+0022). See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_string_quote: - key_line = line; - key_column = column; - key = internal::parse_string(line, column, stream); - goto do_parse_object_property_separator; +do_scan_object_key: + assert(!container_stack.empty()); + container_stack.back().child_count++; - // Insignificant whitespace is allowed before or after any token. 
- // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_whitespace_line_feed: - column = 0; - line += 1; - goto do_parse_object_property_key; - case internal::token_whitespace_tabulation: - case internal::token_whitespace_carriage_return: - case internal::token_whitespace_space: - goto do_parse_object_property_key; - default: - goto error; + internal::skip_whitespace(cursor, end, line, column); + if (cursor >= end) [[unlikely]] { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); } - -do_parse_object_property_separator: - assert(levels.top() == Container::Object); - column += 1; - character = static_cast(stream.get()); + if constexpr (TrackPositions) { + column += 1; + } + character = *cursor++; switch (character) { - case internal::token_object_key_delimiter: - goto do_parse_object_property_value; - - // Insignificant whitespace is allowed before or after any token. - // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_whitespace_line_feed: - column = 0; - line += 1; - goto do_parse_object_property_separator; - case internal::token_whitespace_tabulation: - case internal::token_whitespace_carriage_return: - case internal::token_whitespace_space: - goto do_parse_object_property_separator; + case internal::token_string_quote: { + const auto key_start{static_cast(cursor - buffer_start)}; + const auto key_line{line}; + const auto key_column{column}; + internal::scan_string(line, column, cursor, end); + const auto key_length{ + static_cast(cursor - buffer_start - key_start - 1)}; + tape.push_back( + {TapeType::Key, key_start, key_length, 0, key_line, key_column}); + goto do_scan_object_separator; + } default: - goto error; + [[unlikely]] throw JSONParseError(line, column); } -do_parse_object_property_value: - assert(levels.top() == Container::Object); - column += 1; - character = 
static_cast(stream.get()); +do_scan_object_separator: + internal::skip_whitespace(cursor, end, line, column); + if (cursor >= end) [[unlikely]] { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); + } + if constexpr (TrackPositions) { + column += 1; + } + character = *cursor++; switch (character) { - // Values - case internal::token_array_begin: - CALLBACK_PRE_WITH_POSITION(Array, key_line, key_column, - JSON::ParseContext::Property, 0, key); - goto do_parse_array; - case internal::token_object_begin: - CALLBACK_PRE_WITH_POSITION(Object, key_line, key_column, - JSON::ParseContext::Property, 0, key); - goto do_parse_object; - case internal::constant_true.front(): - CALLBACK_PRE_WITH_POSITION(Boolean, key_line, key_column, - JSON::ParseContext::Property, 0, key); - frames.top().get().assign( - key, internal::parse_boolean_true(line, column, stream)); - CALLBACK_POST(Boolean); - goto do_parse_object_property_end; - case internal::constant_false.front(): - CALLBACK_PRE_WITH_POSITION(Boolean, key_line, key_column, - JSON::ParseContext::Property, 0, key); - frames.top().get().assign( - key, internal::parse_boolean_false(line, column, stream)); - CALLBACK_POST(Boolean); - goto do_parse_object_property_end; - case internal::constant_null.front(): - CALLBACK_PRE_WITH_POSITION(Null, key_line, key_column, - JSON::ParseContext::Property, 0, key); - frames.top().get().assign(key, - internal::parse_null(line, column, stream)); - CALLBACK_POST(Null); - goto do_parse_object_property_end; - - // A string is a sequence of Unicode code points wrapped with quotation - // marks (U+0022). 
See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_string_quote: - CALLBACK_PRE_WITH_POSITION(String, key_line, key_column, - JSON::ParseContext::Property, 0, key); - frames.top().get().assign( - key, Result{internal::parse_string(line, column, stream)}); - CALLBACK_POST(String); - goto do_parse_object_property_end; - - case internal::token_number_minus: - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - if (callback) { - const auto value{ - internal::parse_number(line, column, stream, character)}; - if (value.is_integer()) { - CALLBACK_PRE_WITH_POSITION(Integer, key_line, key_column, - JSON::ParseContext::Property, 0, key); - } else if (value.is_decimal()) { - CALLBACK_PRE_WITH_POSITION(Decimal, key_line, key_column, - JSON::ParseContext::Property, 0, key); - } else { - CALLBACK_PRE_WITH_POSITION(Real, key_line, key_column, - JSON::ParseContext::Property, 0, key); - } + case internal::token_object_key_delimiter: + goto do_scan_object_value; + default: + [[unlikely]] throw JSONParseError(line, column); + } - frames.top().get().assign(key, value); +do_scan_object_value: + internal::skip_whitespace(cursor, end, line, column); + if (cursor >= end) [[unlikely]] { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); + } + if constexpr (TrackPositions) { + column += 1; + } + character = *cursor++; - if (value.is_integer()) { - CALLBACK_POST(Integer); - } else if (value.is_decimal()) { - CALLBACK_POST(Decimal); - } else { - CALLBACK_POST(Real); - } - } else { - frames.top().get().assign( - key, internal::parse_number(line, column, stream, character)); + { + 
const auto value_line{line}; + const auto value_column{column}; + switch (character) { + case internal::token_array_begin: + goto do_scan_array; + case internal::token_object_begin: + goto do_scan_object; + case internal::token_true: + internal::scan_true(line, column, cursor, end); + tape.push_back({TapeType::True, 0, 0, 0, value_line, value_column}); + goto do_scan_object_property_end; + case internal::token_false: + internal::scan_false(line, column, cursor, end); + tape.push_back({TapeType::False, 0, 0, 0, value_line, value_column}); + goto do_scan_object_property_end; + case internal::token_null: + internal::scan_null(line, column, cursor, end); + tape.push_back({TapeType::Null, 0, 0, 0, value_line, value_column}); + goto do_scan_object_property_end; + case internal::token_string_quote: { + const auto string_start{ + static_cast(cursor - buffer_start)}; + internal::scan_string(line, column, cursor, end); + const auto string_length{static_cast( + cursor - buffer_start - string_start - 1)}; + tape.push_back({TapeType::String, string_start, string_length, 0, + value_line, value_column}); + goto do_scan_object_property_end; } - - goto do_parse_object_property_end; - - // Insignificant whitespace is allowed before or after any token. 
- // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_whitespace_line_feed: - column = 0; - line += 1; - goto do_parse_object_property_value; - case internal::token_whitespace_tabulation: - case internal::token_whitespace_carriage_return: - case internal::token_whitespace_space: - goto do_parse_object_property_value; - default: - goto error; + case internal::token_number_minus: + case internal::token_number_zero: + case internal::token_number_one: + case internal::token_number_two: + case internal::token_number_three: + case internal::token_number_four: + case internal::token_number_five: + case internal::token_number_six: + case internal::token_number_seven: + case internal::token_number_eight: + case internal::token_number_nine: { + const auto number_start{ + static_cast(cursor - buffer_start - 1)}; + internal::scan_number(line, column, cursor, end, + character); + const auto number_length{ + static_cast(cursor - buffer_start - number_start)}; + tape.push_back({TapeType::Number, number_start, number_length, 0, + value_line, value_column}); + goto do_scan_object_property_end; + } + default: + [[unlikely]] throw JSONParseError(line, column); + } } -do_parse_object_property_end: - assert(levels.top() == Container::Object); - column += 1; - character = static_cast(stream.get()); +do_scan_object_property_end: + internal::skip_whitespace(cursor, end, line, column); + if (cursor >= end) [[unlikely]] { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); + } + if constexpr (TrackPositions) { + column += 1; + } + character = *cursor++; switch (character) { - case internal::token_object_delimiter: - goto do_parse_object_property_key; - case internal::token_object_end: - CALLBACK_POST(Object); - goto do_parse_container_end; - - // Insignificant whitespace is allowed before or after any token. 
- // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_whitespace_line_feed: - column = 0; - line += 1; - goto do_parse_object_property_end; - case internal::token_whitespace_tabulation: - case internal::token_whitespace_carriage_return: - case internal::token_whitespace_space: - goto do_parse_object_property_end; + case internal::token_object_delimiter: + goto do_scan_object_key; + case internal::token_object_end: { + assert(!container_stack.empty()); + auto &frame{container_stack.back()}; + tape[frame.tape_index].count = frame.child_count; + tape.push_back({TapeType::ObjectEnd, 0, 0, 0, line, column}); + container_stack.pop_back(); + goto do_scan_container_end; + } default: - goto error; - } - - /* - * Finish parsing a container - */ - -error: - // For some strange reason, with certain AppleClang versions, - // the program crashes when de-allocating huge array/objects - // before throwing an error. The error goes away if we manually - // reset every frame of the resulting object. Compiler error? 
- // Seen on Apple clang version 14.0.3 (clang-1403.0.22.14.1) - while (!frames.empty()) { - frames.top().get().into(Result{nullptr}); - frames.pop(); + [[unlikely]] throw JSONParseError(line, column); } - throw JSONParseError(line, column); - -do_parse_container_end: - assert(!levels.empty()); - if (levels.size() == 1) { - return result.value(); +do_scan_container_end: + if (container_stack.empty()) { + return; } - frames.pop(); - levels.pop(); - if (levels.top() == Container::Array) { - goto do_parse_array_item_separator; + if (tape[container_stack.back().tape_index].type == TapeType::ArrayStart) { + goto do_scan_array_item_separator; } else { - goto do_parse_object_property_end; + goto do_scan_object_property_end; } } // NOLINTEND(cppcoreguidelines-avoid-goto) -auto internal_parse_json( - const std::basic_string - &input, - std::uint64_t &line, std::uint64_t &column, - const JSON::ParseCallback &callback) -> JSON { - std::basic_istringstream> - stream{input}; - return internal_parse_json(stream, line, column, callback); -} - } // namespace sourcemeta::core -#undef CALLBACK_PRE -#undef CALLBACK_PRE_WITH_POSITION -#undef CALLBACK_POST - #endif diff --git a/vendor/core/src/core/json/stringify.h b/vendor/core/src/core/json/stringify.h index d3b03d3..c32dcca 100644 --- a/vendor/core/src/core/json/stringify.h +++ b/vendor/core/src/core/json/stringify.h @@ -6,6 +6,9 @@ #include "grammar.h" #include // std::transform, std::sort +#include // std::array +#include // assert +#include // std::to_chars #include // std::size_t #include // std::int64_t #include // std::setprecision @@ -13,7 +16,7 @@ #include // std::next, std::cbegin, std::cend, std::back_inserter #include // std::basic_ostream #include // std::ostringstream -#include // std::to_string +#include // std::basic_string #include // std::vector namespace sourcemeta::core::internal { @@ -61,11 +64,13 @@ auto stringify( const std::int64_t value, std::basic_ostream &stream) -> void { - const auto 
string{std::to_string(value)}; - stream.write(string.c_str(), + std::array buffer{}; + const auto [end_pointer, error_code] = + std::to_chars(buffer.data(), buffer.data() + buffer.size(), value); + stream.write(buffer.data(), static_cast::int_type>( - string.size())); + end_pointer - buffer.data())); } template