diff --git a/bench/resources/jdk17_suite/hash.jsonnet b/bench/resources/jdk17_suite/hash.jsonnet new file mode 100644 index 00000000..f22881c2 --- /dev/null +++ b/bench/resources/jdk17_suite/hash.jsonnet @@ -0,0 +1,15 @@ +local seed = 'sjsonnet-π-你好-🙂'; +local payload = std.repeat(seed, 4096); + +[ + std.md5(payload), + std.sha1(payload), + std.sha256(payload), + std.sha512(payload), + std.sha3(payload), + std.md5(payload + 'x'), + std.sha1(payload + 'x'), + std.sha256(payload + 'x'), + std.sha512(payload + 'x'), + std.sha3(payload + 'x'), +] diff --git a/bench/resources/jdk17_suite/hash.jsonnet.golden b/bench/resources/jdk17_suite/hash.jsonnet.golden new file mode 100644 index 00000000..7837ad1a --- /dev/null +++ b/bench/resources/jdk17_suite/hash.jsonnet.golden @@ -0,0 +1,12 @@ +[ + "165bc8753a6c3602e01e93e67f446dc1", + "03d65852d06704e568d463980671b2f83258a631", + "079fd3c2592dc4c9e33e9ac4ddfd3b718f7fa3967ce2c665e6cc1ca7f7ae59bb", + "a7c354161415d600561ebcd0ba827d2ed47e5a3b17505e67ce9f367841d586e67c983212d9f9e38c3d54e2fe8cc9c657204b138a15a714909afe8f33064cd518", + "6746867e50f7e0bf298dfec41831460c638827a57243d13d5ce240e362dee95b603abbaf4b0d3be2df447ecd773ba1bc3560049afea9b32493ead77b72002ac8", + "3d176bc0962944b43e15dfc5a23423d5", + "fade71afb8da391b3e2fe225817f88b1929c463f", + "b3eca3625fccdb13bb8fade19a9d7557a24163c7cf5ba8b60013d612074a9328", + "c5811f7c6e4c0b75b721212b58dfb567b5ecdf93f940308b51b33a9937d7c47548dee504999285a636600cfb2f22d3794afb856732ca7268c775cabdf81bda5e", + "3aacea0bd0120dcae41c98ecbcef3d9f5fa15ebcd25e524a6f183ef5757dea87908298a568b513cde81f036bffe8b88db629d50328eb61605c5b8fdf6958c7e0" +] diff --git a/bench/resources/jdk17_suite/repeat_format.jsonnet b/bench/resources/jdk17_suite/repeat_format.jsonnet new file mode 100644 index 00000000..efffab50 --- /dev/null +++ b/bench/resources/jdk17_suite/repeat_format.jsonnet @@ -0,0 +1,19 @@ +local values = std.range(0, 512); +local words = std.makeArray(128, function(i) 'w' + i); +local repeated = 
std.repeat('ab', 4096); +local decimal = std.join('|', ['%08d' % v for v in values]); +local hex = std.join('|', ['%010x' % v for v in values]); +local left = std.join('|', ['%-20s' % w for w in words]); +local right = std.join('|', ['%20s' % w for w in words]); + +{ + repeatedLen: std.length(repeated), + decimalLen: std.length(decimal), + decimalTail: std.substr(decimal, std.length(decimal) - 8, 8), + hexLen: std.length(hex), + hexTail: std.substr(hex, std.length(hex) - 10, 10), + leftLen: std.length(left), + leftTail: std.substr(left, std.length(left) - 20, 20), + rightLen: std.length(right), + rightTail: std.substr(right, std.length(right) - 20, 20), +} diff --git a/bench/resources/jdk17_suite/repeat_format.jsonnet.golden b/bench/resources/jdk17_suite/repeat_format.jsonnet.golden new file mode 100644 index 00000000..588251b1 --- /dev/null +++ b/bench/resources/jdk17_suite/repeat_format.jsonnet.golden @@ -0,0 +1,11 @@ +{ + "decimalLen": 4616, + "decimalTail": "00000512", + "hexLen": 5642, + "hexTail": "0000000200", + "leftLen": 2687, + "leftTail": "w127                ", + "repeatedLen": 8192, + "rightLen": 2687, + "rightTail": "                w127" +} diff --git a/bench/resources/jdk17_suite/split_resolve.jsonnet b/bench/resources/jdk17_suite/split_resolve.jsonnet new file mode 100644 index 00000000..4cf8047f --- /dev/null +++ b/bench/resources/jdk17_suite/split_resolve.jsonnet @@ -0,0 +1,18 @@ +local parts = std.makeArray(512, function(i) 'segment' + i); +local joined = std.join('::', parts); +local files = std.makeArray(512, function(i) 'a/b/c/file' + i + '.jsonnet'); +local split = std.split(joined, '::'); +local splitLimit = std.splitLimit(joined, '::', 128); +local splitLimitR = std.splitLimitR(joined, '::', 128); +local resolved = std.join('|', [std.resolvePath(f, 'libsonnet/main.libsonnet') for f in files]); + +{ + splitLen: std.length(split), + splitJoinLen: std.length(std.join('', split)), + splitLimitLen: std.length(splitLimit), + splitLimitLastLen: std.length(splitLimit[128]), +
splitLimitRLen: std.length(splitLimitR), + splitLimitRFirstLen: std.length(splitLimitR[0]), + resolveLen: std.length(resolved), + resolveTail: std.substr(resolved, std.length(resolved) - 30, 30), +} diff --git a/bench/resources/jdk17_suite/split_resolve.jsonnet.golden b/bench/resources/jdk17_suite/split_resolve.jsonnet.golden new file mode 100644 index 00000000..fe300b11 --- /dev/null +++ b/bench/resources/jdk17_suite/split_resolve.jsonnet.golden @@ -0,0 +1,10 @@ +{ + "resolveLen": 15871, + "resolveTail": "a/b/c/libsonnet/main.libsonnet", + "splitJoinLen": 5010, + "splitLen": 512, + "splitLimitLastLen": 4606, + "splitLimitLen": 129, + "splitLimitRFirstLen": 4496, + "splitLimitRLen": 129 +} diff --git a/build.mill b/build.mill index 3eb2b60a..58350b72 100644 --- a/build.mill +++ b/build.mill @@ -111,6 +111,7 @@ object bench extends ScalaModule with JmhModule with ScalafmtModule { this.moduleDir / "resources" / "bug_suite", this.moduleDir / "resources" / "cpp_suite", this.moduleDir / "resources" / "go_suite", + this.moduleDir / "resources" / "jdk17_suite", this.moduleDir / "resources" / "sjsonnet_suite" ) diff --git a/sjsonnet/src-js/sjsonnet/Platform.scala b/sjsonnet/src-js/sjsonnet/Platform.scala index 22cfb9f0..e35a79e2 100644 --- a/sjsonnet/src-js/sjsonnet/Platform.scala +++ b/sjsonnet/src-js/sjsonnet/Platform.scala @@ -7,6 +7,22 @@ import java.util.regex.Pattern import scala.collection.mutable object Platform { + private def repeatCapacity(s: String, count: Int): Int = + if (count > 0 && s.length <= Int.MaxValue / count) s.length * count else 0 + + def repeatString(s: String, count: Int): String = { + if (count <= 0 || s.isEmpty) "" + else { + val builder = new StringBuilder(repeatCapacity(s, count)) + var i = 0 + while (i < count) { + builder.append(s) + i += 1 + } + builder.toString() + } + } + private def nodeToJson(node: Node): ujson.Value = node match { case _: Node.ScalarNode => YamlDecoder.forAny.construct(node).getOrElse("") match { diff --git 
a/sjsonnet/src-jvm-native/sjsonnet/SjsonnetMainBase.scala b/sjsonnet/src-jvm-native/sjsonnet/SjsonnetMainBase.scala index 21cd1703..ad31d4c5 100644 --- a/sjsonnet/src-jvm-native/sjsonnet/SjsonnetMainBase.scala +++ b/sjsonnet/src-jvm-native/sjsonnet/SjsonnetMainBase.scala @@ -353,10 +353,10 @@ object SjsonnetMainBase { codeFiles: Seq[String], wd: os.Path): Map[String, String] = { - def split(s: String) = s.split("=", 2) match { - case Array(x) => (x, System.getenv(x)) - case Array(x, v) => (x, v) - case _ => throw new IllegalArgumentException("invalid binding: " + s) + def split(s: String) = { + val idx = s.indexOf('=') + if (idx < 0) (s, System.getenv(s)) + else (s.substring(0, idx), s.substring(idx + 1)) } def splitMap(s: Seq[String], f: String => String) = diff --git a/sjsonnet/src-jvm/sjsonnet/Platform.scala b/sjsonnet/src-jvm/sjsonnet/Platform.scala index 2bc28595..db6e81b7 100644 --- a/sjsonnet/src-jvm/sjsonnet/Platform.scala +++ b/sjsonnet/src-jvm/sjsonnet/Platform.scala @@ -1,8 +1,10 @@ package sjsonnet import java.io.{BufferedInputStream, ByteArrayOutputStream, File, FileInputStream} +import java.nio.charset.StandardCharsets.UTF_8 import java.util import java.util.Base64 +import java.util.HexFormat import java.util.zip.GZIPOutputStream import com.google.re2j.Pattern import net.jpountz.xxhash.{StreamingXXHash64, XXHashFactory} @@ -16,6 +18,11 @@ import scala.collection.mutable import scala.jdk.CollectionConverters.* object Platform { + private val hexFormat = HexFormat.of() + + def repeatString(s: String, count: Int): String = + if (count <= 0) "" else s.repeat(count) + def gzipBytes(b: Array[Byte]): String = { val outputStream: ByteArrayOutputStream = new ByteArrayOutputStream(b.length) val gzip: GZIPOutputStream = new GZIPOutputStream(outputStream) @@ -29,7 +36,7 @@ object Platform { } def gzipString(s: String): String = { - gzipBytes(s.getBytes()) + gzipBytes(s.getBytes(UTF_8)) } /** @@ -50,7 +57,7 @@ object Platform { } def xzString(s: String, 
compressionLevel: Option[Int]): String = { - xzBytes(s.getBytes(), compressionLevel) + xzBytes(s.getBytes(UTF_8), compressionLevel) } private def nodeToJson(node: Any): ujson.Value = node match { @@ -105,13 +112,10 @@ object Platform { } } - private def computeHash(algorithm: String, s: String) = { - java.security.MessageDigest - .getInstance(algorithm) - .digest(s.getBytes("UTF-8")) - .map { b => String.format("%02x", (b & 0xff).asInstanceOf[Integer]) } - .mkString - } + private def computeHash(algorithm: String, s: String): String = + hexFormat.formatHex( + java.security.MessageDigest.getInstance(algorithm).digest(s.getBytes(UTF_8)) + ) def md5(s: String): String = computeHash("MD5", s) diff --git a/sjsonnet/src-native/sjsonnet/Platform.scala b/sjsonnet/src-native/sjsonnet/Platform.scala index 8ea3be50..26179bb1 100644 --- a/sjsonnet/src-native/sjsonnet/Platform.scala +++ b/sjsonnet/src-native/sjsonnet/Platform.scala @@ -1,6 +1,7 @@ package sjsonnet import java.io.{ByteArrayOutputStream, File} +import java.nio.charset.StandardCharsets.UTF_8 import java.util import java.util.Base64 import java.util.zip.GZIPOutputStream @@ -9,6 +10,24 @@ import scala.collection.mutable import org.virtuslab.yaml.* object Platform { + private val hexChars = "0123456789abcdef".toCharArray + + private def repeatCapacity(s: String, count: Int): Int = + if (count > 0 && s.length <= Int.MaxValue / count) s.length * count else 0 + + def repeatString(s: String, count: Int): String = { + if (count <= 0 || s.isEmpty) "" + else { + val builder = new StringBuilder(repeatCapacity(s, count)) + var i = 0 + while (i < count) { + builder.append(s) + i += 1 + } + builder.toString() + } + } + def gzipBytes(b: Array[Byte]): String = { val outputStream: ByteArrayOutputStream = new ByteArrayOutputStream(b.length) val gzip: GZIPOutputStream = new GZIPOutputStream(outputStream) @@ -22,7 +41,7 @@ object Platform { } def gzipString(s: String): String = { - gzipBytes(s.getBytes()) + 
gzipBytes(s.getBytes(UTF_8)) } def xzBytes(s: Array[Byte], compressionLevel: Option[Int]): String = { @@ -140,14 +159,23 @@ object Platform { result.mkString("\n") } - private def computeHash(algorithm: String, s: String) = { - java.security.MessageDigest - .getInstance(algorithm) - .digest(s.getBytes("UTF-8")) - .map { b => String.format("%02x", (b & 0xff).asInstanceOf[Integer]) } - .mkString + private def bytesToHex(bytes: Array[Byte]): String = { + val out = new Array[Char](bytes.length * 2) + var i = 0 + var j = 0 + while (i < bytes.length) { + val b = bytes(i) & 0xff + out(j) = hexChars(b >>> 4) + out(j + 1) = hexChars(b & 0x0f) + i += 1 + j += 2 + } + new String(out) } + private def computeHash(algorithm: String, s: String): String = + bytesToHex(java.security.MessageDigest.getInstance(algorithm).digest(s.getBytes(UTF_8))) + def md5(s: String): String = computeHash("MD5", s) def sha1(s: String): String = computeHash("SHA-1", s) diff --git a/sjsonnet/src/sjsonnet/DecimalFormat.scala b/sjsonnet/src/sjsonnet/DecimalFormat.scala index c8760427..112555f5 100644 --- a/sjsonnet/src/sjsonnet/DecimalFormat.scala +++ b/sjsonnet/src/sjsonnet/DecimalFormat.scala @@ -23,7 +23,7 @@ object DecimalFormat { val sign = if (n < 0) "-" else "" val absN = math.abs(n) val nWidth = if (absN == 0) 1 else Math.log10(absN.toDouble).toInt + 1 - sign + "0" * (targetWidth - nWidth) + absN + sign + Platform.repeatString("0", targetWidth - nWidth) + absN } private def rightPad(n0: Long, minWidth: Int, maxWidth: Int): String = { @@ -32,7 +32,7 @@ object DecimalFormat { val n = (n0 / Math.pow(10, trailingZeroes(n0))).toInt assert(n == math.abs(n)) val nWidth = if (n == 0) 1 else Math.log10(n).toInt + 1 - ("" + n + "0" * (minWidth - nWidth)).take(maxWidth) + ("" + n + Platform.repeatString("0", minWidth - nWidth)).take(maxWidth) } } diff --git a/sjsonnet/src/sjsonnet/Format.scala b/sjsonnet/src/sjsonnet/Format.scala index 8d83afa1..212e2600 100644 --- a/sjsonnet/src/sjsonnet/Format.scala +++ 
b/sjsonnet/src/sjsonnet/Format.scala @@ -254,14 +254,14 @@ object Format { if (lhs2.isEmpty && mhs.isEmpty) rhs else if (lhs2.isEmpty) mhs + rhs else lhs2 + mhs + rhs - } else if (formatted.zeroPadded) { - if (numeric) lhs2 + mhs + "0" * missingWidth + rhs - else { - if (formatted.leftAdjusted) lhs2 + mhs + rhs + " " * missingWidth - else " " * missingWidth + lhs2 + mhs + rhs - } - } else if (formatted.leftAdjusted) lhs2 + mhs + rhs + " " * missingWidth - else " " * missingWidth + lhs2 + mhs + rhs + } else { + val padding = + if (formatted.zeroPadded && numeric) Platform.repeatString("0", missingWidth) + else Platform.repeatString(" ", missingWidth) + if (formatted.zeroPadded && numeric) lhs2 + mhs + padding + rhs + else if (formatted.leftAdjusted) lhs2 + mhs + rhs + padding + else padding + lhs2 + mhs + rhs + } } def format(s: String, values0: Val, pos: Position)(implicit evaluator: EvalScope): String = { @@ -929,7 +929,7 @@ object Format { if (precision == FormatSpec.NoNumber) rhs else { val shortage = precision - rhs.length - if (shortage > 0) "0" * shortage + rhs else rhs + if (shortage > 0) Platform.repeatString("0", shortage) + rhs else rhs } } diff --git a/sjsonnet/src/sjsonnet/stdlib/ArrayModule.scala b/sjsonnet/src/sjsonnet/stdlib/ArrayModule.scala index 1f578996..5e06a7a0 100644 --- a/sjsonnet/src/sjsonnet/stdlib/ArrayModule.scala +++ b/sjsonnet/src/sjsonnet/stdlib/ArrayModule.scala @@ -861,13 +861,7 @@ object ArrayModule extends AbstractFunctionModule { } val res: Val = what match { case Val.Str(_, str) => - val builder = new StringBuilder(str.length * count) - var i = 0 - while (i < count) { - builder.append(str) - i += 1 - } - Val.Str(pos, builder.toString()) + Val.Str(pos, Platform.repeatString(str, count)) case a: Val.Arr => if (a.length.toLong * count.toLong > Int.MaxValue) Error.fail("array too large", pos)(ev) diff --git a/sjsonnet/src/sjsonnet/stdlib/EncodingModule.scala b/sjsonnet/src/sjsonnet/stdlib/EncodingModule.scala index f2375440..f6545648 
100644 --- a/sjsonnet/src/sjsonnet/stdlib/EncodingModule.scala +++ b/sjsonnet/src/sjsonnet/stdlib/EncodingModule.scala @@ -5,8 +5,6 @@ import java.nio.charset.StandardCharsets.UTF_8 import sjsonnet._ import sjsonnet.functions.AbstractFunctionModule -import java.nio.charset.StandardCharsets.UTF_8 - /** * Native implementations for Jsonnet standard-library entries in this module. * diff --git a/sjsonnet/src/sjsonnet/stdlib/StringModule.scala b/sjsonnet/src/sjsonnet/stdlib/StringModule.scala index defd7b19..707d2c72 100644 --- a/sjsonnet/src/sjsonnet/stdlib/StringModule.scala +++ b/sjsonnet/src/sjsonnet/stdlib/StringModule.scala @@ -465,21 +465,16 @@ object StringModule extends AbstractFunctionModule { val b = new mutable.ArrayBuilder.ofRef[Eval] if (maxSplits >= 0 && maxSplits < Int.MaxValue) b.sizeHint(maxSplits + 1) var sz = 0 - var i = 0 var start = 0 + var next = if (maxSplits == 0) -1 else str.indexOf(cStr, start) - while (i <= str.length - cStr.length && (maxSplits < 0 || sz < maxSplits)) { - if (str.startsWith(cStr, i)) { - val finalStr = Val.Str(pos, str.substring(start, i)) - b.+=(finalStr) - start = i + cStr.length - sz += 1 - i += cStr.length - } else { - i += 1 - } + while (next >= 0 && (maxSplits < 0 || sz < maxSplits)) { + b += Val.Str(pos, str.substring(start, next)) + start = next + cStr.length + sz += 1 + next = if (maxSplits >= 0 && sz >= maxSplits) -1 else str.indexOf(cStr, start) } - b.+=(Val.Str(pos, str.substring(start))) + b += Val.Str(pos, str.substring(start)) sz += 1 b.result() } @@ -787,21 +782,9 @@ object StringModule extends AbstractFunctionModule { builtin(SplitLimit), builtin(SplitLimitR), builtin("resolvePath", "f", "r") { (_, _, f: String, r: String) => - val parts = f.split("/", -1) - val prefixCount = parts.length - 1 - if (prefixCount <= 0) r - else { - val out = new java.lang.StringBuilder(f.length + r.length) - var i = 0 - while (i < prefixCount) { - if (i > 0) out.append('/') - out.append(parts(i)) - i += 1 - } - out.append('/') 
- out.append(r) - out.toString - } + val slash = f.lastIndexOf('/') + if (slash < 0) r + else f.substring(0, slash + 1) + r }, builtin(StringChars), builtin(ParseInt), diff --git a/sjsonnet/test/resources/new_test_suite/jdk17_hash_directional-jvm.jsonnet b/sjsonnet/test/resources/new_test_suite/jdk17_hash_directional-jvm.jsonnet new file mode 100644 index 00000000..2125bd14 --- /dev/null +++ b/sjsonnet/test/resources/new_test_suite/jdk17_hash_directional-jvm.jsonnet @@ -0,0 +1,5 @@ +// JVM/Native-only directional hash vectors. Scala.js intentionally does not implement these APIs. + +local s = "sjsonnet-π-你好-🙂"; + +std.assertEqual(std.sha256(s), "d42b7a53590c05dc117ea4cfc8dd0a52670e125ee505dc9a280205081a560d84") diff --git a/sjsonnet/test/resources/new_test_suite/jdk17_hash_directional-jvm.jsonnet.golden b/sjsonnet/test/resources/new_test_suite/jdk17_hash_directional-jvm.jsonnet.golden new file mode 100644 index 00000000..27ba77dd --- /dev/null +++ b/sjsonnet/test/resources/new_test_suite/jdk17_hash_directional-jvm.jsonnet.golden @@ -0,0 +1 @@ +true diff --git a/sjsonnet/test/resources/new_test_suite/jdk17_shared_directional.jsonnet b/sjsonnet/test/resources/new_test_suite/jdk17_shared_directional.jsonnet new file mode 100644 index 00000000..d4d0cbdd --- /dev/null +++ b/sjsonnet/test/resources/new_test_suite/jdk17_shared_directional.jsonnet @@ -0,0 +1,14 @@ +// Directional tests for platform-specialized string helpers. +// These assert exact outputs, not only roundtrips, so consistently-wrong implementations fail. 
+ +std.assertEqual(std.repeat("é", 3), "ééé") && + +std.assertEqual("%-5s" % "x", "x    ") && +std.assertEqual("%010x" % 255, "00000000ff") && + +std.assertEqual(std.split("a::::b::", "::"), ["a", "", "b", ""]) && +std.assertEqual(std.split("aaaa", "aa"), ["", "", ""]) && +std.assertEqual(std.splitLimit("a::b::c", "::", 0), ["a::b::c"]) && +std.assertEqual(std.splitLimitR("a::b::c", "::", 1), ["a::b", "c"]) && + +std.assertEqual(std.resolvePath("a/b/", "d.libsonnet"), "a/b/d.libsonnet") diff --git a/sjsonnet/test/resources/new_test_suite/jdk17_shared_directional.jsonnet.golden b/sjsonnet/test/resources/new_test_suite/jdk17_shared_directional.jsonnet.golden new file mode 100644 index 00000000..27ba77dd --- /dev/null +++ b/sjsonnet/test/resources/new_test_suite/jdk17_shared_directional.jsonnet.golden @@ -0,0 +1 @@ +true