From 6758e7a39bee056bb0cd4f2be615e64c277ad3d2 Mon Sep 17 00:00:00 2001 From: Stephen Amar Date: Fri, 1 May 2026 20:39:43 +0000 Subject: [PATCH 1/5] feat: async import loader for JS via static preload (#476) Adds Preloader + ImportFinder to discover imports from each file's AST and drive caller-controlled (async) loading before synchronous evaluation. Exposes SjsonnetMain.interpretAsync for the JS build, accepting a Promise-returning loader so callers can use fetch / FileReader / etc. Co-authored-by: Isaac --- readme.md | 24 ++ sjsonnet/src-js/sjsonnet/SjsonnetMain.scala | 230 ++++++++++++------ sjsonnet/src/sjsonnet/ImportFinder.scala | 57 +++++ sjsonnet/src/sjsonnet/Preloader.scala | 132 ++++++++++ .../src/sjsonnet/stdlib/EncodingModule.scala | 2 - .../src-js/sjsonnet/InterpretAsyncTests.scala | 107 ++++++++ .../test/src/sjsonnet/PreloaderTests.scala | 127 ++++++++++ 7 files changed, 608 insertions(+), 71 deletions(-) create mode 100644 sjsonnet/src/sjsonnet/ImportFinder.scala create mode 100644 sjsonnet/src/sjsonnet/Preloader.scala create mode 100644 sjsonnet/test/src-js/sjsonnet/InterpretAsyncTests.scala create mode 100644 sjsonnet/test/src/sjsonnet/PreloaderTests.scala diff --git a/readme.md b/readme.md index 09fad1a93..3e73e157a 100644 --- a/readme.md +++ b/readme.md @@ -125,6 +125,30 @@ filesystem, you have to provide an explicit import callback that you can use to resolve imports yourself (whether through Node's `fs` module, or by emulating a filesystem in-memory) +#### Async imports + +`SjsonnetMain.interpret` is synchronous, which is awkward in browsers where +files come from `fetch` or `FileReader`. Use `SjsonnetMain.interpretAsync` +instead: the loader returns a `Promise` and the call returns a `Promise` of +the result. Imports are statically discovered from each parsed file's AST, +loaded concurrently, then evaluated synchronously against the populated cache. + +```javascript +const result = await SjsonnetMain.interpretAsync( + "local lib = import 'lib.libsonnet'; lib.greet('world')", + {}, // extVars + {}, // tlaVars + "", // initial working directory + (wd, imported) => imported, // resolver, same shape as `interpret` + // loader: returns a Promise of the file contents (string for `import` / + // `importstr`, or bytes for `importbin`) + async (path, binary) => { + const response = await fetch("/files/" + path); + return binary ? new Uint8Array(await response.arrayBuffer()) : await response.text(); + } +); +``` + ### Running deeply recursive Jsonnet programs The depth of recursion is limited by running environment stack size. You can run Sjsonnet with increased diff --git a/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala b/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala index f96a6cea3..23d7e5c7f 100644 --- a/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala +++ b/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala @@ -3,7 +3,11 @@ package sjsonnet import sjsonnet.stdlib.NativeRegex import scala.collection.mutable +import scala.concurrent.Future +import scala.scalajs.concurrent.JSExecutionContext.Implicits.queue import scala.scalajs.js +import scala.scalajs.js.JSConverters._ +import scala.scalajs.js.Thenable.Implicits._ import scala.scalajs.js.annotation.{JSExport, JSExportTopLevel} import scala.scalajs.js.typedarray.{ArrayBuffer, Int8Array, Uint8Array} @@ -49,6 +53,78 @@ object SjsonnetMain { case _ => None } + /** Convert the value returned by a JS import loader into a [[ResolvedFile]]. */ + private def toResolvedFile(path: String, value: Any, binaryData: Boolean): ResolvedFile = + value match { + case s: String => StaticResolvedFile(s) + case arr: Array[Byte] => StaticBinaryResolvedFile(arr) + case other => + toBytesFromJs(other) match { + case Some(bytes) => StaticBinaryResolvedFile(bytes) + case None => + val msg = + s"Import loader for '$path' must return a string or byte array, got: ${ + if (other == null) "null" else other.getClass.getName + }" + js.Dynamic.global.console.error(msg) + throw js.JavaScriptException(msg) + } + } + + /** Build the parent importer used during preload (only its `resolve` is called). */ + private def jsResolveImporter(importResolver: js.Function2[String, String, String]): Importer = + new Importer { + def resolve(docBase: Path, importName: String): Option[Path] = + importResolver(docBase.asInstanceOf[JsVirtualPath].path, importName) match { + case null => None + case s => Some(JsVirtualPath(s)) + } + def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = + throw new RuntimeException( + s"Importer.read should not be called during async preload (path=$path)" + ) + } + + private def parseStringMap(label: String, value: js.Any): Map[String, String] = + try { + ujson.WebJson.transform(value, ujson.Value).obj.toMap.map { + case (k, ujson.Str(v)) => (k, v) + case (k, _) => + throw js.JavaScriptException(s"$label '$k' must be a string value, got non-string") + } + } catch { + case e: js.JavaScriptException => throw e + case e: Exception => + val msg = s"Failed to parse ${label.toLowerCase}: ${e.getMessage}" + js.Dynamic.global.console.error(msg, e.asInstanceOf[js.Any]) + throw js.JavaScriptException(msg) + } + + private def runInterpret( + text: String, + parsedExtVars: Map[String, String], + parsedTlaVars: Map[String, String], + wd0: String, + importer: Importer, + preserveOrder: Boolean): js.Any = { + val interp = new Interpreter( + parsedExtVars, + parsedTlaVars, + JsVirtualPath(wd0), + importer, + parseCache = new DefaultParseCache, + settings = new Settings(preserveOrder = preserveOrder), + std = + new sjsonnet.stdlib.StdLibModule(nativeFunctions = Map.from(NativeRegex.functions)).module + ) + interp.interpret0(text, JsVirtualPath("(memory)"), ujson.WebJson.Builder) match { + case Left(msg) => + js.Dynamic.global.console.error("Sjsonnet evaluation error:", msg) + throw js.JavaScriptException(msg) + case Right(v) => v + } + } + @JSExport def interpret( text: String, @@ -59,85 +135,101 @@ object SjsonnetMain { importLoader: js.Function2[String, Boolean, Any], preserveOrder: Boolean = false): js.Any = { try { - val parsedExtVars = - try { - ujson.WebJson.transform(extVars, ujson.Value).obj.toMap.map { - case (k, ujson.Str(v)) => (k, v) - case (k, _) => - throw js.JavaScriptException( - s"External variable '$k' must be a string value, got non-string" - ) + val parsedExtVars = parseStringMap("External variable", extVars) + val parsedTlaVars = parseStringMap("Top-level argument", tlaVars) + + val importer = new Importer { + def resolve(docBase: Path, importName: String): Option[Path] = + importResolver(docBase.asInstanceOf[JsVirtualPath].path, importName) match { + case null => None + case s => Some(JsVirtualPath(s)) } - } catch { - case e: js.JavaScriptException => throw e - case e: Exception => - val msg = s"Failed to parse external variables: ${e.getMessage}" - js.Dynamic.global.console.error(msg, e.asInstanceOf[js.Any]) - throw js.JavaScriptException(msg) - } + def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = + Some( + toResolvedFile( + path.asInstanceOf[JsVirtualPath].path, + importLoader(path.asInstanceOf[JsVirtualPath].path, binaryData), + binaryData + ) + ) + } - val parsedTlaVars = - try { - ujson.WebJson.transform(tlaVars, ujson.Value).obj.toMap.map { - case (k, ujson.Str(v)) => (k, v) - case (k, _) => - throw js.JavaScriptException( - s"Top-level argument '$k' must be a string value, got non-string" - ) + runInterpret(text, parsedExtVars, parsedTlaVars, wd0, importer, preserveOrder) + } catch { + case e: js.JavaScriptException => throw e + case e: Exception => + val msg = s"Sjsonnet internal error: ${e.getClass.getName}: ${e.getMessage}" + js.Dynamic.global.console.error(msg, e.asInstanceOf[js.Any]) + throw js.JavaScriptException(msg) + } + } + + /** + * Async variant of [[interpret]]. Accepts an `importLoader` that returns a `Promise` of the file + * contents, and returns a `Promise` resolving to the rendered output. + * + * Implementation: imports are statically discovered by parsing each file's AST, loaded + * concurrently via the user-supplied async loader, and inserted into a cache. Once the transitive + * closure is loaded, evaluation runs synchronously against the cache. + */ + @JSExport + def interpretAsync( + text: String, + extVars: js.Any, + tlaVars: js.Any, + wd0: String, + importResolver: js.Function2[String, String, String], + importLoader: js.Function2[String, Boolean, js.Promise[Any]], + preserveOrder: Boolean = false): js.Promise[js.Any] = { + try { + val parsedExtVars = parseStringMap("External variable", extVars) + val parsedTlaVars = parseStringMap("Top-level argument", tlaVars) + + val parentImporter = jsResolveImporter(importResolver) + val preloader = new Preloader(parentImporter) + val entryPath = JsVirtualPath("(memory)") + + preloader.add(entryPath, StaticResolvedFile(text), ImportFinder.Kind.Code) match { + case Left(err) => return js.Promise.reject(err.getMessage) + case Right(_) => + } + + def loadOne(p: Preloader.Pending): Future[Unit] = { + val pathStr = p.path.asInstanceOf[JsVirtualPath].path + val promise = importLoader(pathStr, p.binaryData) + // implicit Thenable.Implicits converts Promise[Any] to Future[Any] + (promise: Future[Any]).map { value => + val resolved = toResolvedFile(pathStr, value, p.binaryData) + preloader.add(p.path, resolved, p.kind) match { + case Left(err) => throw js.JavaScriptException(err.getMessage) + case Right(_) => () } - } catch { - case e: js.JavaScriptException => throw e - case e: Exception => - val msg = s"Failed to parse top-level arguments: ${e.getMessage}" - js.Dynamic.global.console.error(msg, e.asInstanceOf[js.Any]) - throw js.JavaScriptException(msg) } + } - val interp = new Interpreter( - parsedExtVars, - parsedTlaVars, - JsVirtualPath(wd0), - new Importer { - def resolve(docBase: Path, importName: String): Option[Path] = - importResolver(docBase.asInstanceOf[JsVirtualPath].path, importName) match { - case null => None - case s => Some(JsVirtualPath(s)) - } - def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = - importLoader(path.asInstanceOf[JsVirtualPath].path, binaryData) match { - case s: String => Some(StaticResolvedFile(s)) - case arr: Array[Byte] => Some(StaticBinaryResolvedFile(arr)) - case other => - // Handle JS-native binary types: Uint8Array, ArrayBuffer, or plain JS number[] - toBytesFromJs(other) match { - case Some(bytes) => Some(StaticBinaryResolvedFile(bytes)) - case None => - val msg = - s"Import loader for '${path}' must return a string or byte array, got: ${ - if (other == null) "null" else other.getClass.getName - }" - js.Dynamic.global.console.error(msg) - throw js.JavaScriptException(msg) - } - } - }, - parseCache = new DefaultParseCache, - settings = new Settings(preserveOrder = preserveOrder), - std = - new sjsonnet.stdlib.StdLibModule(nativeFunctions = Map.from(NativeRegex.functions)).module - ) - interp.interpret0(text, JsVirtualPath("(memory)"), ujson.WebJson.Builder) match { - case Left(msg) => - js.Dynamic.global.console.error("Sjsonnet evaluation error:", msg) - throw js.JavaScriptException(msg) - case Right(v) => v + def loop(): Future[Unit] = { + val batch = preloader.takePendingImports() + if (batch.isEmpty) Future.successful(()) + else Future.sequence(batch.map(loadOne)).flatMap(_ => loop()) } + + val result: Future[js.Any] = loop().map { _ => + runInterpret( + text, + parsedExtVars, + parsedTlaVars, + wd0, + preloader.importer, + preserveOrder + ) + } + result.toJSPromise } catch { - case e: js.JavaScriptException => throw e + case e: js.JavaScriptException => js.Promise.reject(e.exception) case e: Exception => val msg = s"Sjsonnet internal error: ${e.getClass.getName}: ${e.getMessage}" js.Dynamic.global.console.error(msg, e.asInstanceOf[js.Any]) - throw js.JavaScriptException(msg) + js.Promise.reject(msg) } } } diff --git a/sjsonnet/src/sjsonnet/ImportFinder.scala b/sjsonnet/src/sjsonnet/ImportFinder.scala new file mode 100644 index 000000000..501ad9489 --- /dev/null +++ b/sjsonnet/src/sjsonnet/ImportFinder.scala @@ -0,0 +1,57 @@ +package sjsonnet + +import scala.collection.mutable + +/** + * Walks an [[Expr]] AST collecting all `import`, `importstr`, and `importbin` expressions. Used by + * [[Preloader]] to discover the transitive set of files that need to be loaded before evaluation. + */ +object ImportFinder { + + sealed trait Kind { + + /** + * Whether the file should be read as raw bytes (`importbin`) vs. text (`import`/`importstr`). + */ + def binaryData: Boolean + + /** Whether the loaded file is itself Jsonnet code that may contain further imports. */ + def isCode: Boolean + } + + object Kind { + case object Code extends Kind { + def binaryData: Boolean = false + def isCode: Boolean = true + } + case object Str extends Kind { + def binaryData: Boolean = false + def isCode: Boolean = false + } + case object Bin extends Kind { + def binaryData: Boolean = true + def isCode: Boolean = false + } + } + + final case class Found(value: String, kind: Kind) + + def collect(expr: Expr): Seq[Found] = { + val buf = mutable.ArrayBuffer.empty[Found] + val walker = new Walker(buf) + walker.transform(expr) + buf.toSeq + } + + private class Walker(buf: mutable.ArrayBuffer[Found]) extends ExprTransform { + override def transform(expr: Expr): Expr = { + expr match { + case Expr.Import(_, v) => buf += Found(v, Kind.Code) + case Expr.ImportStr(_, v) => buf += Found(v, Kind.Str) + case Expr.ImportBin(_, v) => buf += Found(v, Kind.Bin) + case _ => + } + rec(expr) + } + } +} diff --git a/sjsonnet/src/sjsonnet/Preloader.scala b/sjsonnet/src/sjsonnet/Preloader.scala new file mode 100644 index 000000000..b1d72ab8e --- /dev/null +++ b/sjsonnet/src/sjsonnet/Preloader.scala @@ -0,0 +1,132 @@ +package sjsonnet + +import fastparse.Parsed + +import scala.collection.mutable + +/** + * Drives asynchronous (or otherwise externally-controlled) loading of imports by statically + * discovering them ahead of evaluation. + * + * Jsonnet has no dynamic imports: every `import`, `importstr`, or `importbin` expression has a + * literal string path. So given the parsed AST of an entry file we can enumerate its imports, load + * them, parse the loaded code files for further imports, and repeat until the closure is known. + * Once all files are in the cache, normal synchronous evaluation can run. + * + * Usage (pseudo-code): + * {{{ + * val preloader = new Preloader(parentImporter) + * preloader.add(entryPath, StaticResolvedFile(entryText), Preloader.EntryKind) + * while (preloader.pendingImports.nonEmpty) { + * val batch = preloader.takePendingImports() + * for (p <- batch) { + * val content = await asyncLoad(p.path, p.binaryData) // platform-specific async + * preloader.add(p.path, content, p.kind) + * } + * } + * val interpreter = new Interpreter(..., importer = preloader.importer, ...) + * interpreter.interpret(entryText, entryPath) + * }}} + * + * @param parentImporter + * used only to resolve import names to [[Path]]s. Its `read` is never called. + * @param settings + * parser settings (recursion depth, etc.). + */ +class Preloader(parentImporter: Importer, settings: Settings = Settings.default) { + + private val internedStrings = new mutable.HashMap[String, String] + private val internedFieldSets = + new mutable.HashMap[Val.StaticObjectFieldSet, java.util.LinkedHashMap[ + String, + java.lang.Boolean + ]] + + private val cache = mutable.LinkedHashMap.empty[Path, ResolvedFile] + private val seen = mutable.HashSet.empty[(Path, ImportFinder.Kind)] + private val pending = mutable.ArrayBuffer.empty[Preloader.Pending] + + /** Resolve an import name relative to a base path, using the parent importer. */ + def resolve(docBase: Path, importName: String): Option[Path] = + parentImporter.resolve(docBase, importName) + + /** + * Register a loaded file in the cache and, if it's a Jsonnet code file, parse it to discover its + * imports. + * + * The returned `Either` reports parse errors for code files; binary or string imports never fail + * here. + */ + def add( + path: Path, + content: ResolvedFile, + kind: ImportFinder.Kind = ImportFinder.Kind.Code): Either[Error, Unit] = { + cache.put(path, content) + if (kind.isCode) discover(path, content) else Right(()) + } + + /** All imports queued for loading. */ + def pendingImports: Seq[Preloader.Pending] = pending.toSeq + + /** Atomically take and clear the queue of pending imports. */ + def takePendingImports(): Seq[Preloader.Pending] = { + val out = pending.toVector + pending.clear() + out + } + + /** True when no more imports need to be loaded. */ + def isComplete: Boolean = pending.isEmpty + + /** + * An [[Importer]] that resolves names through the parent importer but reads exclusively from this + * preloader's cache. Pass to an [[Interpreter]] for synchronous evaluation after preload + * completes. + */ + def importer: Importer = new Importer { + def resolve(docBase: Path, importName: String): Option[Path] = + parentImporter.resolve(docBase, importName) + def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = cache.get(path) + } + + /** Snapshot of the loaded cache, exposed so callers can inspect or persist it. */ + def loaded: collection.Map[Path, ResolvedFile] = cache + + private def discover(path: Path, content: ResolvedFile): Either[Error, Unit] = { + val parser = new Parser(path, internedStrings, internedFieldSets, settings) + try { + fastparse.parse(content.getParserInput(), parser.document(_)) match { + case f: Parsed.Failure => + val traced = f.trace() + Left(new ParseError(s"$path: ${traced.msg}", offset = traced.index)) + case Parsed.Success((expr, _), _) => + ImportFinder.collect(expr).foreach { found => + parentImporter.resolve(path, found.value) match { + case Some(resolved) => + if (seen.add((resolved, found.kind))) enqueue(resolved, found.kind) + case None => + // resolution failure is deferred until evaluation, where it will surface + // with a proper stack frame + } + } + Right(()) + } + } catch { + case e: ParseError => Left(e) + } + } + + private def enqueue(path: Path, kind: ImportFinder.Kind): Unit = { + if (cache.contains(path)) return + pending += Preloader.Pending(path, kind) + } +} + +object Preloader { + + /** A path that needs to be loaded before evaluation can proceed. */ + final case class Pending(path: Path, kind: ImportFinder.Kind) { + def binaryData: Boolean = kind.binaryData + def isCode: Boolean = kind.isCode + } +} diff --git a/sjsonnet/src/sjsonnet/stdlib/EncodingModule.scala b/sjsonnet/src/sjsonnet/stdlib/EncodingModule.scala index f2375440c..f65456487 100644 --- a/sjsonnet/src/sjsonnet/stdlib/EncodingModule.scala +++ b/sjsonnet/src/sjsonnet/stdlib/EncodingModule.scala @@ -5,8 +5,6 @@ import java.nio.charset.StandardCharsets.UTF_8 import sjsonnet._ import sjsonnet.functions.AbstractFunctionModule -import java.nio.charset.StandardCharsets.UTF_8 - /** * Native implementations for Jsonnet standard-library entries in this module. * diff --git a/sjsonnet/test/src-js/sjsonnet/InterpretAsyncTests.scala b/sjsonnet/test/src-js/sjsonnet/InterpretAsyncTests.scala new file mode 100644 index 000000000..187cdd91c --- /dev/null +++ b/sjsonnet/test/src-js/sjsonnet/InterpretAsyncTests.scala @@ -0,0 +1,107 @@ +package sjsonnet + +import utest._ + +import scala.concurrent.Future +import scala.scalajs.concurrent.JSExecutionContext.Implicits.queue +import scala.scalajs.js +import scala.scalajs.js.JSConverters._ + +object InterpretAsyncTests extends TestSuite { + + /** + * Wraps a synchronous file map as a JS Promise-returning loader, so the test exercises the real + * async code path. Records the order of resolved loads. + */ + private def makeAsyncLoader( + files: Map[String, String], + bin: Map[String, Array[Byte]] = Map.empty, + log: scala.collection.mutable.ArrayBuffer[String] = + scala.collection.mutable.ArrayBuffer.empty) + : js.Function2[String, Boolean, js.Promise[Any]] = { + (path: String, binaryData: Boolean) => + Future { + log += path + if (binaryData) { + bin.get(path) match { + case Some(b) => b.toJSArray.asInstanceOf[Any] + case None => throw js.JavaScriptException(s"missing binary file: $path") + } + } else { + files.get(path) match { + case Some(s) => s.asInstanceOf[Any] + case None => throw js.JavaScriptException(s"missing file: $path") + } + } + }.toJSPromise + } + + private def makeResolver(known: Set[String]): js.Function2[String, String, String] = + (_: String, name: String) => if (known.contains(name)) name else null + + private def runAsync( + text: String, + files: Map[String, String] = Map.empty, + bin: Map[String, Array[Byte]] = Map.empty): Future[ujson.Value] = { + val loader = makeAsyncLoader(files, bin) + val resolver = makeResolver(files.keySet ++ bin.keySet) + SjsonnetMain + .interpretAsync( + text, + js.Dictionary[js.Any](), + js.Dictionary[js.Any](), + "/", + resolver, + loader + ) + .toFuture + .map(v => ujson.WebJson.transform(v, ujson.Value)) + } + + def tests: Tests = Tests { + + test("simple async import returns a Promise of the result") { + runAsync( + "(import 'lib.libsonnet').n", + Map("lib.libsonnet" -> "{ n: 42 }") + ).map(v => assert(v == ujson.Num(42))) + } + + test("transitive async imports load and evaluate") { + runAsync( + "(import 'a.libsonnet').value", + Map( + "a.libsonnet" -> "{ value: (import 'b.libsonnet').y + 1 }", + "b.libsonnet" -> "{ y: 10 }" + ) + ).map(v => assert(v == ujson.Num(11))) + } + + test("importstr loads as text without further parsing") { + // The data file would be invalid Jsonnet — it must NOT be parsed. + runAsync( + "importstr 'data.txt'", + Map("data.txt" -> "this is :: not :: jsonnet") + ).map(v => assert(v == ujson.Str("this is :: not :: jsonnet"))) + } + + test("async loader rejection propagates through the returned Promise") { + val resolver = makeResolver(Set("missing.libsonnet")) + val loader: js.Function2[String, Boolean, js.Promise[Any]] = + (path: String, _: Boolean) => js.Promise.reject(s"boom: $path") + val out = SjsonnetMain.interpretAsync( + "import 'missing.libsonnet'", + js.Dictionary[js.Any](), + js.Dictionary[js.Any](), + "/", + resolver, + loader + ) + out.toFuture.transform { + case scala.util.Failure(_) => scala.util.Success(()) + case scala.util.Success(v) => + scala.util.Failure(new RuntimeException(s"expected failure, got $v")) + } + } + } +} diff --git a/sjsonnet/test/src/sjsonnet/PreloaderTests.scala b/sjsonnet/test/src/sjsonnet/PreloaderTests.scala new file mode 100644 index 000000000..0fe694647 --- /dev/null +++ b/sjsonnet/test/src/sjsonnet/PreloaderTests.scala @@ -0,0 +1,127 @@ +package sjsonnet + +import utest._ + +import scala.collection.mutable + +object PreloaderTests extends TestSuite { + + /** A virtual file system used by both the preloader's `resolve` and the test's loading loop. */ + private class FakeFs(files: Map[String, String], binFiles: Map[String, Array[Byte]] = Map.empty) { + val readPaths: mutable.ArrayBuffer[(String, Boolean)] = mutable.ArrayBuffer.empty + + val importer: Importer = new Importer { + def resolve(docBase: Path, importName: String): Option[Path] = { + val candidate = DummyPath(importName) + if (files.contains(importName) || binFiles.contains(importName)) Some(candidate) else None + } + def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = + throw new RuntimeException(s"read should not be called during preload: $path") + } + + def load(path: Path, binaryData: Boolean): ResolvedFile = { + val key = path.asInstanceOf[DummyPath].segments.head + readPaths += ((key, binaryData)) + if (binaryData) StaticBinaryResolvedFile(binFiles(key)) + else StaticResolvedFile(files(key)) + } + } + + private def runPreload(fs: FakeFs, entryPath: Path, entry: String): Preloader = { + val preloader = new Preloader(fs.importer) + preloader.add(entryPath, StaticResolvedFile(entry), ImportFinder.Kind.Code) match { + case Left(err) => throw err + case Right(_) => + } + while (!preloader.isComplete) { + val batch = preloader.takePendingImports() + batch.foreach { p => + val content = fs.load(p.path, p.binaryData) + preloader.add(p.path, content, p.kind) match { + case Left(err) => throw err + case Right(_) => + } + } + } + preloader + } + + def tests: Tests = Tests { + + test("discovers transitive imports") { + val fs = new FakeFs( + Map( + "a.libsonnet" -> "import 'b.libsonnet'", + "b.libsonnet" -> "{ x: 1 }" + ) + ) + val entry = "import 'a.libsonnet'" + val preloader = runPreload(fs, DummyPath("entry"), entry) + + val loaded = fs.readPaths.map(_._1).toSet + assert(loaded == Set("a.libsonnet", "b.libsonnet")) + assert(preloader.loaded.size == 3) // entry + a + b + } + + test("dedupes identical imports") { + val fs = new FakeFs( + Map( + "shared.libsonnet" -> "{ y: 2 }" + ) + ) + val entry = "[import 'shared.libsonnet', import 'shared.libsonnet']" + runPreload(fs, DummyPath("entry"), entry) + + assert(fs.readPaths.count(_._1 == "shared.libsonnet") == 1) + } + + test("handles importstr and importbin") { + val fs = new FakeFs( + Map("data.txt" -> "hello"), + binFiles = Map("blob.bin" -> Array[Byte](1, 2, 3)) + ) + val entry = "{ s: importstr 'data.txt', b: importbin 'blob.bin' }" + runPreload(fs, DummyPath("entry"), entry) + + assert(fs.readPaths.toSet == Set(("data.txt", false), ("blob.bin", true))) + } + + test("does not parse importstr/importbin contents for further imports") { + // The string content here would be invalid Jsonnet if parsed; preloader must not parse it. + val fs = new FakeFs(Map("data.txt" -> "this is not jsonnet ::: !@#")) + val entry = "importstr 'data.txt'" + runPreload(fs, DummyPath("entry"), entry) + + assert(fs.readPaths.toSeq == Seq(("data.txt", false))) + } + + test("interpreter evaluates against preloaded cache") { + val fs = new FakeFs( + Map( + "lib.libsonnet" -> "{ greet(name): 'hello, ' + name }" + ) + ) + val entry = "(import 'lib.libsonnet').greet('world')" + val entryPath = DummyPath("entry") + val preloader = runPreload(fs, entryPath, entry) + + val interp = new Interpreter( + Map.empty[String, String], + Map.empty[String, String], + DummyPath(), + preloader.importer, + parseCache = new DefaultParseCache + ) + val result = interp.interpret(entry, entryPath) + assert(result == Right(ujson.Str("hello, world"))) + } + + test("parse error in entry is reported") { + val fs = new FakeFs(Map.empty) + val preloader = new Preloader(fs.importer) + val out = + preloader.add(DummyPath("entry"), StaticResolvedFile("local x ="), ImportFinder.Kind.Code) + assert(out.isLeft) + } + } +} From cc2848f98f3934cc388ef2567269f5b3d137bacb Mon Sep 17 00:00:00 2001 From: Stephen Amar Date: Fri, 1 May 2026 23:40:09 +0000 Subject: [PATCH 2/5] test: drop unused defaults in InterpretAsyncTests for Scala 2.13/2.12 Scala 2.13/2.12 promote "private default argument never used" to an error; the binary-loader and log defaults in the test helpers were unused at every call site. Inline them since no test exercises binary imports through the async path. Co-authored-by: Isaac --- .../src-js/sjsonnet/InterpretAsyncTests.scala | 33 +++++-------------- 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/sjsonnet/test/src-js/sjsonnet/InterpretAsyncTests.scala b/sjsonnet/test/src-js/sjsonnet/InterpretAsyncTests.scala index 187cdd91c..a3cc8346b 100644 --- a/sjsonnet/test/src-js/sjsonnet/InterpretAsyncTests.scala +++ b/sjsonnet/test/src-js/sjsonnet/InterpretAsyncTests.scala @@ -11,27 +11,15 @@ object InterpretAsyncTests extends TestSuite { /** * Wraps a synchronous file map as a JS Promise-returning loader, so the test exercises the real - * async code path. Records the order of resolved loads. + * async code path. */ private def makeAsyncLoader( - files: Map[String, String], - bin: Map[String, Array[Byte]] = Map.empty, - log: scala.collection.mutable.ArrayBuffer[String] = - scala.collection.mutable.ArrayBuffer.empty) - : js.Function2[String, Boolean, js.Promise[Any]] = { - (path: String, binaryData: Boolean) => + files: Map[String, String]): js.Function2[String, Boolean, js.Promise[Any]] = { + (path: String, _: Boolean) => Future { - log += path - if (binaryData) { - bin.get(path) match { - case Some(b) => b.toJSArray.asInstanceOf[Any] - case None => throw js.JavaScriptException(s"missing binary file: $path") - } - } else { - files.get(path) match { - case Some(s) => s.asInstanceOf[Any] - case None => throw js.JavaScriptException(s"missing file: $path") - } + files.get(path) match { + case Some(s) => s.asInstanceOf[Any] + case None => throw js.JavaScriptException(s"missing file: $path") } }.toJSPromise } @@ -39,12 +27,9 @@ object InterpretAsyncTests extends TestSuite { private def makeResolver(known: Set[String]): js.Function2[String, String, String] = (_: String, name: String) => if (known.contains(name)) name else null - private def runAsync( - text: String, - files: Map[String, String] = Map.empty, - bin: Map[String, Array[Byte]] = Map.empty): Future[ujson.Value] = { - val loader = makeAsyncLoader(files, bin) - val resolver = makeResolver(files.keySet ++ bin.keySet) + private def runAsync(text: String, files: Map[String, String]): Future[ujson.Value] = { + val loader = makeAsyncLoader(files) + val resolver = makeResolver(files.keySet) SjsonnetMain .interpretAsync( text, From 996136eab42dcc5d8cbe725c38a345d1bd2c168b Mon Sep 17 00:00:00 2001 From: Stephen Amar Date: Thu, 7 May 2026 17:51:38 +0000 Subject: [PATCH 3/5] fix: address He-Pin's review on async preload semantics Three correctness issues with interpretAsync: 1. Preloader now passes the importing file's parent directory as docBase to the resolver, matching Importer.resolveAndReadOrFail. Before, a JS resolver doing `wd + "/" + importName` would look up nested imports under `dir/a.libsonnet/b.libsonnet` instead of `dir/b.libsonnet`. 2. interpretAsync now feeds extVar/tlaVar snippets through the preloader so any imports they contain land in the cache. Without this, `std.extVar('cfg')` where `cfg` is `import 'lib.libsonnet'` failed at evaluation time against the cache-only importer. 3. Parse errors on discovered (non-entry) files no longer abort preload. Jsonnet evaluation is lazy, so a parse error in `if false then import 'bad' else 1` should not fail evaluation; the interpreter surfaces the error only if the branch is forced. Matches jrsonnet's preloader behavior. Co-authored-by: Isaac --- sjsonnet/src-js/sjsonnet/SjsonnetMain.scala | 23 ++++++-- sjsonnet/src/sjsonnet/Preloader.scala | 6 +- .../src-js/sjsonnet/InterpretAsyncTests.scala | 31 ++++++++++ .../test/src/sjsonnet/PreloaderTests.scala | 56 +++++++++++++++++++ 4 files changed, 111 insertions(+), 5 deletions(-) diff --git a/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala b/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala index 23d7e5c7f..a97dcf5d1 100644 --- a/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala +++ b/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala @@ -187,6 +187,7 @@ object SjsonnetMain { val parentImporter = jsResolveImporter(importResolver) val preloader = new Preloader(parentImporter) + val wd = JsVirtualPath(wd0) val entryPath = JsVirtualPath("(memory)") preloader.add(entryPath, StaticResolvedFile(text), ImportFinder.Kind.Code) match { @@ -194,16 +195,30 @@ object SjsonnetMain { case Right(_) => } + // ext/tla vars are parsed as Jsonnet code (Interpreter.parseVar) and may contain imports. + // Feed each value through the preloader using the same synthetic path layout so that + // discovered imports resolve against `wd`, matching the synchronous evaluator. + def discoverVarImports(prefix: String, vars: Map[String, String]): Unit = + vars.foreach { case (k, v) => + val varPath = wd / Util.wrapInLessThanGreaterThan(s"$prefix-var $k") + // Ignore parse errors here: Interpreter.parseVar will surface them at evaluation time + // with a proper stack frame if the variable is actually referenced. + preloader.add(varPath, StaticResolvedFile(v), ImportFinder.Kind.Code) + } + discoverVarImports("ext", parsedExtVars) + discoverVarImports("tla", parsedTlaVars) + def loadOne(p: Preloader.Pending): Future[Unit] = { val pathStr = p.path.asInstanceOf[JsVirtualPath].path val promise = importLoader(pathStr, p.binaryData) // implicit Thenable.Implicits converts Promise[Any] to Future[Any] (promise: Future[Any]).map { value => val resolved = toResolvedFile(pathStr, value, p.binaryData) - preloader.add(p.path, resolved, p.kind) match { - case Left(err) => throw js.JavaScriptException(err.getMessage) - case Right(_) => () - } + // Ignore parse errors on discovered imports: Jsonnet evaluation is lazy, so a parse + // error in `if false then import 'bad' else 1` should not fail the whole evaluation. + // If the branch is forced at runtime, the interpreter surfaces the error there. + preloader.add(p.path, resolved, p.kind) + () } } diff --git a/sjsonnet/src/sjsonnet/Preloader.scala b/sjsonnet/src/sjsonnet/Preloader.scala index b1d72ab8e..b6c0f9e61 100644 --- a/sjsonnet/src/sjsonnet/Preloader.scala +++ b/sjsonnet/src/sjsonnet/Preloader.scala @@ -100,8 +100,12 @@ class Preloader(parentImporter: Importer, settings: Settings = Settings.default) val traced = f.trace() Left(new ParseError(s"$path: ${traced.msg}", offset = traced.index)) case Parsed.Success((expr, _), _) => + // Match the synchronous evaluator's docBase: resolve relative to the importing file's + // parent directory, not the file path itself. See Importer.resolveAndReadOrFail, which + // calls resolve(pos.fileScope.currentFile.parent(), ...). + val docBase = path.parent() ImportFinder.collect(expr).foreach { found => - parentImporter.resolve(path, found.value) match { + parentImporter.resolve(docBase, found.value) match { case Some(resolved) => if (seen.add((resolved, found.kind))) enqueue(resolved, found.kind) case None => diff --git a/sjsonnet/test/src-js/sjsonnet/InterpretAsyncTests.scala b/sjsonnet/test/src-js/sjsonnet/InterpretAsyncTests.scala index a3cc8346b..5fa69d09f 100644 --- a/sjsonnet/test/src-js/sjsonnet/InterpretAsyncTests.scala +++ b/sjsonnet/test/src-js/sjsonnet/InterpretAsyncTests.scala @@ -70,6 +70,37 @@ object InterpretAsyncTests extends TestSuite { ).map(v => assert(v == ujson.Str("this is :: not :: jsonnet"))) } + test("preloads imports referenced from extVars") { + // extVar value is a Jsonnet snippet that imports a file. interpretAsync must walk + // ext/tla var snippets too; otherwise the cache-only importer hits a miss at eval time. + val files = Map("lib.libsonnet" -> "{ n: 5 }") + val loader = makeAsyncLoader(files) + val resolver = makeResolver(files.keySet) + val extVars = js.Dictionary[js.Any]("cfg" -> "(import 'lib.libsonnet').n") + SjsonnetMain + .interpretAsync( + "std.extVar('cfg') + 1", + extVars, + js.Dictionary[js.Any](), + "/", + resolver, + loader + ) + .toFuture + .map(v => ujson.WebJson.transform(v, ujson.Value)) + .map(v => assert(v == ujson.Num(6))) + } + + test("parse error in unforced branch does not fail evaluation") { + // Lazy semantics: `if false then import 'bad' else 1` should evaluate to 1, even though + // bad.libsonnet has a parse error. The preloader still loads the file (jsonnet imports + // are statically discoverable), but a parse failure on a discovered file must not abort. + runAsync( + "if false then import 'bad.libsonnet' else 1", + Map("bad.libsonnet" -> "this is :: not :: jsonnet") + ).map(v => assert(v == ujson.Num(1))) + } + test("async loader rejection propagates through the returned Promise") { val resolver = makeResolver(Set("missing.libsonnet")) val loader: js.Function2[String, Boolean, js.Promise[Any]] = diff --git a/sjsonnet/test/src/sjsonnet/PreloaderTests.scala b/sjsonnet/test/src/sjsonnet/PreloaderTests.scala index 0fe694647..167d810e6 100644 --- a/sjsonnet/test/src/sjsonnet/PreloaderTests.scala +++ b/sjsonnet/test/src/sjsonnet/PreloaderTests.scala @@ -116,6 +116,62 @@ object PreloaderTests extends TestSuite { assert(result == Right(ujson.Str("hello, world"))) } + test("resolves imports relative to the importing file's parent directory") { + // Resolver records what docBase it was called with, and only resolves names against the + // expected `dir/` parent — proving the preloader passes parent(), not the file path itself. + val seenDocBases = mutable.ArrayBuffer.empty[String] + val files = Map("dir/a.libsonnet" -> "import 'b.libsonnet'", "dir/b.libsonnet" -> "{ ok: true }") + val importer = new Importer { + def resolve(docBase: Path, importName: String): Option[Path] = { + seenDocBases += docBase.asInstanceOf[DummyPath].segments.mkString("/") + val joined = docBase.asInstanceOf[DummyPath].segments.mkString("/") match { + case "" => importName + case base => s"$base/$importName" + } + if (files.contains(joined)) Some(DummyPath(joined.split('/').toIndexedSeq: _*)) else None + } + def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = + throw new RuntimeException(s"unexpected read: $path") + } + val preloader = new Preloader(importer) + val entryPath = DummyPath("dir", "a.libsonnet") + preloader.add(entryPath, StaticResolvedFile(files("dir/a.libsonnet")), ImportFinder.Kind.Code) + while (!preloader.isComplete) { + val batch = preloader.takePendingImports() + batch.foreach { p => + val key = p.path.asInstanceOf[DummyPath].segments.mkString("/") + preloader.add(p.path, StaticResolvedFile(files(key)), p.kind) + } + } + // Every docBase observed should be the parent dir, never the file path itself. + assert(seenDocBases.forall(_ == "dir")) + assert(preloader.loaded.contains(DummyPath("dir", "b.libsonnet"))) + } + + test("does not fail preload on parse errors in discovered files") { + // A parse error in a discovered file (e.g. behind `if false then import 'bad'`) should + // not abort preload — Jsonnet evaluation is lazy, the error should only surface if the + // branch is actually forced. + val fs = new FakeFs(Map("bad.libsonnet" -> "this is :: not :: jsonnet")) + val preloader = new Preloader(fs.importer) + preloader.add( + DummyPath("entry"), + StaticResolvedFile("if false then import 'bad.libsonnet' else 1"), + ImportFinder.Kind.Code + ) + while (!preloader.isComplete) { + val batch = preloader.takePendingImports() + batch.foreach { p => + val content = fs.load(p.path, p.binaryData) + // Parse failure here returns Left, but we deliberately ignore it. + preloader.add(p.path, content, p.kind) + } + } + // bad.libsonnet was loaded but its parse error did not fail preload. + assert(fs.readPaths.map(_._1).contains("bad.libsonnet")) + assert(preloader.loaded.contains(DummyPath("bad.libsonnet"))) + } + test("parse error in entry is reported") { val fs = new FakeFs(Map.empty) val preloader = new Preloader(fs.importer) From fff77b3cac8b47ee76dd936f87d2ce552393f563 Mon Sep 17 00:00:00 2001 From: Stephen Amar Date: Thu, 7 May 2026 18:02:45 +0000 Subject: [PATCH 4/5] perf: skip second fastparse on preloaded files; document eager front-loading Adds an optional preParsedAst hook on ResolvedFile and a PreParsedResolvedFile wrapper. CachedResolver.parse uses the attached AST when present, skipping fastparse and going straight to the static optimizer. Preloader now stashes the AST it produced during discovery on the cache entry, so each file is fastparse'd once total instead of twice (once for discovery, once for evaluation). Also expands the interpretAsync and Preloader docstrings to make the eager front-loading semantics explicit: every reachable import is loaded up front, including ones inside branches the evaluator will never force; this is the tradeoff that makes synchronous evaluation possible. Loader rejection still propagates; parse errors on discovered (non-entry) files are tolerated. Co-authored-by: Isaac --- sjsonnet/src-js/sjsonnet/SjsonnetMain.scala | 16 +++++- sjsonnet/src/sjsonnet/Importer.scala | 56 +++++++++++++------ sjsonnet/src/sjsonnet/Preloader.scala | 16 +++++- .../test/src/sjsonnet/PreloaderTests.scala | 52 +++++++++++++++++ 4 files changed, 118 insertions(+), 22 deletions(-) diff --git a/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala b/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala index a97dcf5d1..82fe632cf 100644 --- a/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala +++ b/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala @@ -168,9 +168,19 @@ object SjsonnetMain { * Async variant of [[interpret]]. Accepts an `importLoader` that returns a `Promise` of the file * contents, and returns a `Promise` resolving to the rendered output. * - * Implementation: imports are statically discovered by parsing each file's AST, loaded - * concurrently via the user-supplied async loader, and inserted into a cache. Once the transitive - * closure is loaded, evaluation runs synchronously against the cache. + * Imports are eagerly front-loaded: every `import`, `importstr`, and `importbin` reachable from + * the entry source (plus from any extVar/tlaVar code snippets) is statically discovered and + * loaded before evaluation begins. This includes imports inside branches the evaluator will never + * force, e.g. `if false then import 'x' else 1` will still ask the loader for `x`. The tradeoff + * is that all I/O happens up front, which is what lets evaluation run synchronously. + * + * - Loader rejection (missing file, network error, etc.) fails the returned Promise. + * - A parse error on a discovered (non-entry) file is tolerated; it only surfaces if evaluation + * actually forces that branch. + * - The entry source's own parse error fails immediately. + * + * Each discovered file is parsed once during preload and again referenced by the evaluator; the + * parsed AST is shared so fastparse runs only once per file. */ @JSExport def interpretAsync( diff --git a/sjsonnet/src/sjsonnet/Importer.scala b/sjsonnet/src/sjsonnet/Importer.scala index debd3b640..3d1e4df19 100644 --- a/sjsonnet/src/sjsonnet/Importer.scala +++ b/sjsonnet/src/sjsonnet/Importer.scala @@ -155,6 +155,23 @@ trait ResolvedFile { // Used by importbin def readRawBytes(): Array[Byte] + + /** + * Optional pre-parsed AST. When defined, [[CachedResolver.parse]] uses this instead of running + * fastparse again. Set by [[Preloader]] to avoid parsing each file twice (once during async + * import discovery, once during evaluation). + */ + def preParsedAst: Option[(Expr, FileScope)] = None +} + +/** Wraps another [[ResolvedFile]] with an attached pre-parsed AST so the parser can be skipped. */ +final case class PreParsedResolvedFile(underlying: ResolvedFile, expr: Expr, fileScope: FileScope) + extends ResolvedFile { + def getParserInput(): ParserInput = underlying.getParserInput() + def readString(): String = underlying.readString() + def contentHash(): String = underlying.contentHash() + def readRawBytes(): Array[Byte] = underlying.readRawBytes() + override val preParsedAst: Option[(Expr, FileScope)] = Some((expr, fileScope)) } final case class StaticResolvedFile(content: String) extends ResolvedFile { @@ -209,25 +226,28 @@ class CachedResolver( ev: EvalErrorScope): Either[Error, (Expr, FileScope)] = { parseCache.getOrElseUpdate( (path, content.contentHash()), { - val parsed = - try { - fastparse.parse( - content.getParserInput(), - parser(path).document(_) - ) match { - case f @ Parsed.Failure(_, _, _) => - val traced = f.trace() - val pos = new Position(new FileScope(path), traced.index) - Left(new ParseError(traced.msg).addFrame(pos)) - case Parsed.Success(r, _) => Right(r) + val parsed: Either[Error, (Expr, FileScope)] = content.preParsedAst match { + case Some(pre) => Right(pre) + case None => + try { + fastparse.parse( + content.getParserInput(), + parser(path).document(_) + ) match { + case f @ Parsed.Failure(_, _, _) => + val traced = f.trace() + val pos = new Position(new FileScope(path), traced.index) + Left(new ParseError(traced.msg).addFrame(pos)) + case Parsed.Success(r, _) => Right(r) + } + } catch { + case e: ParseError if e.offset >= 0 => + val pos = new Position(new FileScope(path), e.offset) + Left(new ParseError(e.getMessage).addFrame(pos)) + case e: ParseError => + Left(e) } - } catch { - case e: ParseError if e.offset >= 0 => - val pos = new Position(new FileScope(path), e.offset) - Left(new ParseError(e.getMessage).addFrame(pos)) - case e: ParseError => - Left(e) - } + } parsed.flatMap { case (e, fs) => process(e, fs) } } ) diff --git a/sjsonnet/src/sjsonnet/Preloader.scala b/sjsonnet/src/sjsonnet/Preloader.scala index b6c0f9e61..6ac416ea3 100644 --- a/sjsonnet/src/sjsonnet/Preloader.scala +++ b/sjsonnet/src/sjsonnet/Preloader.scala @@ -13,6 +13,17 @@ import scala.collection.mutable * them, parse the loaded code files for further imports, and repeat until the closure is known. * Once all files are in the cache, normal synchronous evaluation can run. * + * Eager front-loading: every reachable import is loaded up front, including ones inside branches + * the evaluator will never force (`if false then import 'x' else 1`). This trades laziness for the + * ability to do all I/O before evaluation. Parse errors on discovered (non-entry) files are + * tolerated for the same reason — they only surface at evaluation time if the branch is actually + * forced. Loader failures (a rejected Promise, missing file, etc.) are real I/O problems and + * propagate up to the caller. + * + * The parsed AST of each loaded code file is attached to its cache entry as a + * [[PreParsedResolvedFile]] so the Interpreter does not re-run fastparse during evaluation; the + * static optimizer still runs once on cache hit. + * * Usage (pseudo-code): * {{{ * val preloader = new Preloader(parentImporter) @@ -99,7 +110,10 @@ class Preloader(parentImporter: Importer, settings: Settings = Settings.default) case f: Parsed.Failure => val traced = f.trace() Left(new ParseError(s"$path: ${traced.msg}", offset = traced.index)) - case Parsed.Success((expr, _), _) => + case Parsed.Success((expr, fs), _) => + // Stash the parsed AST on the cache entry so the Interpreter doesn't re-run fastparse. + // The optimizer still runs once at evaluation time on cache hit. + cache.put(path, PreParsedResolvedFile(content, expr, fs)) // Match the synchronous evaluator's docBase: resolve relative to the importing file's // parent directory, not the file path itself. See Importer.resolveAndReadOrFail, which // calls resolve(pos.fileScope.currentFile.parent(), ...). diff --git a/sjsonnet/test/src/sjsonnet/PreloaderTests.scala b/sjsonnet/test/src/sjsonnet/PreloaderTests.scala index 167d810e6..41d72d618 100644 --- a/sjsonnet/test/src/sjsonnet/PreloaderTests.scala +++ b/sjsonnet/test/src/sjsonnet/PreloaderTests.scala @@ -116,6 +116,58 @@ object PreloaderTests extends TestSuite { assert(result == Right(ujson.Str("hello, world"))) } + test("preloaded files carry pre-parsed AST so fastparse runs once") { + // Wrap each ResolvedFile with a counter so we can detect re-parsing. The Preloader parses + // once during discover; the Interpreter should consume the attached AST without re-parsing. + val parseCount = mutable.HashMap.empty[Path, Int].withDefaultValue(0) + class CountingResolvedFile(content: String, path: Path) extends ResolvedFile { + def getParserInput(): fastparse.ParserInput = { + parseCount(path) = parseCount(path) + 1 + fastparse.IndexedParserInput(content) + } + def readString(): String = content + def contentHash(): String = content + def readRawBytes(): Array[Byte] = + content.getBytes(java.nio.charset.StandardCharsets.UTF_8) + } + val files = Map( + "lib.libsonnet" -> "{ x: 1 }", + "entry" -> "(import 'lib.libsonnet').x" + ) + val importer = new Importer { + def resolve(docBase: Path, importName: String): Option[Path] = + if (files.contains(importName)) Some(DummyPath(importName)) else None + def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = + throw new RuntimeException(s"unexpected read: $path") + } + val preloader = new Preloader(importer) + val entryPath = DummyPath("entry") + preloader.add(entryPath, new CountingResolvedFile(files("entry"), entryPath)) + while (!preloader.isComplete) { + val batch = preloader.takePendingImports() + batch.foreach { p => + val key = p.path.asInstanceOf[DummyPath].segments.head + preloader.add(p.path, new CountingResolvedFile(files(key), p.path), p.kind) + } + } + // Both files have been parsed exactly once — the Preloader's parse pass. + assert(parseCount(DummyPath("lib.libsonnet")) == 1) + assert(parseCount(entryPath) == 1) + + // Run a full interpret. The Interpreter must not re-parse; getParserInput would bump the + // counter again if it did. + val interp = new Interpreter( + Map.empty[String, String], + Map.empty[String, String], + DummyPath(), + preloader.importer, + parseCache = new DefaultParseCache + ) + val result = interp.interpret(files("entry"), entryPath) + assert(result == Right(ujson.Num(1))) + assert(parseCount(DummyPath("lib.libsonnet")) == 1) + } + test("resolves imports relative to the importing file's parent directory") { // Resolver records what docBase it was called with, and only resolves names against the // expected `dir/` parent — proving the preloader passes parent(), not the file path itself. From b3e59a055e73302e8fc1f33bf8e8c4785220e636 Mon Sep 17 00:00:00 2001 From: Stephen Amar Date: Thu, 7 May 2026 18:48:34 +0000 Subject: [PATCH 5/5] fix: address He-Pin's review on async preload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four issues from the review pass: 1. parseStringMap allocations: stop round-tripping through ujson — iterate the JS dictionary directly into a Map.newBuilder. Drops the intermediate ujson tree, the .obj.toMap copy, and the trailing .map. 2. Naming: rename ImportFinder.Kind to a top-level ImportKind, matching the existing ExternalVariableKind convention in the codebase. 3. Cache key collision for importstr/importbin of the same path. The cache was keyed by Path only, so a program like `if true then importstr "x" else importbin "x"` had whichever load ran last clobber the other; sync interpret returns text, async was rejecting with NotImplementedError. Cache is now keyed by (Path, binaryData), pending dedup tracks the same key, and a new record() helper upgrades a pending Str entry to Code if a later reference needs the AST. Added putContent that refuses to downgrade a PreParsedResolvedFile back to plain text. 4. Entry parse error formatting: interpretAsync was rejecting with err.getMessage, bypassing Error.formatError used by interpret0. Now we let the entry's parse error fall through to runInterpret so the message has the same shape (root frame, "(memory):line:col") as synchronous interpret. New tests: - PreloaderTests.scala: importstr+importbin for the same path keep separate cache entries. - InterpretAsyncTests.scala: same case end-to-end through the JS API, and a test that the entry parse error message contains "(memory)". Co-authored-by: Isaac --- sjsonnet/src-js/sjsonnet/SjsonnetMain.scala | 35 +++++--- sjsonnet/src/sjsonnet/ImportFinder.scala | 59 +++++++------ sjsonnet/src/sjsonnet/Preloader.scala | 82 +++++++++++++++---- .../src-js/sjsonnet/InterpretAsyncTests.scala | 55 +++++++++++++ .../test/src/sjsonnet/PreloaderTests.scala | 30 +++++-- 5 files changed, 196 insertions(+), 65 deletions(-) diff --git a/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala b/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala index 82fe632cf..b8fbf5322 100644 --- a/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala +++ b/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala @@ -85,13 +85,26 @@ object SjsonnetMain { ) } + /** + * Coerce a JS object whose values are strings into a `Map[String, String]`. Iterates the JS + * dictionary directly instead of round-tripping through `ujson` to avoid the intermediate ujson + * tree, the `.obj.toMap` copy, and the trailing `.map` on the immutable map. + */ private def parseStringMap(label: String, value: js.Any): Map[String, String] = try { - ujson.WebJson.transform(value, ujson.Value).obj.toMap.map { - case (k, ujson.Str(v)) => (k, v) - case (k, _) => - throw js.JavaScriptException(s"$label '$k' must be a string value, got non-string") + val dict = value.asInstanceOf[js.Dictionary[js.Any]] + val out = Map.newBuilder[String, String] + out.sizeHint(dict.size) + val it = dict.iterator + while (it.hasNext) { + val (k, v) = it.next() + (v: Any) match { + case s: String => out += k -> s + case _ => + throw js.JavaScriptException(s"$label '$k' must be a string value, got non-string") + } } + out.result() } catch { case e: js.JavaScriptException => throw e case e: Exception => @@ -177,7 +190,8 @@ object SjsonnetMain { * - Loader rejection (missing file, network error, etc.) fails the returned Promise. * - A parse error on a discovered (non-entry) file is tolerated; it only surfaces if evaluation * actually forces that branch. - * - The entry source's own parse error fails immediately. + * - The entry source's own parse error is reported through the normal `interpret0` formatting + * path so the error shape and location info match synchronous `interpret`. * * Each discovered file is parsed once during preload and again referenced by the evaluator; the * parsed AST is shared so fastparse runs only once per file. @@ -200,10 +214,11 @@ object SjsonnetMain { val wd = JsVirtualPath(wd0) val entryPath = JsVirtualPath("(memory)") - preloader.add(entryPath, StaticResolvedFile(text), ImportFinder.Kind.Code) match { - case Left(err) => return js.Promise.reject(err.getMessage) - case Right(_) => - } + // Don't propagate the entry's parse error here — let runInterpret surface it via + // interpret0 so the message goes through the same Error.formatError path as synchronous + // interpret (root frame, "(memory):line:col", etc.). If parsing the entry fails we still + // get an empty pending queue and a fast path to runInterpret, which fails identically. + preloader.add(entryPath, StaticResolvedFile(text), ImportKind.Code) // ext/tla vars are parsed as Jsonnet code (Interpreter.parseVar) and may contain imports. // Feed each value through the preloader using the same synthetic path layout so that @@ -213,7 +228,7 @@ object SjsonnetMain { val varPath = wd / Util.wrapInLessThanGreaterThan(s"$prefix-var $k") // Ignore parse errors here: Interpreter.parseVar will surface them at evaluation time // with a proper stack frame if the variable is actually referenced. - preloader.add(varPath, StaticResolvedFile(v), ImportFinder.Kind.Code) + preloader.add(varPath, StaticResolvedFile(v), ImportKind.Code) } discoverVarImports("ext", parsedExtVars) discoverVarImports("tla", parsedTlaVars) diff --git a/sjsonnet/src/sjsonnet/ImportFinder.scala b/sjsonnet/src/sjsonnet/ImportFinder.scala index 501ad9489..93adaf626 100644 --- a/sjsonnet/src/sjsonnet/ImportFinder.scala +++ b/sjsonnet/src/sjsonnet/ImportFinder.scala @@ -2,39 +2,38 @@ package sjsonnet import scala.collection.mutable -/** - * Walks an [[Expr]] AST collecting all `import`, `importstr`, and `importbin` expressions. Used by - * [[Preloader]] to discover the transitive set of files that need to be loaded before evaluation. - */ -object ImportFinder { +/** The kind of import expression that referenced a file. */ +sealed trait ImportKind { - sealed trait Kind { + /** Whether the file should be read as raw bytes (`importbin`) vs. text (`import`/`importstr`). */ + def binaryData: Boolean - /** - * Whether the file should be read as raw bytes (`importbin`) vs. text (`import`/`importstr`). - */ - def binaryData: Boolean + /** Whether the loaded file is itself Jsonnet code that may contain further imports. */ + def isCode: Boolean +} - /** Whether the loaded file is itself Jsonnet code that may contain further imports. */ - def isCode: Boolean +object ImportKind { + case object Code extends ImportKind { + def binaryData: Boolean = false + def isCode: Boolean = true } - - object Kind { - case object Code extends Kind { - def binaryData: Boolean = false - def isCode: Boolean = true - } - case object Str extends Kind { - def binaryData: Boolean = false - def isCode: Boolean = false - } - case object Bin extends Kind { - def binaryData: Boolean = true - def isCode: Boolean = false - } + case object Str extends ImportKind { + def binaryData: Boolean = false + def isCode: Boolean = false + } + case object Bin extends ImportKind { + def binaryData: Boolean = true + def isCode: Boolean = false } +} + +/** + * Walks an [[Expr]] AST collecting all `import`, `importstr`, and `importbin` expressions. Used by + * [[Preloader]] to discover the transitive set of files that need to be loaded before evaluation. + */ +object ImportFinder { - final case class Found(value: String, kind: Kind) + final case class Found(value: String, kind: ImportKind) def collect(expr: Expr): Seq[Found] = { val buf = mutable.ArrayBuffer.empty[Found] @@ -46,9 +45,9 @@ object ImportFinder { private class Walker(buf: mutable.ArrayBuffer[Found]) extends ExprTransform { override def transform(expr: Expr): Expr = { expr match { - case Expr.Import(_, v) => buf += Found(v, Kind.Code) - case Expr.ImportStr(_, v) => buf += Found(v, Kind.Str) - case Expr.ImportBin(_, v) => buf += Found(v, Kind.Bin) + case Expr.Import(_, v) => buf += Found(v, ImportKind.Code) + case Expr.ImportStr(_, v) => buf += Found(v, ImportKind.Str) + case Expr.ImportBin(_, v) => buf += Found(v, ImportKind.Bin) case _ => } rec(expr) diff --git a/sjsonnet/src/sjsonnet/Preloader.scala b/sjsonnet/src/sjsonnet/Preloader.scala index 6ac416ea3..e2e2291a5 100644 --- a/sjsonnet/src/sjsonnet/Preloader.scala +++ b/sjsonnet/src/sjsonnet/Preloader.scala @@ -24,10 +24,15 @@ import scala.collection.mutable * [[PreParsedResolvedFile]] so the Interpreter does not re-run fastparse during evaluation; the * static optimizer still runs once on cache hit. * + * Cache keying: entries are keyed by `(Path, binaryData)` so that the same path referenced as both + * `importstr` (text) and `importbin` (bytes) keeps two distinct cache entries — matching the + * `Importer.read(path, binaryData)` contract. Without this, an `importstr "x" + importbin "x"` + * program would overwrite one entry and hand the wrong content to the evaluator. + * * Usage (pseudo-code): * {{{ * val preloader = new Preloader(parentImporter) - * preloader.add(entryPath, StaticResolvedFile(entryText), Preloader.EntryKind) + * preloader.add(entryPath, StaticResolvedFile(entryText), ImportKind.Code) * while (preloader.pendingImports.nonEmpty) { * val batch = preloader.takePendingImports() * for (p <- batch) { @@ -53,8 +58,13 @@ class Preloader(parentImporter: Importer, settings: Settings = Settings.default) java.lang.Boolean ]] - private val cache = mutable.LinkedHashMap.empty[Path, ResolvedFile] - private val seen = mutable.HashSet.empty[(Path, ImportFinder.Kind)] + // Keyed by (path, binaryData) so importstr and importbin for the same path don't collide. + private val cache = mutable.LinkedHashMap.empty[(Path, Boolean), ResolvedFile] + + // Tracks the strongest kind enqueued/loaded for each (path, binaryData). Used both to dedupe + // loader calls and to upgrade a previously-Str pending entry to Code if a Code reference shows + // up later. + private val seen = mutable.HashMap.empty[(Path, Boolean), ImportKind] private val pending = mutable.ArrayBuffer.empty[Preloader.Pending] /** Resolve an import name relative to a base path, using the parent importer. */ @@ -71,8 +81,8 @@ class Preloader(parentImporter: Importer, settings: Settings = Settings.default) def add( path: Path, content: ResolvedFile, - kind: ImportFinder.Kind = ImportFinder.Kind.Code): Either[Error, Unit] = { - cache.put(path, content) + kind: ImportKind = ImportKind.Code): Either[Error, Unit] = { + putContent(path, kind.binaryData, content) if (kind.isCode) discover(path, content) else Right(()) } @@ -97,11 +107,27 @@ class Preloader(parentImporter: Importer, settings: Settings = Settings.default) def importer: Importer = new Importer { def resolve(docBase: Path, importName: String): Option[Path] = parentImporter.resolve(docBase, importName) - def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = cache.get(path) + def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = + cache.get((path, binaryData)) } /** Snapshot of the loaded cache, exposed so callers can inspect or persist it. */ - def loaded: collection.Map[Path, ResolvedFile] = cache + def loaded: collection.Map[(Path, Boolean), ResolvedFile] = cache + + /** + * Insert content into the cache without clobbering a richer (pre-parsed) entry. discover() puts a + * [[PreParsedResolvedFile]]; a later add() of the same physical content (e.g. via a separate + * `importstr` reference to the same path) must not downgrade it back to plain text. + */ + private def putContent(path: Path, binaryData: Boolean, content: ResolvedFile): Unit = { + val key = (path, binaryData) + cache.get(key) match { + case Some(existing) if existing.preParsedAst.isDefined && content.preParsedAst.isEmpty => + // keep the pre-parsed version + case _ => + cache.put(key, content) + } + } private def discover(path: Path, content: ResolvedFile): Either[Error, Unit] = { val parser = new Parser(path, internedStrings, internedFieldSets, settings) @@ -113,18 +139,14 @@ class Preloader(parentImporter: Importer, settings: Settings = Settings.default) case Parsed.Success((expr, fs), _) => // Stash the parsed AST on the cache entry so the Interpreter doesn't re-run fastparse. // The optimizer still runs once at evaluation time on cache hit. - cache.put(path, PreParsedResolvedFile(content, expr, fs)) + cache.put((path, false), PreParsedResolvedFile(content, expr, fs)) // Match the synchronous evaluator's docBase: resolve relative to the importing file's // parent directory, not the file path itself. See Importer.resolveAndReadOrFail, which // calls resolve(pos.fileScope.currentFile.parent(), ...). val docBase = path.parent() ImportFinder.collect(expr).foreach { found => - parentImporter.resolve(docBase, found.value) match { - case Some(resolved) => - if (seen.add((resolved, found.kind))) enqueue(resolved, found.kind) - case None => - // resolution failure is deferred until evaluation, where it will surface - // with a proper stack frame + parentImporter.resolve(docBase, found.value).foreach { resolved => + record(resolved, found.kind) } } Right(()) @@ -134,16 +156,40 @@ class Preloader(parentImporter: Importer, settings: Settings = Settings.default) } } - private def enqueue(path: Path, kind: ImportFinder.Kind): Unit = { - if (cache.contains(path)) return - pending += Preloader.Pending(path, kind) + /** + * Record that `path` was referenced with `kind`. Enqueues the load if new, upgrades a pending + * Str→Code if applicable, and lazily parses an already-loaded Str entry that needs Code analysis. + */ + private def record(path: Path, kind: ImportKind): Unit = { + val key = (path, kind.binaryData) + cache.get(key) match { + case Some(content) => + // Already loaded. If we now need Code analysis (e.g. an earlier `importstr` reference + // loaded the file as plain text and we just hit an `import` of the same path), parse it + // now and walk for sub-imports. + if (kind.isCode && content.preParsedAst.isEmpty) { + val _ = discover(path, content) + } + case None => + seen.get(key) match { + case None => + seen(key) = kind + pending += Preloader.Pending(path, kind) + case Some(existing) if !existing.isCode && kind.isCode => + seen(key) = kind + val idx = + pending.indexWhere(p => p.path == path && p.kind.binaryData == kind.binaryData) + if (idx >= 0) pending(idx) = Preloader.Pending(path, kind) + case _ => // already pending with equal-or-stronger kind + } + } } } object Preloader { /** A path that needs to be loaded before evaluation can proceed. */ - final case class Pending(path: Path, kind: ImportFinder.Kind) { + final case class Pending(path: Path, kind: ImportKind) { def binaryData: Boolean = kind.binaryData def isCode: Boolean = kind.isCode } diff --git a/sjsonnet/test/src-js/sjsonnet/InterpretAsyncTests.scala b/sjsonnet/test/src-js/sjsonnet/InterpretAsyncTests.scala index 5fa69d09f..fb2443504 100644 --- a/sjsonnet/test/src-js/sjsonnet/InterpretAsyncTests.scala +++ b/sjsonnet/test/src-js/sjsonnet/InterpretAsyncTests.scala @@ -101,6 +101,61 @@ object InterpretAsyncTests extends TestSuite { ).map(v => assert(v == ujson.Num(1))) } + test("importstr and importbin for the same path return distinct values") { + // He-Pin's reproducer: `if true then importstr "x" else importbin "x"`. The cache must + // keep separate entries for text and bytes; otherwise async returns a binary file to + // importstr (rejecting with NotImplementedError) or vice versa. + val text = "the-text" + val bytes = Array[Byte](1, 2, 3) + val resolver = makeResolver(Set("same")) + val loader: js.Function2[String, Boolean, js.Promise[Any]] = + (_: String, binaryData: Boolean) => + Future { + if (binaryData) bytes.toJSArray.asInstanceOf[Any] + else text.asInstanceOf[Any] + }.toJSPromise + SjsonnetMain + .interpretAsync( + "if true then importstr 'same' else importbin 'same'", + js.Dictionary[js.Any](), + js.Dictionary[js.Any](), + "/", + resolver, + loader + ) + .toFuture + .map(v => ujson.WebJson.transform(v, ujson.Value)) + .map(v => assert(v == ujson.Str(text))) + } + + test("entry parse error matches synchronous interpret's error formatting") { + // The async path must route entry parse errors through interpret0 so the message shape + // and root frame match the synchronous interpret. Verify the error includes the + // "(memory)" location marker that interpret0 produces. + val resolver = makeResolver(Set.empty) + val loader: js.Function2[String, Boolean, js.Promise[Any]] = + (path: String, _: Boolean) => js.Promise.reject(s"unexpected load: $path") + val out = SjsonnetMain.interpretAsync( + "local x =", // syntactically invalid + js.Dictionary[js.Any](), + js.Dictionary[js.Any](), + "/", + resolver, + loader + ) + out.toFuture.transform { + case scala.util.Success(v) => + scala.util.Failure(new RuntimeException(s"expected parse failure, got $v")) + case scala.util.Failure(e) => + val msg = e.getMessage + if (msg != null && msg.contains("(memory)")) scala.util.Success(()) + else + scala.util.Failure( + new RuntimeException(s"expected formatted parse error mentioning (memory), got: $msg") + ) + } + } + test("async loader rejection propagates through the returned Promise") { val resolver = makeResolver(Set("missing.libsonnet")) val loader: js.Function2[String, Boolean, js.Promise[Any]] = diff --git a/sjsonnet/test/src/sjsonnet/PreloaderTests.scala b/sjsonnet/test/src/sjsonnet/PreloaderTests.scala index 41d72d618..209bce6b4 100644 --- a/sjsonnet/test/src/sjsonnet/PreloaderTests.scala +++ b/sjsonnet/test/src/sjsonnet/PreloaderTests.scala @@ -29,7 +29,7 @@ object PreloaderTests extends TestSuite { private def runPreload(fs: FakeFs, entryPath: Path, entry: String): Preloader = { val preloader = new Preloader(fs.importer) - preloader.add(entryPath, StaticResolvedFile(entry), ImportFinder.Kind.Code) match { + preloader.add(entryPath, StaticResolvedFile(entry), ImportKind.Code) match { case Left(err) => throw err case Right(_) => } @@ -60,7 +60,7 @@ object PreloaderTests extends TestSuite { val loaded = fs.readPaths.map(_._1).toSet assert(loaded == Set("a.libsonnet", "b.libsonnet")) - assert(preloader.loaded.size == 3) // entry + a + b + assert(preloader.loaded.size == 3) // entry + a + b, all keyed at (path, false) } test("dedupes identical imports") { @@ -187,7 +187,7 @@ object PreloaderTests extends TestSuite { } val preloader = new Preloader(importer) val entryPath = DummyPath("dir", "a.libsonnet") - preloader.add(entryPath, StaticResolvedFile(files("dir/a.libsonnet")), ImportFinder.Kind.Code) + preloader.add(entryPath, StaticResolvedFile(files("dir/a.libsonnet")), ImportKind.Code) while (!preloader.isComplete) { val batch = preloader.takePendingImports() batch.foreach { p => @@ -197,7 +197,7 @@ object PreloaderTests extends TestSuite { } // Every docBase observed should be the parent dir, never the file path itself. assert(seenDocBases.forall(_ == "dir")) - assert(preloader.loaded.contains(DummyPath("dir", "b.libsonnet"))) + assert(preloader.loaded.contains((DummyPath("dir", "b.libsonnet"), false))) } test("does not fail preload on parse errors in discovered files") { @@ -209,7 +209,7 @@ object PreloaderTests extends TestSuite { preloader.add( DummyPath("entry"), StaticResolvedFile("if false then import 'bad.libsonnet' else 1"), - ImportFinder.Kind.Code + ImportKind.Code ) while (!preloader.isComplete) { val batch = preloader.takePendingImports() @@ -221,14 +221,30 @@ object PreloaderTests extends TestSuite { } // bad.libsonnet was loaded but its parse error did not fail preload. assert(fs.readPaths.map(_._1).contains("bad.libsonnet")) - assert(preloader.loaded.contains(DummyPath("bad.libsonnet"))) + assert(preloader.loaded.contains((DummyPath("bad.libsonnet"), false))) + } + + test("importstr and importbin for the same path keep separate cache entries") { + // He-Pin's reproducer: a single path referenced as both text and bytes must not collide + // in the cache. With a Path-only key, one read overwrote the other, breaking the + // synchronous evaluator's Importer.read(path, binaryData) contract. + val fs = new FakeFs( + Map("same" -> "the-text"), + binFiles = Map("same" -> Array[Byte](1, 2, 3)) + ) + val entry = "{ s: importstr 'same', b: importbin 'same' }" + val preloader = runPreload(fs, DummyPath("entry"), entry) + val sameTxt = preloader.importer.read(DummyPath("same"), binaryData = false) + val sameBin = preloader.importer.read(DummyPath("same"), binaryData = true) + assert(sameTxt.exists(_.readString() == "the-text")) + assert(sameBin.exists(_.readRawBytes().sameElements(Array[Byte](1, 2, 3)))) } test("parse error in entry is reported") { val fs = new FakeFs(Map.empty) val preloader = new Preloader(fs.importer) val out = - preloader.add(DummyPath("entry"), StaticResolvedFile("local x ="), ImportFinder.Kind.Code) + preloader.add(DummyPath("entry"), StaticResolvedFile("local x ="), ImportKind.Code) assert(out.isLeft) } }