diff --git a/readme.md b/readme.md index 09fad1a93..3e73e157a 100644 --- a/readme.md +++ b/readme.md @@ -125,6 +125,30 @@ filesystem, you have to provide an explicit import callback that you can use to resolve imports yourself (whether through Node's `fs` module, or by emulating a filesystem in-memory) +#### Async imports + +`SjsonnetMain.interpret` is synchronous, which is awkward in browsers where +files come from `fetch` or `FileReader`. Use `SjsonnetMain.interpretAsync` +instead: the loader returns a `Promise` and the call returns a `Promise` of +the result. Every `import`, `importstr`, and `importbin` reachable from the entry +source is discovered statically from the parsed ASTs and loaded concurrently up +front, including imports inside branches that are never evaluated. Once all +files are cached, the program is evaluated synchronously against that cache. + +```javascript +const result = await SjsonnetMain.interpretAsync( + "local lib = import 'lib.libsonnet'; lib.greet('world')", + {}, // extVars + {}, // tlaVars + "", // initial working directory + (wd, imported) => imported, // resolver, same shape as `interpret` + // loader: returns a Promise of the file contents (string for `import` / + // `importstr`, or bytes for `importbin`) + async (path, binary) => { + const response = await fetch("/files/" + path); + return binary ? new Uint8Array(await response.arrayBuffer()) : await response.text(); + } +); +``` + ### Running deeply recursive Jsonnet programs The depth of recursion is limited by running environment stack size. 
You can run Sjsonnet with increased diff --git a/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala b/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala index f96a6cea3..b8fbf5322 100644 --- a/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala +++ b/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala @@ -3,7 +3,11 @@ package sjsonnet import sjsonnet.stdlib.NativeRegex import scala.collection.mutable +import scala.concurrent.Future +import scala.scalajs.concurrent.JSExecutionContext.Implicits.queue import scala.scalajs.js +import scala.scalajs.js.JSConverters._ +import scala.scalajs.js.Thenable.Implicits._ import scala.scalajs.js.annotation.{JSExport, JSExportTopLevel} import scala.scalajs.js.typedarray.{ArrayBuffer, Int8Array, Uint8Array} @@ -49,6 +53,91 @@ object SjsonnetMain { case _ => None } + /** Convert the value returned by a JS import loader into a [[ResolvedFile]]. */ + private def toResolvedFile(path: String, value: Any, binaryData: Boolean): ResolvedFile = + value match { + case s: String => StaticResolvedFile(s) + case arr: Array[Byte] => StaticBinaryResolvedFile(arr) + case other => + toBytesFromJs(other) match { + case Some(bytes) => StaticBinaryResolvedFile(bytes) + case None => + val msg = + s"Import loader for '$path' must return a string or byte array, got: ${ + if (other == null) "null" else other.getClass.getName + }" + js.Dynamic.global.console.error(msg) + throw js.JavaScriptException(msg) + } + } + + /** Build the parent importer used during preload (only its `resolve` is called). 
*/ + private def jsResolveImporter(importResolver: js.Function2[String, String, String]): Importer = + new Importer { + def resolve(docBase: Path, importName: String): Option[Path] = + importResolver(docBase.asInstanceOf[JsVirtualPath].path, importName) match { + case null => None + case s => Some(JsVirtualPath(s)) + } + def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = + throw new RuntimeException( + s"Importer.read should not be called during async preload (path=$path)" + ) + } + + /** + * Coerce a JS object whose values are strings into a `Map[String, String]`. Iterates the JS + * dictionary directly instead of round-tripping through `ujson` to avoid the intermediate ujson + * tree, the `.obj.toMap` copy, and the trailing `.map` on the immutable map. + * + * `label` is the singular, capitalised name of the variable kind (e.g. "External variable"). It + * is used verbatim in the per-key error and lower-cased + pluralised in the generic failure + * message, which keeps both messages identical to the ones `interpret` emitted before this + * helper was extracted ("Failed to parse external variables: ..."). + * + * Throws `js.JavaScriptException` when a value is not a string or the object cannot be read. + */ + private def parseStringMap(label: String, value: js.Any): Map[String, String] = + try { + val dict = value.asInstanceOf[js.Dictionary[js.Any]] + val out = Map.newBuilder[String, String] + out.sizeHint(dict.size) + val it = dict.iterator + while (it.hasNext) { + val (k, v) = it.next() + (v: Any) match { + case s: String => out += k -> s + case _ => + throw js.JavaScriptException(s"$label '$k' must be a string value, got non-string") + } + } + out.result() + } catch { + // Already a JS-facing error (including the per-key one thrown above): pass it through. + case e: js.JavaScriptException => throw e + case e: Exception => + // Plural form preserves the pre-refactor message text for callers that match on it. + val msg = s"Failed to parse ${label.toLowerCase}s: ${e.getMessage}" + js.Dynamic.global.console.error(msg, e.asInstanceOf[js.Any]) + throw js.JavaScriptException(msg) + } + + private def runInterpret( + text: String, + parsedExtVars: Map[String, String], + parsedTlaVars: Map[String, String], + wd0: String, + importer: Importer, + preserveOrder: Boolean): js.Any = { + val interp = new Interpreter( + parsedExtVars, + parsedTlaVars, + JsVirtualPath(wd0), + importer, + parseCache = new DefaultParseCache, + settings = new Settings(preserveOrder = preserveOrder), + std = + new sjsonnet.stdlib.StdLibModule(nativeFunctions = Map.from(NativeRegex.functions)).module + ) 
interp.interpret0(text, JsVirtualPath("(memory)"), ujson.WebJson.Builder) match { + case Left(msg) => + js.Dynamic.global.console.error("Sjsonnet evaluation error:", msg) + throw js.JavaScriptException(msg) + case Right(v) => v + } + } + @JSExport def interpret( text: String, @@ -59,85 +148,128 @@ object SjsonnetMain { importLoader: js.Function2[String, Boolean, Any], preserveOrder: Boolean = false): js.Any = { try { - val parsedExtVars = - try { - ujson.WebJson.transform(extVars, ujson.Value).obj.toMap.map { - case (k, ujson.Str(v)) => (k, v) - case (k, _) => - throw js.JavaScriptException( - s"External variable '$k' must be a string value, got non-string" - ) + val parsedExtVars = parseStringMap("External variable", extVars) + val parsedTlaVars = parseStringMap("Top-level argument", tlaVars) + + val importer = new Importer { + def resolve(docBase: Path, importName: String): Option[Path] = + importResolver(docBase.asInstanceOf[JsVirtualPath].path, importName) match { + case null => None + case s => Some(JsVirtualPath(s)) } - } catch { - case e: js.JavaScriptException => throw e - case e: Exception => - val msg = s"Failed to parse external variables: ${e.getMessage}" - js.Dynamic.global.console.error(msg, e.asInstanceOf[js.Any]) - throw js.JavaScriptException(msg) + def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = + Some( + toResolvedFile( + path.asInstanceOf[JsVirtualPath].path, + importLoader(path.asInstanceOf[JsVirtualPath].path, binaryData), + binaryData + ) + ) + } + + runInterpret(text, parsedExtVars, parsedTlaVars, wd0, importer, preserveOrder) + } catch { + case e: js.JavaScriptException => throw e + case e: Exception => + val msg = s"Sjsonnet internal error: ${e.getClass.getName}: ${e.getMessage}" + js.Dynamic.global.console.error(msg, e.asInstanceOf[js.Any]) + throw js.JavaScriptException(msg) + } + } + + /** + * Async variant of [[interpret]]. 
Accepts an `importLoader` that returns a `Promise` of the file + * contents, and returns a `Promise` resolving to the evaluation result (the same JS value that + * synchronous `interpret` returns). + * + * Imports are eagerly front-loaded: every `import`, `importstr`, and `importbin` reachable from + * the entry source (plus from any extVar/tlaVar code snippets) is statically discovered and + * loaded before evaluation begins. This includes imports inside branches the evaluator will never + * force, e.g. `if false then import 'x' else 1` will still ask the loader for `x`. The tradeoff + * is that all I/O happens up front, which is what lets evaluation run synchronously. + * + * - Loader rejection (missing file, network error, etc.) fails the returned Promise. + * - A parse error on a discovered (non-entry) file is tolerated; it only surfaces if evaluation + * actually forces that branch. + * - The entry source's own parse error is reported through the normal `interpret0` formatting + * path so the error shape and location info match synchronous `interpret`. + * + * Each discovered code file is parsed once, during preload; the evaluator reuses that parsed AST + * instead of parsing the file again, so fastparse runs only once per imported file. 
+ */ + @JSExport + def interpretAsync( + text: String, + extVars: js.Any, + tlaVars: js.Any, + wd0: String, + importResolver: js.Function2[String, String, String], + importLoader: js.Function2[String, Boolean, js.Promise[Any]], + preserveOrder: Boolean = false): js.Promise[js.Any] = { + try { + val parsedExtVars = parseStringMap("External variable", extVars) + val parsedTlaVars = parseStringMap("Top-level argument", tlaVars) + + val parentImporter = jsResolveImporter(importResolver) + val preloader = new Preloader(parentImporter) + val wd = JsVirtualPath(wd0) + val entryPath = JsVirtualPath("(memory)") + + // Don't propagate the entry's parse error here — let runInterpret surface it via + // interpret0 so the message goes through the same Error.formatError path as synchronous + // interpret (root frame, "(memory):line:col", etc.). If parsing the entry fails we still + // get an empty pending queue and a fast path to runInterpret, which fails identically. + preloader.add(entryPath, StaticResolvedFile(text), ImportKind.Code) + + // ext/tla vars are parsed as Jsonnet code (Interpreter.parseVar) and may contain imports. + // Feed each value through the preloader using the same synthetic path layout so that + // discovered imports resolve against `wd`, matching the synchronous evaluator. + def discoverVarImports(prefix: String, vars: Map[String, String]): Unit = + vars.foreach { case (k, v) => + val varPath = wd / Util.wrapInLessThanGreaterThan(s"$prefix-var $k") + // Ignore parse errors here: Interpreter.parseVar will surface them at evaluation time + // with a proper stack frame if the variable is actually referenced. 
+ preloader.add(varPath, StaticResolvedFile(v), ImportKind.Code) } + discoverVarImports("ext", parsedExtVars) + discoverVarImports("tla", parsedTlaVars) - val parsedTlaVars = - try { - ujson.WebJson.transform(tlaVars, ujson.Value).obj.toMap.map { - case (k, ujson.Str(v)) => (k, v) - case (k, _) => - throw js.JavaScriptException( - s"Top-level argument '$k' must be a string value, got non-string" - ) - } - } catch { - case e: js.JavaScriptException => throw e - case e: Exception => - val msg = s"Failed to parse top-level arguments: ${e.getMessage}" - js.Dynamic.global.console.error(msg, e.asInstanceOf[js.Any]) - throw js.JavaScriptException(msg) + def loadOne(p: Preloader.Pending): Future[Unit] = { + val pathStr = p.path.asInstanceOf[JsVirtualPath].path + val promise = importLoader(pathStr, p.binaryData) + // implicit Thenable.Implicits converts Promise[Any] to Future[Any] + (promise: Future[Any]).map { value => + val resolved = toResolvedFile(pathStr, value, p.binaryData) + // Ignore parse errors on discovered imports: Jsonnet evaluation is lazy, so a parse + // error in `if false then import 'bad' else 1` should not fail the whole evaluation. + // If the branch is forced at runtime, the interpreter surfaces the error there. 
+ preloader.add(p.path, resolved, p.kind) + () } + } - val interp = new Interpreter( - parsedExtVars, - parsedTlaVars, - JsVirtualPath(wd0), - new Importer { - def resolve(docBase: Path, importName: String): Option[Path] = - importResolver(docBase.asInstanceOf[JsVirtualPath].path, importName) match { - case null => None - case s => Some(JsVirtualPath(s)) - } - def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = - importLoader(path.asInstanceOf[JsVirtualPath].path, binaryData) match { - case s: String => Some(StaticResolvedFile(s)) - case arr: Array[Byte] => Some(StaticBinaryResolvedFile(arr)) - case other => - // Handle JS-native binary types: Uint8Array, ArrayBuffer, or plain JS number[] - toBytesFromJs(other) match { - case Some(bytes) => Some(StaticBinaryResolvedFile(bytes)) - case None => - val msg = - s"Import loader for '${path}' must return a string or byte array, got: ${ - if (other == null) "null" else other.getClass.getName - }" - js.Dynamic.global.console.error(msg) - throw js.JavaScriptException(msg) - } - } - }, - parseCache = new DefaultParseCache, - settings = new Settings(preserveOrder = preserveOrder), - std = - new sjsonnet.stdlib.StdLibModule(nativeFunctions = Map.from(NativeRegex.functions)).module - ) - interp.interpret0(text, JsVirtualPath("(memory)"), ujson.WebJson.Builder) match { - case Left(msg) => - js.Dynamic.global.console.error("Sjsonnet evaluation error:", msg) - throw js.JavaScriptException(msg) - case Right(v) => v + def loop(): Future[Unit] = { + val batch = preloader.takePendingImports() + if (batch.isEmpty) Future.successful(()) + else Future.sequence(batch.map(loadOne)).flatMap(_ => loop()) } + + val result: Future[js.Any] = loop().map { _ => + runInterpret( + text, + parsedExtVars, + parsedTlaVars, + wd0, + preloader.importer, + preserveOrder + ) + } + result.toJSPromise } catch { - case e: js.JavaScriptException => throw e + case e: js.JavaScriptException => js.Promise.reject(e.exception) case e: Exception => val 
msg = s"Sjsonnet internal error: ${e.getClass.getName}: ${e.getMessage}" js.Dynamic.global.console.error(msg, e.asInstanceOf[js.Any]) - throw js.JavaScriptException(msg) + js.Promise.reject(msg) } } } diff --git a/sjsonnet/src/sjsonnet/ImportFinder.scala b/sjsonnet/src/sjsonnet/ImportFinder.scala new file mode 100644 index 000000000..93adaf626 --- /dev/null +++ b/sjsonnet/src/sjsonnet/ImportFinder.scala @@ -0,0 +1,56 @@ +package sjsonnet + +import scala.collection.mutable + +/** The kind of import expression that referenced a file. */ +sealed trait ImportKind { + + /** Whether the file should be read as raw bytes (`importbin`) vs. text (`import`/`importstr`). */ + def binaryData: Boolean + + /** Whether the loaded file is itself Jsonnet code that may contain further imports. */ + def isCode: Boolean +} + +object ImportKind { + case object Code extends ImportKind { + def binaryData: Boolean = false + def isCode: Boolean = true + } + case object Str extends ImportKind { + def binaryData: Boolean = false + def isCode: Boolean = false + } + case object Bin extends ImportKind { + def binaryData: Boolean = true + def isCode: Boolean = false + } +} + +/** + * Walks an [[Expr]] AST collecting all `import`, `importstr`, and `importbin` expressions. Used by + * [[Preloader]] to discover the transitive set of files that need to be loaded before evaluation. 
+ */ +object ImportFinder { + + final case class Found(value: String, kind: ImportKind) + + def collect(expr: Expr): Seq[Found] = { + val buf = mutable.ArrayBuffer.empty[Found] + val walker = new Walker(buf) + walker.transform(expr) + buf.toSeq + } + + private class Walker(buf: mutable.ArrayBuffer[Found]) extends ExprTransform { + override def transform(expr: Expr): Expr = { + expr match { + case Expr.Import(_, v) => buf += Found(v, ImportKind.Code) + case Expr.ImportStr(_, v) => buf += Found(v, ImportKind.Str) + case Expr.ImportBin(_, v) => buf += Found(v, ImportKind.Bin) + case _ => + } + rec(expr) + } + } +} diff --git a/sjsonnet/src/sjsonnet/Importer.scala b/sjsonnet/src/sjsonnet/Importer.scala index debd3b640..3d1e4df19 100644 --- a/sjsonnet/src/sjsonnet/Importer.scala +++ b/sjsonnet/src/sjsonnet/Importer.scala @@ -155,6 +155,23 @@ trait ResolvedFile { // Used by importbin def readRawBytes(): Array[Byte] + + /** + * Optional pre-parsed AST. When defined, [[CachedResolver.parse]] uses this instead of running + * fastparse again. Set by [[Preloader]] to avoid parsing each file twice (once during async + * import discovery, once during evaluation). + */ + def preParsedAst: Option[(Expr, FileScope)] = None +} + +/** Wraps another [[ResolvedFile]] with an attached pre-parsed AST so the parser can be skipped. 
*/ +final case class PreParsedResolvedFile(underlying: ResolvedFile, expr: Expr, fileScope: FileScope) + extends ResolvedFile { + def getParserInput(): ParserInput = underlying.getParserInput() + def readString(): String = underlying.readString() + def contentHash(): String = underlying.contentHash() + def readRawBytes(): Array[Byte] = underlying.readRawBytes() + override val preParsedAst: Option[(Expr, FileScope)] = Some((expr, fileScope)) } final case class StaticResolvedFile(content: String) extends ResolvedFile { @@ -209,25 +226,28 @@ class CachedResolver( ev: EvalErrorScope): Either[Error, (Expr, FileScope)] = { parseCache.getOrElseUpdate( (path, content.contentHash()), { - val parsed = - try { - fastparse.parse( - content.getParserInput(), - parser(path).document(_) - ) match { - case f @ Parsed.Failure(_, _, _) => - val traced = f.trace() - val pos = new Position(new FileScope(path), traced.index) - Left(new ParseError(traced.msg).addFrame(pos)) - case Parsed.Success(r, _) => Right(r) + val parsed: Either[Error, (Expr, FileScope)] = content.preParsedAst match { + case Some(pre) => Right(pre) + case None => + try { + fastparse.parse( + content.getParserInput(), + parser(path).document(_) + ) match { + case f @ Parsed.Failure(_, _, _) => + val traced = f.trace() + val pos = new Position(new FileScope(path), traced.index) + Left(new ParseError(traced.msg).addFrame(pos)) + case Parsed.Success(r, _) => Right(r) + } + } catch { + case e: ParseError if e.offset >= 0 => + val pos = new Position(new FileScope(path), e.offset) + Left(new ParseError(e.getMessage).addFrame(pos)) + case e: ParseError => + Left(e) } - } catch { - case e: ParseError if e.offset >= 0 => - val pos = new Position(new FileScope(path), e.offset) - Left(new ParseError(e.getMessage).addFrame(pos)) - case e: ParseError => - Left(e) - } + } parsed.flatMap { case (e, fs) => process(e, fs) } } ) diff --git a/sjsonnet/src/sjsonnet/Preloader.scala b/sjsonnet/src/sjsonnet/Preloader.scala new file mode 
100644 index 000000000..e2e2291a5 --- /dev/null +++ b/sjsonnet/src/sjsonnet/Preloader.scala @@ -0,0 +1,196 @@ +package sjsonnet + +import fastparse.Parsed + +import scala.collection.mutable + +/** + * Drives asynchronous (or otherwise externally-controlled) loading of imports by statically + * discovering them ahead of evaluation. + * + * Jsonnet has no dynamic imports: every `import`, `importstr`, or `importbin` expression has a + * literal string path. So given the parsed AST of an entry file we can enumerate its imports, load + * them, parse the loaded code files for further imports, and repeat until the closure is known. + * Once all files are in the cache, normal synchronous evaluation can run. + * + * Eager front-loading: every reachable import is loaded up front, including ones inside branches + * the evaluator will never force (`if false then import 'x' else 1`). This trades laziness for the + * ability to do all I/O before evaluation. Parse errors on discovered (non-entry) files are + * tolerated for the same reason — they only surface at evaluation time if the branch is actually + * forced. Loader failures (a rejected Promise, missing file, etc.) are real I/O problems and + * propagate up to the caller. + * + * The parsed AST of each loaded code file is attached to its cache entry as a + * [[PreParsedResolvedFile]] so the Interpreter does not re-run fastparse during evaluation; the + * static optimizer still runs once on cache hit. + * + * Cache keying: entries are keyed by `(Path, binaryData)` so that the same path referenced as both + * `importstr` (text) and `importbin` (bytes) keeps two distinct cache entries — matching the + * `Importer.read(path, binaryData)` contract. Without this, an `importstr "x" + importbin "x"` + * program would overwrite one entry and hand the wrong content to the evaluator. 
+ * + * Usage (pseudo-code): + * {{{ + * val preloader = new Preloader(parentImporter) + * preloader.add(entryPath, StaticResolvedFile(entryText), ImportKind.Code) + * while (preloader.pendingImports.nonEmpty) { + * val batch = preloader.takePendingImports() + * for (p <- batch) { + * val content = await asyncLoad(p.path, p.binaryData) // platform-specific async + * preloader.add(p.path, content, p.kind) + * } + * } + * val interpreter = new Interpreter(..., importer = preloader.importer, ...) + * interpreter.interpret(entryText, entryPath) + * }}} + * + * @param parentImporter + * used only to resolve import names to [[Path]]s. Its `read` is never called. + * @param settings + * parser settings (recursion depth, etc.). + */ +class Preloader(parentImporter: Importer, settings: Settings = Settings.default) { + + private val internedStrings = new mutable.HashMap[String, String] + private val internedFieldSets = + new mutable.HashMap[Val.StaticObjectFieldSet, java.util.LinkedHashMap[ + String, + java.lang.Boolean + ]] + + // Keyed by (path, binaryData) so importstr and importbin for the same path don't collide. + private val cache = mutable.LinkedHashMap.empty[(Path, Boolean), ResolvedFile] + + // Tracks the strongest kind enqueued/loaded for each (path, binaryData). Used both to dedupe + // loader calls and to upgrade a previously-Str pending entry to Code if a Code reference shows + // up later. + private val seen = mutable.HashMap.empty[(Path, Boolean), ImportKind] + private val pending = mutable.ArrayBuffer.empty[Preloader.Pending] + + /** Resolve an import name relative to a base path, using the parent importer. */ + def resolve(docBase: Path, importName: String): Option[Path] = + parentImporter.resolve(docBase, importName) + + /** + * Register a loaded file in the cache and, if it's a Jsonnet code file, parse it to discover its + * imports. + * + * The returned `Either` reports parse errors for code files; binary or string imports never fail + * here. 
+ */ + def add( + path: Path, + content: ResolvedFile, + kind: ImportKind = ImportKind.Code): Either[Error, Unit] = { + putContent(path, kind.binaryData, content) + if (kind.isCode) discover(path, content) + else { + // record() may have upgraded this path from Str to Code *after* the caller already took it + // off the pending queue (so there was no pending entry left to rewrite). The caller then + // still reports the stale Str kind; consult `seen` so the file's own imports are discovered + // anyway. Parse errors are ignored here, as in record(): string imports never fail in add(), + // and the interpreter surfaces the error if the `import` is actually forced. + if (seen.get((path, kind.binaryData)).exists(_.isCode)) { + val _ = discover(path, content) + } + Right(()) + } + } + + /** All imports queued for loading. */ + def pendingImports: Seq[Preloader.Pending] = pending.toSeq + + /** Atomically take and clear the queue of pending imports. */ + def takePendingImports(): Seq[Preloader.Pending] = { + val out = pending.toVector + pending.clear() + out + } + + /** True when no more imports need to be loaded. */ + def isComplete: Boolean = pending.isEmpty + + /** + * An [[Importer]] that resolves names through the parent importer but reads exclusively from this + * preloader's cache. Pass to an [[Interpreter]] for synchronous evaluation after preload + * completes. + */ + def importer: Importer = new Importer { + def resolve(docBase: Path, importName: String): Option[Path] = + parentImporter.resolve(docBase, importName) + def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = + cache.get((path, binaryData)) + } + + /** + * Live view of the loaded cache (not a copy: entries added later are visible through it), + * exposed so callers can inspect or persist it. + */ + def loaded: collection.Map[(Path, Boolean), ResolvedFile] = cache + + /** + * Insert content into the cache without clobbering a richer (pre-parsed) entry. discover() puts a + * [[PreParsedResolvedFile]]; a later add() of the same physical content (e.g. via a separate + * `importstr` reference to the same path) must not downgrade it back to plain text. 
+ */ + private def putContent(path: Path, binaryData: Boolean, content: ResolvedFile): Unit = { + val key = (path, binaryData) + cache.get(key) match { + case Some(existing) if existing.preParsedAst.isDefined && content.preParsedAst.isEmpty => + // keep the pre-parsed version + case _ => + cache.put(key, content) + } + } + + private def discover(path: Path, content: ResolvedFile): Either[Error, Unit] = { + val parser = new Parser(path, internedStrings, internedFieldSets, settings) + try { + fastparse.parse(content.getParserInput(), parser.document(_)) match { + case f: Parsed.Failure => + val traced = f.trace() + Left(new ParseError(s"$path: ${traced.msg}", offset = traced.index)) + case Parsed.Success((expr, fs), _) => + // Stash the parsed AST on the cache entry so the Interpreter doesn't re-run fastparse. + // The optimizer still runs once at evaluation time on cache hit. + cache.put((path, false), PreParsedResolvedFile(content, expr, fs)) + // Match the synchronous evaluator's docBase: resolve relative to the importing file's + // parent directory, not the file path itself. See Importer.resolveAndReadOrFail, which + // calls resolve(pos.fileScope.currentFile.parent(), ...). + val docBase = path.parent() + ImportFinder.collect(expr).foreach { found => + parentImporter.resolve(docBase, found.value).foreach { resolved => + record(resolved, found.kind) + } + } + Right(()) + } + } catch { + case e: ParseError => Left(e) + } + } + + /** + * Record that `path` was referenced with `kind`. Enqueues the load if new, upgrades a pending + * Str→Code if applicable, and lazily parses an already-loaded Str entry that needs Code analysis. + */ + private def record(path: Path, kind: ImportKind): Unit = { + val key = (path, kind.binaryData) + cache.get(key) match { + case Some(content) => + // Already loaded. If we now need Code analysis (e.g. 
an earlier `importstr` reference + // loaded the file as plain text and we just hit an `import` of the same path), parse it + // now and walk for sub-imports. + if (kind.isCode && content.preParsedAst.isEmpty) { + val _ = discover(path, content) + } + case None => + seen.get(key) match { + case None => + seen(key) = kind + pending += Preloader.Pending(path, kind) + case Some(existing) if !existing.isCode && kind.isCode => + seen(key) = kind + val idx = + pending.indexWhere(p => p.path == path && p.kind.binaryData == kind.binaryData) + if (idx >= 0) pending(idx) = Preloader.Pending(path, kind) + case _ => // already pending with equal-or-stronger kind + } + } + } +} + +object Preloader { + + /** A path that needs to be loaded before evaluation can proceed. */ + final case class Pending(path: Path, kind: ImportKind) { + def binaryData: Boolean = kind.binaryData + def isCode: Boolean = kind.isCode + } +} diff --git a/sjsonnet/src/sjsonnet/stdlib/EncodingModule.scala b/sjsonnet/src/sjsonnet/stdlib/EncodingModule.scala index f2375440c..f65456487 100644 --- a/sjsonnet/src/sjsonnet/stdlib/EncodingModule.scala +++ b/sjsonnet/src/sjsonnet/stdlib/EncodingModule.scala @@ -5,8 +5,6 @@ import java.nio.charset.StandardCharsets.UTF_8 import sjsonnet._ import sjsonnet.functions.AbstractFunctionModule -import java.nio.charset.StandardCharsets.UTF_8 - /** * Native implementations for Jsonnet standard-library entries in this module. 
* diff --git a/sjsonnet/test/src-js/sjsonnet/InterpretAsyncTests.scala b/sjsonnet/test/src-js/sjsonnet/InterpretAsyncTests.scala new file mode 100644 index 000000000..fb2443504 --- /dev/null +++ b/sjsonnet/test/src-js/sjsonnet/InterpretAsyncTests.scala @@ -0,0 +1,178 @@ +package sjsonnet + +import utest._ + +import scala.concurrent.Future +import scala.scalajs.concurrent.JSExecutionContext.Implicits.queue +import scala.scalajs.js +import scala.scalajs.js.JSConverters._ + +object InterpretAsyncTests extends TestSuite { + + /** + * Wraps a synchronous file map as a JS Promise-returning loader, so the test exercises the real + * async code path. + */ + private def makeAsyncLoader( + files: Map[String, String]): js.Function2[String, Boolean, js.Promise[Any]] = { + (path: String, _: Boolean) => + Future { + files.get(path) match { + case Some(s) => s.asInstanceOf[Any] + case None => throw js.JavaScriptException(s"missing file: $path") + } + }.toJSPromise + } + + private def makeResolver(known: Set[String]): js.Function2[String, String, String] = + (_: String, name: String) => if (known.contains(name)) name else null + + private def runAsync(text: String, files: Map[String, String]): Future[ujson.Value] = { + val loader = makeAsyncLoader(files) + val resolver = makeResolver(files.keySet) + SjsonnetMain + .interpretAsync( + text, + js.Dictionary[js.Any](), + js.Dictionary[js.Any](), + "/", + resolver, + loader + ) + .toFuture + .map(v => ujson.WebJson.transform(v, ujson.Value)) + } + + def tests: Tests = Tests { + + test("simple async import returns a Promise of the result") { + runAsync( + "(import 'lib.libsonnet').n", + Map("lib.libsonnet" -> "{ n: 42 }") + ).map(v => assert(v == ujson.Num(42))) + } + + test("transitive async imports load and evaluate") { + runAsync( + "(import 'a.libsonnet').value", + Map( + "a.libsonnet" -> "{ value: (import 'b.libsonnet').y + 1 }", + "b.libsonnet" -> "{ y: 10 }" + ) + ).map(v => assert(v == ujson.Num(11))) + } + + test("importstr 
loads as text without further parsing") { + // The data file would be invalid Jsonnet — it must NOT be parsed. + runAsync( + "importstr 'data.txt'", + Map("data.txt" -> "this is :: not :: jsonnet") + ).map(v => assert(v == ujson.Str("this is :: not :: jsonnet"))) + } + + test("preloads imports referenced from extVars") { + // extVar value is a Jsonnet snippet that imports a file. interpretAsync must walk + // ext/tla var snippets too; otherwise the cache-only importer hits a miss at eval time. + val files = Map("lib.libsonnet" -> "{ n: 5 }") + val loader = makeAsyncLoader(files) + val resolver = makeResolver(files.keySet) + val extVars = js.Dictionary[js.Any]("cfg" -> "(import 'lib.libsonnet').n") + SjsonnetMain + .interpretAsync( + "std.extVar('cfg') + 1", + extVars, + js.Dictionary[js.Any](), + "/", + resolver, + loader + ) + .toFuture + .map(v => ujson.WebJson.transform(v, ujson.Value)) + .map(v => assert(v == ujson.Num(6))) + } + + test("parse error in unforced branch does not fail evaluation") { + // Lazy semantics: `if false then import 'bad' else 1` should evaluate to 1, even though + // bad.libsonnet has a parse error. The preloader still loads the file (jsonnet imports + // are statically discoverable), but a parse failure on a discovered file must not abort. + runAsync( + "if false then import 'bad.libsonnet' else 1", + Map("bad.libsonnet" -> "this is :: not :: jsonnet") + ).map(v => assert(v == ujson.Num(1))) + } + + test("importstr and importbin for the same path return distinct values") { + // He-Pin's reproducer: `if true then importstr "x" else importbin "x"`. The cache must + // keep separate entries for text and bytes; otherwise async returns a binary file to + // importstr (rejecting with NotImplementedError) or vice versa. 
+ val text = "the-text" + val bytes = Array[Byte](1, 2, 3) + val resolver = makeResolver(Set("same")) + val loader: js.Function2[String, Boolean, js.Promise[Any]] = + (_: String, binaryData: Boolean) => + Future { + if (binaryData) bytes.toJSArray.asInstanceOf[Any] + else text.asInstanceOf[Any] + }.toJSPromise + SjsonnetMain + .interpretAsync( + "if true then importstr 'same' else importbin 'same'", + js.Dictionary[js.Any](), + js.Dictionary[js.Any](), + "/", + resolver, + loader + ) + .toFuture + .map(v => ujson.WebJson.transform(v, ujson.Value)) + .map(v => assert(v == ujson.Str(text))) + } + + test("entry parse error matches synchronous interpret's error formatting") { + // The async path must route entry parse errors through interpret0 so the message shape + // and root frame match the synchronous interpret. Verify the error includes the + // "(memory)" location marker that interpret0 produces. + val resolver = makeResolver(Set.empty) + val loader: js.Function2[String, Boolean, js.Promise[Any]] = + (path: String, _: Boolean) => js.Promise.reject(s"unexpected load: $path") + val out = SjsonnetMain.interpretAsync( + "local x =", // syntactically invalid + js.Dictionary[js.Any](), + js.Dictionary[js.Any](), + "/", + resolver, + loader + ) + out.toFuture.transform { + case scala.util.Success(v) => + scala.util.Failure(new RuntimeException(s"expected parse failure, got $v")) + case scala.util.Failure(e) => + val msg = e.getMessage + if (msg != null && msg.contains("(memory)")) scala.util.Success(()) + else + scala.util.Failure( + new RuntimeException(s"expected formatted parse error mentioning (memory), got: $msg") + ) + } + } + + test("async loader rejection propagates through the returned Promise") { + val resolver = makeResolver(Set("missing.libsonnet")) + val loader: js.Function2[String, Boolean, js.Promise[Any]] = + (path: String, _: Boolean) => js.Promise.reject(s"boom: $path") + val out = SjsonnetMain.interpretAsync( + "import 'missing.libsonnet'", + 
js.Dictionary[js.Any](), + js.Dictionary[js.Any](), + "/", + resolver, + loader + ) + out.toFuture.transform { + case scala.util.Failure(_) => scala.util.Success(()) + case scala.util.Success(v) => + scala.util.Failure(new RuntimeException(s"expected failure, got $v")) + } + } + } +} diff --git a/sjsonnet/test/src/sjsonnet/PreloaderTests.scala b/sjsonnet/test/src/sjsonnet/PreloaderTests.scala new file mode 100644 index 000000000..209bce6b4 --- /dev/null +++ b/sjsonnet/test/src/sjsonnet/PreloaderTests.scala @@ -0,0 +1,251 @@ +package sjsonnet + +import utest._ + +import scala.collection.mutable + +object PreloaderTests extends TestSuite { + + /** A virtual file system used by both the preloader's `resolve` and the test's loading loop. */ + private class FakeFs(files: Map[String, String], binFiles: Map[String, Array[Byte]] = Map.empty) { + val readPaths: mutable.ArrayBuffer[(String, Boolean)] = mutable.ArrayBuffer.empty + + val importer: Importer = new Importer { + def resolve(docBase: Path, importName: String): Option[Path] = { + val candidate = DummyPath(importName) + if (files.contains(importName) || binFiles.contains(importName)) Some(candidate) else None + } + def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = + throw new RuntimeException(s"read should not be called during preload: $path") + } + + def load(path: Path, binaryData: Boolean): ResolvedFile = { + val key = path.asInstanceOf[DummyPath].segments.head + readPaths += ((key, binaryData)) + if (binaryData) StaticBinaryResolvedFile(binFiles(key)) + else StaticResolvedFile(files(key)) + } + } + + private def runPreload(fs: FakeFs, entryPath: Path, entry: String): Preloader = { + val preloader = new Preloader(fs.importer) + preloader.add(entryPath, StaticResolvedFile(entry), ImportKind.Code) match { + case Left(err) => throw err + case Right(_) => + } + while (!preloader.isComplete) { + val batch = preloader.takePendingImports() + batch.foreach { p => + val content = fs.load(p.path, 
p.binaryData) + preloader.add(p.path, content, p.kind) match { + case Left(err) => throw err + case Right(_) => + } + } + } + preloader + } + + def tests: Tests = Tests { + + test("discovers transitive imports") { + val fs = new FakeFs( + Map( + "a.libsonnet" -> "import 'b.libsonnet'", + "b.libsonnet" -> "{ x: 1 }" + ) + ) + val entry = "import 'a.libsonnet'" + val preloader = runPreload(fs, DummyPath("entry"), entry) + + val loaded = fs.readPaths.map(_._1).toSet + assert(loaded == Set("a.libsonnet", "b.libsonnet")) + assert(preloader.loaded.size == 3) // entry + a + b, all keyed at (path, false) + } + + test("dedupes identical imports") { + val fs = new FakeFs( + Map( + "shared.libsonnet" -> "{ y: 2 }" + ) + ) + val entry = "[import 'shared.libsonnet', import 'shared.libsonnet']" + runPreload(fs, DummyPath("entry"), entry) + + assert(fs.readPaths.count(_._1 == "shared.libsonnet") == 1) + } + + test("handles importstr and importbin") { + val fs = new FakeFs( + Map("data.txt" -> "hello"), + binFiles = Map("blob.bin" -> Array[Byte](1, 2, 3)) + ) + val entry = "{ s: importstr 'data.txt', b: importbin 'blob.bin' }" + runPreload(fs, DummyPath("entry"), entry) + + assert(fs.readPaths.toSet == Set(("data.txt", false), ("blob.bin", true))) + } + + test("does not parse importstr/importbin contents for further imports") { + // The string content here would be invalid Jsonnet if parsed; preloader must not parse it. 
+ val fs = new FakeFs(Map("data.txt" -> "this is not jsonnet ::: !@#")) + val entry = "importstr 'data.txt'" + runPreload(fs, DummyPath("entry"), entry) + + assert(fs.readPaths.toSeq == Seq(("data.txt", false))) + } + + test("interpreter evaluates against preloaded cache") { + val fs = new FakeFs( + Map( + "lib.libsonnet" -> "{ greet(name): 'hello, ' + name }" + ) + ) + val entry = "(import 'lib.libsonnet').greet('world')" + val entryPath = DummyPath("entry") + val preloader = runPreload(fs, entryPath, entry) + + val interp = new Interpreter( + Map.empty[String, String], + Map.empty[String, String], + DummyPath(), + preloader.importer, + parseCache = new DefaultParseCache + ) + val result = interp.interpret(entry, entryPath) + assert(result == Right(ujson.Str("hello, world"))) + } + + test("preloaded files carry pre-parsed AST so fastparse runs once") { + // Wrap each ResolvedFile with a counter so we can detect re-parsing. The Preloader parses + // once during discover; the Interpreter should consume the attached AST without re-parsing. 
+ val parseCount = mutable.HashMap.empty[Path, Int].withDefaultValue(0) + class CountingResolvedFile(content: String, path: Path) extends ResolvedFile { + def getParserInput(): fastparse.ParserInput = { + parseCount(path) = parseCount(path) + 1 + fastparse.IndexedParserInput(content) + } + def readString(): String = content + def contentHash(): String = content + def readRawBytes(): Array[Byte] = + content.getBytes(java.nio.charset.StandardCharsets.UTF_8) + } + val files = Map( + "lib.libsonnet" -> "{ x: 1 }", + "entry" -> "(import 'lib.libsonnet').x" + ) + val importer = new Importer { + def resolve(docBase: Path, importName: String): Option[Path] = + if (files.contains(importName)) Some(DummyPath(importName)) else None + def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = + throw new RuntimeException(s"unexpected read: $path") + } + val preloader = new Preloader(importer) + val entryPath = DummyPath("entry") + preloader.add(entryPath, new CountingResolvedFile(files("entry"), entryPath)) + while (!preloader.isComplete) { + val batch = preloader.takePendingImports() + batch.foreach { p => + val key = p.path.asInstanceOf[DummyPath].segments.head + preloader.add(p.path, new CountingResolvedFile(files(key), p.path), p.kind) + } + } + // Both files have been parsed exactly once — the Preloader's parse pass. + assert(parseCount(DummyPath("lib.libsonnet")) == 1) + assert(parseCount(entryPath) == 1) + + // Run a full interpret. The Interpreter must not re-parse; getParserInput would bump the + // counter again if it did. 
+ val interp = new Interpreter( + Map.empty[String, String], + Map.empty[String, String], + DummyPath(), + preloader.importer, + parseCache = new DefaultParseCache + ) + val result = interp.interpret(files("entry"), entryPath) + assert(result == Right(ujson.Num(1))) + assert(parseCount(DummyPath("lib.libsonnet")) == 1) + } + + test("resolves imports relative to the importing file's parent directory") { + // Resolver records what docBase it was called with, and only resolves names against the + // expected `dir/` parent — proving the preloader passes parent(), not the file path itself. + val seenDocBases = mutable.ArrayBuffer.empty[String] + val files = Map("dir/a.libsonnet" -> "import 'b.libsonnet'", "dir/b.libsonnet" -> "{ ok: true }") + val importer = new Importer { + def resolve(docBase: Path, importName: String): Option[Path] = { + seenDocBases += docBase.asInstanceOf[DummyPath].segments.mkString("/") + val joined = docBase.asInstanceOf[DummyPath].segments.mkString("/") match { + case "" => importName + case base => s"$base/$importName" + } + if (files.contains(joined)) Some(DummyPath(joined.split('/').toIndexedSeq: _*)) else None + } + def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = + throw new RuntimeException(s"unexpected read: $path") + } + val preloader = new Preloader(importer) + val entryPath = DummyPath("dir", "a.libsonnet") + preloader.add(entryPath, StaticResolvedFile(files("dir/a.libsonnet")), ImportKind.Code) + while (!preloader.isComplete) { + val batch = preloader.takePendingImports() + batch.foreach { p => + val key = p.path.asInstanceOf[DummyPath].segments.mkString("/") + preloader.add(p.path, StaticResolvedFile(files(key)), p.kind) + } + } + // Every docBase observed should be the parent dir, never the file path itself. 
+ assert(seenDocBases.forall(_ == "dir")) + assert(preloader.loaded.contains((DummyPath("dir", "b.libsonnet"), false))) + } + + test("does not fail preload on parse errors in discovered files") { + // A parse error in a discovered file (e.g. behind `if false then import 'bad'`) should + // not abort preload — Jsonnet evaluation is lazy, the error should only surface if the + // branch is actually forced. + val fs = new FakeFs(Map("bad.libsonnet" -> "this is :: not :: jsonnet")) + val preloader = new Preloader(fs.importer) + preloader.add( + DummyPath("entry"), + StaticResolvedFile("if false then import 'bad.libsonnet' else 1"), + ImportKind.Code + ) + while (!preloader.isComplete) { + val batch = preloader.takePendingImports() + batch.foreach { p => + val content = fs.load(p.path, p.binaryData) + // Parse failure here returns Left, but we deliberately ignore it. + preloader.add(p.path, content, p.kind) + } + } + // bad.libsonnet was loaded but its parse error did not fail preload. + assert(fs.readPaths.map(_._1).contains("bad.libsonnet")) + assert(preloader.loaded.contains((DummyPath("bad.libsonnet"), false))) + } + + test("importstr and importbin for the same path keep separate cache entries") { + // He-Pin's reproducer: a single path referenced as both text and bytes must not collide + // in the cache. With a Path-only key, one read overwrote the other, breaking the + // synchronous evaluator's Importer.read(path, binaryData) contract. 
+ val fs = new FakeFs( + Map("same" -> "the-text"), + binFiles = Map("same" -> Array[Byte](1, 2, 3)) + ) + val entry = "{ s: importstr 'same', b: importbin 'same' }" + val preloader = runPreload(fs, DummyPath("entry"), entry) + val sameTxt = preloader.importer.read(DummyPath("same"), binaryData = false) + val sameBin = preloader.importer.read(DummyPath("same"), binaryData = true) + assert(sameTxt.exists(_.readString() == "the-text")) + assert(sameBin.exists(_.readRawBytes().sameElements(Array[Byte](1, 2, 3)))) + } + + test("parse error in entry is reported") { + val fs = new FakeFs(Map.empty) + val preloader = new Preloader(fs.importer) + val out = + preloader.add(DummyPath("entry"), StaticResolvedFile("local x ="), ImportKind.Code) + assert(out.isLeft) + } + } +}