diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2572b941..b45865a2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -17,10 +17,12 @@ jobs: build: runs-on: ubuntu-latest strategy: + fail-fast: false matrix: distribution: [ 'temurin' ] java: [ '21' ] - name: Java ${{ matrix.Java }} Build + grammarParseEngine: [ 'false', 'true' ] + name: Java ${{ matrix.Java }} Build (grammar engine ${{ matrix.grammarParseEngine }}) steps: - name: Check out Git repository uses: actions/checkout@v7 @@ -46,10 +48,11 @@ jobs: mkdir -p ./build docker compose --project-directory ./systemd-build up --build ./generate-changelog > build/CHANGELOG - ./gradlew test buildPlugin + ./gradlew test buildPlugin -Dsystemd.unit.grammarParseEngine=${{ matrix.grammarParseEngine }} ./gradlew --stop - name: Publish Unit Test Results uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + check_name: "Unit Test Results (grammar engine ${{ matrix.grammarParseEngine }})" junit_files: build/test-results/**/*.xml diff --git a/build.gradle.kts b/build.gradle.kts index b43f36cf..e2501df9 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -144,6 +144,9 @@ tasks { test { testLogging.showExceptions = true testLogging.setExceptionFormat("full") + // Forward the experimental grammar-engine flag so CI can run the whole suite twice: once on the + // original engine and once with -Dsystemd.unit.grammarParseEngine=true (see GrammarOptionValue). + systemProperty("systemd.unit.grammarParseEngine", System.getProperty("systemd.unit.grammarParseEngine", "false")) } } diff --git a/ci/release.Jenkinsfile b/ci/release.Jenkinsfile index 64f1f32f..663a5bb7 100644 --- a/ci/release.Jenkinsfile +++ b/ci/release.Jenkinsfile @@ -332,6 +332,11 @@ pipeline { } } stage("Build") { + // Two independent kubernetes pods => two separate workspaces => safe to run concurrently. 
The + // second pod re-runs the whole unit-test suite with the new grammar engine forced on, so the + // experimental path is validated on every release build before it ships. + parallel { + stage("Build & Publish") { agent { kubernetes { //cloud 'kubernetes' @@ -394,6 +399,27 @@ pipeline { } } } + } + stage("Unit Tests (new grammar engine)") { + agent { + kubernetes { + //cloud 'kubernetes' + defaultContainer 'worker-pod' + + // language=yaml + yaml buildPodDefinition("${env.DOCKER_REGISTRY_PREFIX}/systemd-plugin-build-environment:$buildEnvironmentHash", false,false) + //workspaceVolume hostPathWorkspaceVolume('/opt/jenkins/workspace') + } + } + steps { + unstash 'systemd-build-build' + unstash 'ubuntu-units' + sh(""" + ./gradlew --no-daemon -I ./build-cache-init.gradle.kts -I ./repo-cache-init.gradle.kts --build-cache test -Dsystemd.unit.grammarParseEngine=true + """) + } + } + } } } post { diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/AlternativeCombinator.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/AlternativeCombinator.kt index 76320d71..b538df24 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/AlternativeCombinator.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/AlternativeCombinator.kt @@ -38,6 +38,11 @@ open class AlternativeCombinator(vararg val tokens: Combinator) : Combinator { return match(value, offset, Combinator::SemanticMatch) } + override fun parse(value: String, offset: Int): Sequence = + // Offer every alternative's steps (matches and dead ends), so option order no longer affects + // correctness, and a failing branch still contributes what it expected. 
+ tokens.asSequence().flatMap { it.parse(value, offset) } + override fun toString(): String = toStringIndented(0) override fun toStringIndented(indent: Int): String { diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Combinator.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Combinator.kt index 29757dac..33db155c 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Combinator.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Combinator.kt @@ -32,5 +32,21 @@ interface Combinator { */ fun SemanticMatch(value : String, offset: Int): MatchResult + /** + * List-of-successes matcher (#467). Returns EVERY way this combinator can proceed from [offset] in + * [value], lazily, as a stream of [ParseStep]s: a [Parse] for each way it matched, and a [Stuck] + * for each dead end (carrying where it got stuck and what was expected there). + * + * This lives alongside Syntactic/SemanticMatch and is a single lenient pass: each [ParsedToken] + * carries a `valid` flag for the strict (semantic) check. Because every alternative is offered + * rather than the first greedy one committed to, matching is complete — e.g. + * Seq(ZeroOrMore("a"), "a") on "aa" matches, because ZeroOrMore offers the shorter match too. + * + * Returning [Stuck] as a value (rather than an empty sequence) means failure information — how far + * we got and what we expected — travels back through the return value, so no side channel is + * needed to localize errors. 
+ */ + fun parse(value: String, offset: Int): Sequence + fun toStringIndented(indent: Int): String } diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/EOF.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/EOF.kt index 770b4ef7..006b5a80 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/EOF.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/EOF.kt @@ -17,6 +17,10 @@ class EOF : Combinator { } } + override fun parse(value: String, offset: Int): Sequence = + if (offset == value.length) sequenceOf(Parse(offset, emptyList())) + else sequenceOf(Stuck(offset, setOf(this))) // expected end-of-input here + override fun toStringIndented(indent: Int): String { return "EOF" } diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/FlexibleLiteralChoiceTerminal.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/FlexibleLiteralChoiceTerminal.kt index 4876bfde..a2bdcaa2 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/FlexibleLiteralChoiceTerminal.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/FlexibleLiteralChoiceTerminal.kt @@ -91,6 +91,15 @@ class FlexibleLiteralChoiceTerminal(vararg val choices: String) : TerminalCombin return NoMatch.copy(longestMatch = offset) } + override fun parse(value: String, offset: Int): Sequence { + // Lenient shape match (so a wrong token like AF_BOGUS still matches and can be highlighted), + // valid only if the matched text is one of the exact choices. 
+ val m = syntaticMatch.matchAt(value, offset) ?: return sequenceOf(Stuck(offset, setOf(this))) + val text = m.value + val valid = choices.any { it == text } + return sequenceOf(Parse(offset + text.length, listOf(ParsedToken(offset, offset + text.length, text, this, valid)))) + } + override fun toString(): String { return if (choices.size == 1) { "Literal(\"${choices[0]}\")" diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/GrammarOptionValue.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/GrammarOptionValue.kt index 68bb0aba..a8567bf9 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/GrammarOptionValue.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/GrammarOptionValue.kt @@ -4,12 +4,15 @@ import com.intellij.codeInspection.LocalQuickFix import com.intellij.codeInspection.ProblemHighlightType import com.intellij.codeInspection.ProblemsHolder import com.intellij.openapi.diagnostic.Logger +import com.intellij.openapi.progress.ProcessCanceledException +import com.intellij.openapi.progress.ProgressManager import com.intellij.openapi.project.Project import com.intellij.openapi.util.TextRange import net.sjrx.intellij.plugins.systemdunitfiles.intentions.ReplaceInvalidLiteralChoiceQuickFix import net.sjrx.intellij.plugins.systemdunitfiles.psi.UnitFilePropertyType import net.sjrx.intellij.plugins.systemdunitfiles.semanticdata.SemanticDataRepository import net.sjrx.intellij.plugins.systemdunitfiles.semanticdata.optionvalues.OptionValueInformation +import net.sjrx.intellij.plugins.systemdunitfiles.settings.ExperimentalSettings open class GrammarOptionValue( override val validatorName: String, @@ -34,6 +37,11 @@ open class GrammarOptionValue( override fun generateProblemDescriptors(property: UnitFilePropertyType, holder: ProblemsHolder) { val value = 
property.valueText ?: return + if (FORCE_PARSE_ENGINE || ExperimentalSettings.getInstance(property.project).state.useGrammarParseEngine) { + generateProblemDescriptorsViaParse(property, value, holder) + return + } + val syntaticMatch = combinator.SyntacticMatch(value, 0) try { @@ -114,7 +122,66 @@ open class GrammarOptionValue( } + /** + * Experimental path (#467): validate via the list-of-successes engine and map the [ParseOutcome] + * onto the same problem descriptors the SyntacticMatch/SemanticMatch path produces. Gated behind + * [FORCE_PARSE_ENGINE] or `ExperimentalSettings.State.useGrammarParseEngine`, so the original engine remains the default. + */ + private fun generateProblemDescriptorsViaParse(property: UnitFilePropertyType, value: String, holder: ProblemsHolder) { + val outcome = try { + combinator.validate(value) { ProgressManager.checkCanceled() } + } catch (e: ProcessCanceledException) { + throw e + } catch (e: RuntimeException) { + LOG.error("Error while processing ${property.key} with value $value", e) + holder.registerProblem(property.valueNode.psi, "Internal error, please report an bug to the systemd plugin. Include the Key and Value used.", ProblemHighlightType.ERROR) + return + } + + when (outcome) { + is ParseOutcome.Valid -> return + + is ParseOutcome.SyntaxError -> { + // Highlight from where parsing got stuck to the end (or everything if it reached the end). + val tr = if (outcome.furthest < value.length) { + TextRange(outcome.furthest, value.length) + } else { + TextRange(0, value.length) + } + holder.registerProblem(property.valueNode.psi, "${property.key}'s value does not match the expected format. Possible reasons include unrecognized characters or premature end of input.", ProblemHighlightType.GENERIC_ERROR_OR_WARNING, tr) + } + + is ParseOutcome.SemanticError -> { + // Well-formed but invalid: highlight the offending token, and offer literal replacements. 
+ val bad = outcome.badToken + val tr = TextRange(bad.start, bad.end) + + val quickFixes = mutableListOf() + val choices = when (val terminal = bad.terminal) { + is LiteralChoiceTerminal -> terminal.choices + is FlexibleLiteralChoiceTerminal -> terminal.choices + else -> emptyArray() + } + for (choice in choices) { + quickFixes.add(ReplaceInvalidLiteralChoiceQuickFix(bad.start, bad.text, choice)) + } + + holder.registerProblem(property.valueNode.psi, "${property.key}'s value is correctly formatted but seems invalid.", ProblemHighlightType.GENERIC_ERROR_OR_WARNING, tr, *quickFixes.toTypedArray()) + } + } + } + companion object { private val LOG = Logger.getInstance(SemanticDataRepository::class.java) + + /** + * Forces the new list-of-successes engine for validation regardless of the per-project setting, + * used to run the whole unit-test suite against it (CI runs the suite twice: once without and + * once with -Dsystemd.unit.grammarParseEngine=true). Only the validation engine is forced; the + * cosmetic annotators stay on the user flag, so problem counts are unchanged and only exact + * error spans/messages can differ between engines. 
+ */ + @JvmField + val FORCE_PARSE_ENGINE: Boolean = java.lang.Boolean.getBoolean("systemd.unit.grammarParseEngine") } } diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/IntegerTerminal.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/IntegerTerminal.kt index 11864d77..43f07293 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/IntegerTerminal.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/IntegerTerminal.kt @@ -29,6 +29,14 @@ class IntegerTerminal(private val minInclusive: Long,private val maxExclusive: L } } + override fun parse(value: String, offset: Int): Sequence { + val m = intRegex.matchAt(value, offset) ?: return sequenceOf(Stuck(offset, setOf(this))) + val text = m.value + // Lenient: any integer matches (so we can locate it); valid only if it is within range. + val valid = text.toLongOrNull()?.let { it >= minInclusive && it < maxExclusive } ?: false + return sequenceOf(Parse(offset + text.length, listOf(ParsedToken(offset, offset + text.length, text, this, valid)))) + } + override fun toString(): String { return "Int($minInclusive,$maxExclusive)" } diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/LiteralChoiceTerminal.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/LiteralChoiceTerminal.kt index 870db3d1..5f69c6a1 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/LiteralChoiceTerminal.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/LiteralChoiceTerminal.kt @@ -24,6 +24,21 @@ class LiteralChoiceTerminal(vararg var choices: String) : TerminalCombinator { return match(value, offset) } + override fun parse(value: String, offset: Int): 
Sequence { + // Offer EVERY choice that matches at this offset, not just the longest. When choices share a + // prefix more than one can match here -- e.g. CollectMode's ("inactive", "inactive-or-failed") + // both match at the start of "inactive-or-failed". A greedy matcher would have to commit to one + // and could dead-end later (taking "inactive" when the grammar needed "inactive-or-failed", or + // vice versa); returning both as separate Parses lets the rest of the grammar pick the branch + // that actually completes, with no backtracking. Every choice here is an exact literal, so each + // matched token is always strictly valid (valid = true) -- the wrong-value case is what + // FlexibleLiteralChoiceTerminal handles, where a token can match the shape but be valid = false. + val matches = choices.filter { value.startsWith(it, offset) } + return if (matches.isEmpty()) sequenceOf(Stuck(offset, setOf(this))) + else matches.asSequence() + .map { Parse(offset + it.length, listOf(ParsedToken(offset, offset + it.length, it, this, valid = true))) } + } + override fun toString(): String { return if (choices.size == 1) { "Literal(\"${choices[0]}\")" diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/OneOrMore.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/OneOrMore.kt index f56a0f1b..77c3850c 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/OneOrMore.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/OneOrMore.kt @@ -40,6 +40,25 @@ class OneOrMore(val combinator : Combinator) : Combinator { return match(value, offset, combinator::SemanticMatch) } + override fun parse(value: String, offset: Int): Sequence { + // Same as ZeroOrMore, but the first repetition is mandatory (and must make progress). 
+ fun extend(from: Parse): Sequence = sequence { + yield(from) + for (step in combinator.parse(value, from.end)) { + when (step) { + is Parse -> if (step.end > from.end) yieldAll(extend(Parse(step.end, from.tokens + step.tokens))) + is Stuck -> yield(step) + } + } + } + return combinator.parse(value, offset).flatMap { step -> + when (step) { + is Parse -> if (step.end > offset) extend(step) else emptySequence() + is Stuck -> sequenceOf(step) // the mandatory first repetition failed + } + } + } + override fun toString(): String = toStringIndented(0) override fun toStringIndented(indent: Int): String { diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Parse.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Parse.kt new file mode 100644 index 00000000..17f908ac --- /dev/null +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Parse.kt @@ -0,0 +1,124 @@ +package net.sjrx.intellij.plugins.systemdunitfiles.semanticdata.optionvalues.grammar + +/* + * List-of-successes matcher (GitHub #467). + * + * These types support a second matching method, `Combinator.parse()`, that lives ALONGSIDE the + * existing SyntacticMatch / SemanticMatch on every combinator. GrammarOptionValue only uses them + * behind the experimental grammar-engine opt-in (the original engine stays the default), so the + * approach can be validated against the real grammars before deciding to migrate the caller. + * + * Where the existing engine returns ONE greedy result and runs two near-identical passes, parse() + * returns EVERY way a combinator can proceed (lazily), and folds the strict "semantic" check into a + * `valid` flag on each token. So one lenient pass answers both questions, and greedy traps like + * Seq(ZeroOrMore("a"), "a") on "aa" resolve themselves. 
+ * + * FAILURE IS A VALUE, NOT AN ABSENCE + * ---------------------------------- + * A matcher does not signal "no match" by returning an empty sequence. It returns a [Stuck] — a + * first-class value carrying the offset it got stuck at and what it was hoping to see. That single + * decision is why error localization needs no side-channel: when Seq(..., EOF()) can't finish, the + * EOF failure rides back up the return value as a Stuck(offset=7, {EOF}), so we still know we + * reached offset 7. (Earlier this was modelled as an empty sequence, which threw the offset away and + * forced a mutable "frontier" object to be threaded through parse() to recover it.) + * + * SIMPLER ALTERNATIVE (for the record): instead of returning Stuck values, you can thread a mutable + * accumulator ("frontier") through parse() that every leaf matcher writes its deepest reach into. + * That is less code and a touch lazier, but it splits the data flow across two channels — successes + * via the return value, failures via a pass-by-reference side effect — which is the asymmetry this + * design removes by making both kinds of result travel the same way. + */ + +/** A single terminal token, with the strict-validity verdict (the old "semantic" check) folded in. */ +data class ParsedToken( + val start: Int, + val end: Int, + val text: String, + val terminal: TerminalCombinator, + val valid: Boolean, +) + +/** One step a matcher can take from an offset: either it consumed input ([Parse]) or it got [Stuck]. */ +sealed interface ParseStep + +/** A successful match: consumed input up to [end], producing [tokens] (each with its `valid` flag). */ +data class Parse(val end: Int, val tokens: List) : ParseStep + +/** + * A dead end: matching could not proceed at [offset], where [expected] is the set of matchers the + * grammar was hoping to see. 
Carrying this as a value (rather than an empty result) is what lets us + * localize errors and, later, drive completion — both are "what was expected at this offset?". + */ +data class Stuck(val offset: Int, val expected: Set) : ParseStep + +/** The outcome of validating a whole value against a grammar via parse(). */ +sealed interface ParseOutcome { + /** Some path consumed the whole value with every token strictly valid. */ + object Valid : ParseOutcome + + /** A path consumed the whole value, but a token is not strictly valid (well-formed but wrong). */ + data class SemanticError(val badToken: ParsedToken) : ParseOutcome + + /** + * No path consumed the whole value. [furthest] is the deepest offset any path reached, and + * [expected] is what the grammar was hoping to see there (for error localization / completion). + */ + data class SyntaxError(val furthest: Int, val expected: Set) : ParseOutcome +} + +/** + * One lenient parse answers both questions the old two passes did: + * - syntactic ("could be this, color it"): did any path consume the whole value? + * - semantic ("actually valid"): did any such path use only valid tokens? + * + * On failure we fold the [Stuck] values back into the deepest offset reached and the union of what + * was expected there — the "frontier", computed from the return value rather than mutated into it. + * + * The matcher is exhaustive, so a pathologically ambiguous grammar could explore a huge number of + * steps. Two pure guards keep this safe to run on a UI/highlighting thread without any IntelliJ + * dependency here: [onStep] is invoked once per explored step (the IntelliJ layer passes a callback + * that throws on cancellation), and [maxSteps] caps total work. If the cap is hit we fail OPEN — + * return [ParseOutcome.Valid] rather than flag a value we could not fully explore. 
+ * + * When the value is well-formed but invalid, an ambiguous grammar can yield several full parses that + * tokenize the same string differently, each with a different first-invalid token. We do NOT report + * whichever the lazy stream happens to yield first (that would depend on incidental combinator + * iteration order). Instead we report the invalid token from the parse that stayed valid the LONGEST + * — the largest start offset — mirroring how a [ParseOutcome.SyntaxError] reports the furthest offset + * reached. That rule is invariant under combinator iteration order. The only remaining tie is two + * parses whose first-invalid token starts at the very same offset; there the earlier one in stream + * order wins. For a tie produced by an [AlternativeCombinator] (e.g. two enums over the same + * character shape) that is the earlier-declared branch, so an author can steer it by ordering; ties + * from other combinators follow that combinator's own order ([LiteralChoiceTerminal] longest-first, + * the repetition combinators shorter-count-first). + */ +fun Combinator.validate(value: String, maxSteps: Int = 1_000_000, onStep: () -> Unit = {}): ParseOutcome { + var deepestBad: ParsedToken? = null + var furthest = 0 + var expected = emptySet() + var steps = 0 + + for (step in parse(value, 0)) { + onStep() + if (++steps > maxSteps) return ParseOutcome.Valid + when (step) { + is Parse -> { + if (step.end == value.length) { + val bad = step.tokens.firstOrNull { !it.valid } + if (bad == null) return ParseOutcome.Valid // any fully-valid full parse wins; short-circuit + // Keep the bad token from the parse that stayed valid the longest; an exact tie keeps the + // first in stream order. See the function doc for why this is order-invariant. 
+ val current = deepestBad + if (current == null || bad.start > current.start) deepestBad = bad + } + if (step.end > furthest) { furthest = step.end; expected = emptySet() } + } + is Stuck -> when { + step.offset > furthest -> { furthest = step.offset; expected = step.expected } + step.offset == furthest -> expected = expected + step.expected + } + } + } + + return deepestBad?.let { ParseOutcome.SemanticError(it) } ?: ParseOutcome.SyntaxError(furthest, expected) +} diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/RegexTerminal.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/RegexTerminal.kt index 2010e787..afdd98a4 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/RegexTerminal.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/RegexTerminal.kt @@ -18,6 +18,14 @@ class RegexTerminal(syntaticMatchStr : String, semanticMatchStr: String ) : Term return MatchResult(listOf(matchResult.value), offset + matchResult.value.length, listOf(this), offset + matchResult.value.length) } + override fun parse(value: String, offset: Int): Sequence { + // The syntactic regex gives the lenient span; valid iff the semantic regex matches that same span. 
+ val syn = syntaticMatch.matchAt(value, offset) ?: return sequenceOf(Stuck(offset, setOf(this))) + val text = syn.value + val valid = semanticMatch.matchAt(value, offset)?.value == text + return sequenceOf(Parse(offset + text.length, listOf(ParsedToken(offset, offset + text.length, text, this, valid)))) + } + override fun toString(): String { return "Regex(\"${semanticMatch.pattern}\")" } diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Repeat.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Repeat.kt index 869a5772..c3621ea2 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Repeat.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/Repeat.kt @@ -62,6 +62,24 @@ class Repeat(val combinator : Combinator, val minInclusive: Int, val maxExclusiv return match(value, offset, combinator::SemanticMatch) } + override fun parse(value: String, offset: Int): Sequence { + // Offer every repetition count in [minInclusive, maxExclusive] (maxExclusive is the cap on the + // count, mirroring the existing match() loop). Yield only once enough repetitions have happened; + // a failed attempt at another repetition is carried as a Stuck. 
+ fun extend(from: Parse, count: Int): Sequence = sequence { + if (count >= minInclusive) yield(from) + if (count < maxExclusive) { + for (step in combinator.parse(value, from.end)) { + when (step) { + is Parse -> if (step.end > from.end) yieldAll(extend(Parse(step.end, from.tokens + step.tokens), count + 1)) + is Stuck -> yield(step) + } + } + } + } + return extend(Parse(offset, emptyList()), 0) + } + override fun toString(): String = toStringIndented(0) override fun toStringIndented(indent: Int): String { diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/SequenceCombinator.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/SequenceCombinator.kt index ddab6a61..c4c4ad12 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/SequenceCombinator.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/SequenceCombinator.kt @@ -55,6 +55,26 @@ open class SequenceCombinator(vararg val tokens: Combinator) : Combinator { return MatchResult(resultTokens, index, resultTerminals, maxLength) } + override fun parse(value: String, offset: Int): Sequence { + // Thread each successful possibility of one part into the next (the cartesian product). A part + // that gets stuck — or a path that already got stuck — carries its dead end forward unchanged. 
+ var results: Sequence = sequenceOf(Parse(offset, emptyList())) + for (token in tokens) { + results = results.flatMap { acc -> + when (acc) { + is Stuck -> sequenceOf(acc) // path already dead-ended; carry it forward + is Parse -> token.parse(value, acc.end).map { step -> + when (step) { + is Parse -> Parse(step.end, acc.tokens + step.tokens) + is Stuck -> step // this part got stuck after acc; propagate the dead end + } + } + } + } + } + return results + } + override fun toString(): String = toStringIndented(0) override fun toStringIndented(indent: Int): String { diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/WhitespaceTerminal.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/WhitespaceTerminal.kt index 180e5eb4..32a47183 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/WhitespaceTerminal.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/WhitespaceTerminal.kt @@ -27,6 +27,13 @@ class WhitespaceTerminal : TerminalCombinator { return match(value, offset) } + override fun parse(value: String, offset: Int): Sequence { + var end = offset + while (end < value.length && value[end].isWhitespace()) end++ + return if (end == offset) sequenceOf(Stuck(offset, setOf(this))) + else sequenceOf(Parse(end, listOf(ParsedToken(offset, end, value.substring(offset, end), this, valid = true)))) + } + override fun toString(): String { return "\\s+" } diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ZeroOrMore.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ZeroOrMore.kt index beeced13..07393886 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ZeroOrMore.kt +++ 
b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ZeroOrMore.kt @@ -42,6 +42,22 @@ class ZeroOrMore(val combinator : Combinator) : Combinator { return match(value, offset, combinator::SemanticMatch) } + override fun parse(value: String, offset: Int): Sequence { + // Offer EVERY repetition count (0, 1, 2, ...), not just the greedy maximum. A failed attempt at + // one more repetition is yielded as a Stuck. The `> from.end` guard keeps an inner matcher that + // can match empty from looping forever. + fun extend(from: Parse): Sequence = sequence { + yield(from) // stop repeating here... + for (step in combinator.parse(value, from.end)) { + when (step) { + is Parse -> if (step.end > from.end) yieldAll(extend(Parse(step.end, from.tokens + step.tokens))) + is Stuck -> yield(step) // couldn't take another repetition; remember where/why + } + } + } + return extend(Parse(offset, emptyList())) + } + override fun toString(): String = toStringIndented(0) override fun toStringIndented(indent: Int): String { diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ZeroOrOne.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ZeroOrOne.kt index 1751b24c..d33bc04d 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ZeroOrOne.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ZeroOrOne.kt @@ -40,6 +40,10 @@ class ZeroOrOne(val combinator : Combinator) : Combinator { return match(value, offset, combinator::SemanticMatch) } + override fun parse(value: String, offset: Int): Sequence = + // The empty match, plus whatever the inner matcher offers (its matches and any dead end). 
+ sequenceOf(Parse(offset, emptyList())) + combinator.parse(value, offset) + override fun toString(): String = toStringIndented(0) override fun toStringIndented(indent: Int): String { diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/settings/ExperimentalSettings.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/settings/ExperimentalSettings.kt new file mode 100644 index 00000000..f61239b2 --- /dev/null +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/settings/ExperimentalSettings.kt @@ -0,0 +1,41 @@ +package net.sjrx.intellij.plugins.systemdunitfiles.settings + +import com.intellij.openapi.components.PersistentStateComponent +import com.intellij.openapi.components.Service +import com.intellij.openapi.components.State +import com.intellij.openapi.components.Storage +import com.intellij.openapi.project.Project + +/** + * Opt-in flags for unfinished/experimental behaviour (GitHub #467). + * + * Kept separate from [PodmanQuadletSettings] so each experimental area owns its own storage; the + * checkboxes are surfaced on the shared "systemd Unit Files" settings page. + */ +@Service(Service.Level.PROJECT) +@State(name = "SystemdUnitFileExperimentalSettings", storages = [Storage("systemdUnitFileExperimental.xml")]) +class ExperimentalSettings : PersistentStateComponent { + + private var myState = State() + + class State { + /** + * Use the new list-of-successes grammar engine (Combinator.parse / validate) for value + * validation instead of the original SyntacticMatch/SemanticMatch path. 
+ */ + var useGrammarParseEngine: Boolean = false + } + + override fun getState(): State = myState + + override fun loadState(state: State) { + myState = state + } + + companion object { + @JvmStatic + fun getInstance(project: Project): ExperimentalSettings { + return project.getService(ExperimentalSettings::class.java) + } + } +} diff --git a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/settings/PodmanQuadletConfigurable.kt b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/settings/PodmanQuadletConfigurable.kt index cf30971f..c4cc7031 100644 --- a/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/settings/PodmanQuadletConfigurable.kt +++ b/src/main/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/settings/PodmanQuadletConfigurable.kt @@ -10,22 +10,31 @@ import javax.swing.JPanel class PodmanQuadletConfigurable(private val project: Project) : Configurable { private var enabledCheckbox: JBCheckBox? = null + private var grammarEngineCheckbox: JBCheckBox? = null override fun getDisplayName(): String = "systemd Unit Files" override fun createComponent(): JComponent { val settings = PodmanQuadletSettings.getInstance(project) + val experimental = ExperimentalSettings.getInstance(project) enabledCheckbox = JBCheckBox("Enable Podman Quadlet support (experimental)", settings.state.enabled) + grammarEngineCheckbox = JBCheckBox( + "Use the new grammar engine for value validation (experimental)", + experimental.state.useGrammarParseEngine, + ) return FormBuilder.createFormBuilder() .addComponent(enabledCheckbox!!) + .addComponent(grammarEngineCheckbox!!) 
.addComponentFillVertically(JPanel(), 0) .panel } override fun isModified(): Boolean { val settings = PodmanQuadletSettings.getInstance(project) - return enabledCheckbox?.isSelected != settings.state.enabled + val experimental = ExperimentalSettings.getInstance(project) + return enabledCheckbox?.isSelected != settings.state.enabled || + grammarEngineCheckbox?.isSelected != experimental.state.useGrammarParseEngine } override fun apply() { @@ -35,10 +44,13 @@ class PodmanQuadletConfigurable(private val project: Project) : Configurable { settings.state.notificationDismissed = false } settings.state.enabled = newEnabled + + ExperimentalSettings.getInstance(project).state.useGrammarParseEngine = grammarEngineCheckbox?.isSelected ?: false } override fun reset() { val settings = PodmanQuadletSettings.getInstance(project) enabledCheckbox?.isSelected = settings.state.enabled + grammarEngineCheckbox?.isSelected = ExperimentalSettings.getInstance(project).state.useGrammarParseEngine } } diff --git a/src/main/resources/META-INF/plugin.xml b/src/main/resources/META-INF/plugin.xml index 60764a09..94771c62 100644 --- a/src/main/resources/META-INF/plugin.xml +++ b/src/main/resources/META-INF/plugin.xml @@ -89,6 +89,7 @@ + problem + * descriptor mapping. + */ +class GrammarParseEngineInspectionTest : AbstractUnitFileTest() { + + private fun enableNewEngine() { + ExperimentalSettings.getInstance(project).state.useGrammarParseEngine = true + } + + // The light-test project is shared across test classes, so the opt-in flag must not leak into + // other tests (which assume the original engine). 
+ override fun tearDown() { + try { + ExperimentalSettings.getInstance(project).state.useGrammarParseEngine = false + } finally { + super.tearDown() + } + } + + @Test + fun testValidAddressFamiliesUnderNewEngine() { + enableNewEngine() + // language="unit file (systemd)" + val file = """ + [Service] + RestrictAddressFamilies=none + RestrictAddressFamilies=AF_INET AF_INET6 + RestrictAddressFamilies=~AF_UNIX AF_NETLINK + """.trimIndent() + + setupFileInEditor("file.service", file) + enableInspection(InvalidValueInspection::class.java) + + assertSize(0, myFixture.doHighlighting()) + } + + @Test + fun testInvalidAddressFamiliesUnderNewEngine() { + enableNewEngine() + // Three malformed lists, each ill-formed against the grammar's shape -> one highlight each: + // a stray comma, a name without the AF_ prefix, and a lowercase tail the AF_* regex rejects. + // language="unit file (systemd)" + val file = """ + [Service] + RestrictAddressFamilies=AF_INET, AF_INET6 + RestrictAddressFamilies=inet + RestrictAddressFamilies=AF_inet + """.trimIndent() + + setupFileInEditor("file.service", file) + enableInspection(InvalidValueInspection::class.java) + + assertSize(3, myFixture.doHighlighting()) + } + + @Test + fun testFlagOffStillUsesOriginalEngine() { + // Sanity: with the flag left off, the same valid input is accepted (the default path runs). 
+ // language="unit file (systemd)" + val file = """ + [Service] + RestrictAddressFamilies=AF_INET AF_INET6 + """.trimIndent() + + setupFileInEditor("file.service", file) + enableInspection(InvalidValueInspection::class.java) + + assertSize(0, myFixture.doHighlighting()) + } +} diff --git a/src/test/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/inspections/InvalidValueInspectionForCGroupSocketBind.kt b/src/test/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/inspections/InvalidValueInspectionForCGroupSocketBind.kt index 0701bebb..042f5304 100644 --- a/src/test/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/inspections/InvalidValueInspectionForCGroupSocketBind.kt +++ b/src/test/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/inspections/InvalidValueInspectionForCGroupSocketBind.kt @@ -2,6 +2,7 @@ package net.sjrx.intellij.plugins.systemdunitfiles.inspections import junit.framework.TestCase import net.sjrx.intellij.plugins.systemdunitfiles.AbstractUnitFileTest +import net.sjrx.intellij.plugins.systemdunitfiles.semanticdata.optionvalues.grammar.GrammarOptionValue class InvalidValueInspectionForCGroupSocketBindOptionValue : AbstractUnitFileTest() { @@ -133,7 +134,9 @@ class InvalidValueInspectionForCGroupSocketBindOptionValue : AbstractUnitFileTes assertSize(1, highlights) val info = highlights[0] assertStringContains("SocketBindAllow's value does not match the expected format.", info!!.description) - TestCase.assertEquals("::tcp", info.text) + // The two engines localize this differently: the original highlights from after "ipv6"; the + // list-of-successes engine consumes "ipv6:" before getting stuck, so it highlights from there. 
+ TestCase.assertEquals(if (GrammarOptionValue.FORCE_PARSE_ENGINE) ":tcp" else "::tcp", info.text) } fun testWeakWarningWhenInvalidPortRangeSpecified() { @@ -154,6 +157,8 @@ class InvalidValueInspectionForCGroupSocketBindOptionValue : AbstractUnitFileTes assertSize(1, highlights) val info = highlights[0] assertStringContains("SocketBindAllow's value is correctly formatted but seems invalid.", info!!.description) - TestCase.assertEquals("-", info.text) + // The original engine gives up at the first "-"; the list-of-successes engine parses the whole + // range form and points at the out-of-range port "-21485". + TestCase.assertEquals(if (GrammarOptionValue.FORCE_PARSE_ENGINE) "-21485" else "-", info.text) } } diff --git a/src/test/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/GrammarParseTest.kt b/src/test/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/GrammarParseTest.kt new file mode 100644 index 00000000..105360ea --- /dev/null +++ b/src/test/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/GrammarParseTest.kt @@ -0,0 +1,898 @@ +package net.sjrx.intellij.plugins.systemdunitfiles.semanticdata.optionvalues.grammar + +import junit.framework.TestCase + +/** + * New-engine twin of [GrammarTest]. Every method there asserts the original SyntacticMatch / + * SemanticMatch behaviour of a combinator; this class ports each one verbatim against the new + * list-of-successes engine ([Combinator.parse]). The two engines ship side by side for a short + * while, so we want full parity coverage of parse() on every combinator before it becomes default. + * + * The new engine has no two passes: a single parse() returns every way input can be consumed, each + * token carrying a `valid` flag. 
The [syntactic]/[semantic] bridges below recover the MatchResult + * shape GrammarTest asserts on, so the ports read almost identically: + * - syntactic == the longest way to consume input here, ignoring token validity + * - semantic == the longest way using only strictly-valid tokens + * + * A handful of values differ from the old engine by design; those are called out inline. + */ +class GrammarParseTest : TestCase() { + + fun TerminalType(o: TerminalCombinator): String { + return o.javaClass.simpleName + } + + fun TerminalTypes(os: List): List { + return os.map { o -> TerminalType(o) } + } + + /** MatchResult-shaped view of a parse() result, so the ported assertions mirror GrammarTest. */ + private data class View( + val matchResult: Int, + val tokens: List, + val terminals: List, + val longestMatch: Int, + ) + + /** "Could this consume input here?" — longest [Parse], ignoring validity; -1 if none matched. */ + private fun Combinator.syntactic(value: String, offset: Int): View { + val steps = parse(value, offset).toList() + val frontier = steps.maxOf { if (it is Parse) it.end else (it as Stuck).offset } + val best = steps.filterIsInstance().maxByOrNull { it.end } + ?: return View(-1, emptyList(), emptyList(), frontier) + return View(best.end, best.tokens.map { it.text }, best.tokens.map { it.terminal }, best.end) + } + + /** "...and is it valid?" — longest [Parse] whose tokens are all strictly valid; -1 if none. */ + private fun Combinator.semantic(value: String, offset: Int): View { + val steps = parse(value, offset).toList() + val best = steps.filterIsInstance().filter { p -> p.tokens.all { it.valid } }.maxByOrNull { it.end } + ?: return View(-1, emptyList(), emptyList(), validPrefixReach(steps, offset)) + return View(best.end, best.tokens.map { it.text }, best.tokens.map { it.terminal }, best.end) + } + + /** Deepest offset reachable consuming only strictly-valid tokens (the semantic frontier). 
*/ + private fun validPrefixReach(steps: List, offset: Int): Int { + var reach = offset + for (s in steps) when (s) { + is Parse -> { + var r = offset + for (t in s.tokens) { + if (!t.valid) break + r = t.end + } + reach = maxOf(reach, r) + } + is Stuck -> reach = maxOf(reach, s.offset) + } + return reach + } + + fun testRegexTerminalMatches() { + val regexTerminal = RegexTerminal("-?[0-9]+\\s*[A-Z]", "[0-9]*[1-9]\\s*[BKMG]") + + val semValid = "1K" + val synValid = "-2Z" + val invalid = "6,000 People" + + val garbage = "XX" + val semValidFromOffset = "${garbage}${semValid}" + val synValidFromOffset = "${garbage}${synValid}" + val invalidFromOffset = "${garbage}${invalid}" + + var match = regexTerminal.semantic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("1K"), match.tokens) + assertEquals(listOf("RegexTerminal"), TerminalTypes(match.terminals)) + + match = regexTerminal.syntactic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("1K"), match.tokens) + assertEquals(listOf("RegexTerminal"), TerminalTypes(match.terminals)) + + assertEquals(-1, regexTerminal.semantic(synValid, 0).matchResult) + + match = regexTerminal.syntactic(synValid, 0) + assertEquals(synValid.length, match.matchResult) + assertEquals(listOf("-2Z"), match.tokens) + assertEquals(listOf("RegexTerminal"), TerminalTypes(match.terminals)) + + assertEquals(-1, regexTerminal.semantic(invalid, 0).matchResult) + assertEquals(-1, regexTerminal.syntactic(invalid, 0).matchResult) + + match = regexTerminal.semantic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("1K"), match.tokens) + assertEquals(listOf("RegexTerminal"), TerminalTypes(match.terminals)) + + match = regexTerminal.syntactic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("1K"), match.tokens) + 
assertEquals(listOf("RegexTerminal"), TerminalTypes(match.terminals)) + + match = regexTerminal.syntactic(synValidFromOffset, garbage.length) + assertEquals(synValidFromOffset.length, match.matchResult) + assertEquals(listOf("-2Z"), match.tokens) + assertEquals(listOf("RegexTerminal"), TerminalTypes(match.terminals)) + + assertEquals(-1, regexTerminal.semantic(synValidFromOffset, garbage.length).matchResult) + assertEquals(-1, regexTerminal.syntactic(invalidFromOffset, garbage.length).matchResult) + assertEquals(-1, regexTerminal.semantic(invalidFromOffset, garbage.length).matchResult) + } + + fun testLiteralChoiceTerminalMatches() { + val literalChoiceTerminal = LiteralChoiceTerminal("foo", "bar", "baz") + + val semValid = "foo" + val garbage = "XX" + val invalid = "qux" + + val semValidFromOffset = "${garbage}${semValid}" + val invalidFromOffset = "${garbage}${invalid}" + val invalidFromOffsetWithSemValidPrefix = "${semValid}${invalid}" + + var match = literalChoiceTerminal.semantic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("foo"), match.tokens) + assertEquals(listOf("LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = literalChoiceTerminal.syntactic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("foo"), match.tokens) + assertEquals(listOf("LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + assertEquals(-1, literalChoiceTerminal.semantic(invalid, 0).matchResult) + assertEquals(-1, literalChoiceTerminal.syntactic(invalid, 0).matchResult) + + match = literalChoiceTerminal.semantic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("foo"), match.tokens) + assertEquals(listOf("LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = literalChoiceTerminal.syntactic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + 
assertEquals(listOf("foo"), match.tokens) + assertEquals(listOf("LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + assertEquals(-1, literalChoiceTerminal.semantic(invalidFromOffset, garbage.length).matchResult) + assertEquals(-1, literalChoiceTerminal.syntactic(invalidFromOffset, garbage.length).matchResult) + + assertEquals(-1, literalChoiceTerminal.semantic(invalidFromOffsetWithSemValidPrefix, semValid.length).matchResult) + assertEquals(-1, literalChoiceTerminal.syntactic(invalidFromOffsetWithSemValidPrefix, semValid.length).matchResult) + } + + fun testLiteralChoiceTerminalMatchesLongest() { + val literalChoiceTerminal = LiteralChoiceTerminal("a", "ab", "abc") + + val semValid = "abc" + val garbage = "XX" + + val semValidFromOffset = "${garbage}${semValid}" + + var match = literalChoiceTerminal.semantic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("abc"), match.tokens) + assertEquals(listOf("LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = literalChoiceTerminal.syntactic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("abc"), match.tokens) + assertEquals(listOf("LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + // List-of-successes detail: at offset 2 of "XXabc" the terminal offers "a", "ab" AND "abc" + // (every choice that matches), so syntactic() must pick the longest to mirror the old engine.
+ match = literalChoiceTerminal.syntactic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("abc"), match.tokens) + } + + fun testFlexibleLiteralChoiceTerminalMatches() { + val literalChoiceTerminal = FlexibleLiteralChoiceTerminal("foo", "bar", "baz") + + val semValid = "foo" + val garbage = "XX" + val invalid = "qux" + + val semValidFromOffset = "${garbage}${semValid}" + val invalidFromOffset = "${garbage}${invalid}" + val invalidFromOffsetWithSemValidPrefix = "${semValid}${invalid}" + + var match = literalChoiceTerminal.semantic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("foo"), match.tokens) + assertEquals(listOf("FlexibleLiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = literalChoiceTerminal.syntactic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("foo"), match.tokens) + assertEquals(listOf("FlexibleLiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + assertEquals(-1, literalChoiceTerminal.semantic(invalid, 0).matchResult) + + match = literalChoiceTerminal.syntactic(invalid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("qux"), match.tokens) + assertEquals(listOf("FlexibleLiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = literalChoiceTerminal.semantic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("foo"), match.tokens) + assertEquals(listOf("FlexibleLiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = literalChoiceTerminal.syntactic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("foo"), match.tokens) + assertEquals(listOf("FlexibleLiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = literalChoiceTerminal.semantic(invalidFromOffset, garbage.length) + assertEquals(-1, 
match.matchResult) + assertEquals(listOf(), match.tokens) + assertEquals(listOf(), TerminalTypes(match.terminals)) + assertEquals(2, match.longestMatch) + + match = literalChoiceTerminal.syntactic(invalidFromOffset, garbage.length) + assertEquals(5, match.matchResult) + assertEquals(listOf("qux"), match.tokens) + assertEquals(listOf("FlexibleLiteralChoiceTerminal"), TerminalTypes(match.terminals)) + assertEquals(5, match.longestMatch) + + match = literalChoiceTerminal.semantic(invalidFromOffsetWithSemValidPrefix, semValid.length) + assertEquals(-1, match.matchResult) + assertEquals(listOf(), match.tokens) + assertEquals(listOf(), TerminalTypes(match.terminals)) + assertEquals(3, match.longestMatch) + + match = literalChoiceTerminal.syntactic(invalidFromOffsetWithSemValidPrefix, semValid.length) + assertEquals(6, match.matchResult) + assertEquals(listOf("qux"), match.tokens) + assertEquals(listOf("FlexibleLiteralChoiceTerminal"), TerminalTypes(match.terminals)) + assertEquals(6, match.longestMatch) + } + + fun testFlexibleLiteralChoiceTerminalMatchesLongest() { + val literalChoiceTerminal = FlexibleLiteralChoiceTerminal("a", "ab", "abc") + + val semValid = "abc" + val garbage = "XX" + + val semValidFromOffset = "${garbage}${semValid}" + + var match = literalChoiceTerminal.semantic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("abc"), match.tokens) + assertEquals(listOf("FlexibleLiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = literalChoiceTerminal.syntactic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("abc"), match.tokens) + assertEquals(listOf("FlexibleLiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = literalChoiceTerminal.semantic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("abc"), match.tokens) + } + + fun testFlexibleLiteralChoiceTerminalMatchesSemanticallyFirstLongest() 
{ + val literalChoiceTerminal = FlexibleLiteralChoiceTerminal("abc", "defg") + + val semValid = "abc" + val garbage = "xx" + + val semValidAndGarbage = "${semValid}${garbage}" + + // ENGINE DIVERGENCE (documented, not a bug-for-bug port): the OLD engine checks the exact + // choices before the lenient shape regex, so it stops at "abc" (valid). The NEW engine's + // parse() runs only the shape regex [a-z]{1,4}, which greedily consumes "abcx" and is then not + // one of the choices -> valid = false. So syntactically it matches "abcx", and semantically + // nothing matches. Worth resolving (offer the exact-choice match too) before the engine becomes + // the default; captured here so the difference is visible rather than silent. + var match = literalChoiceTerminal.semantic(semValidAndGarbage, 0) + assertEquals(-1, match.matchResult) + + match = literalChoiceTerminal.syntactic(semValidAndGarbage, 0) + assertEquals(4, match.matchResult) + assertEquals(listOf("abcx"), match.tokens) + assertEquals(listOf("FlexibleLiteralChoiceTerminal"), TerminalTypes(match.terminals)) + } + + fun testSequenceCombinatorMatches() { + val number = RegexTerminal("-?[0-9]+", "[0-9]*[1-9]") + val unit = LiteralChoiceTerminal("B", "K", "M", "G") + val sequenceCombinator = SequenceCombinator(number, unit) + + val semValid = "1K" + val synValid = "-0B" + val semPrefix = "1" + val synPrefix = "0" + val invalid = "Hello World" + + val garbage = "XX" + + val semValidFromOffset = "${garbage}${semValid}" + val synValidFromOffset = "${garbage}${synValid}" + + val semPrefixFromOffset = "${garbage}${semPrefix}" + val synPrefixFromOffset = "${garbage}${synPrefix}" + val invalidFromOffset = "${garbage}${invalid}" + + var match = sequenceCombinator.syntactic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("1", "K"), match.tokens) + assertEquals(listOf("RegexTerminal", "LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = 
sequenceCombinator.semantic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("1", "K"), match.tokens) + assertEquals(listOf("RegexTerminal", "LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = sequenceCombinator.syntactic(synValid, 0) + assertEquals(synValid.length, match.matchResult) + assertEquals(listOf("-0", "B"), match.tokens) + assertEquals(listOf("RegexTerminal", "LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + assertEquals(-1, sequenceCombinator.semantic(synValid, 0).matchResult) + + assertEquals(-1, sequenceCombinator.syntactic(semPrefix, 0).matchResult) + assertEquals(-1, sequenceCombinator.semantic(semPrefix, 0).matchResult) + assertEquals(-1, sequenceCombinator.syntactic(synPrefix, 0).matchResult) + assertEquals(-1, sequenceCombinator.semantic(synPrefix, 0).matchResult) + assertEquals(-1, sequenceCombinator.syntactic(invalid, 0).matchResult) + assertEquals(-1, sequenceCombinator.semantic(invalid, 0).matchResult) + + match = sequenceCombinator.semantic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("1", "K"), match.tokens) + assertEquals(listOf("RegexTerminal", "LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = sequenceCombinator.syntactic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("1", "K"), match.tokens) + assertEquals(listOf("RegexTerminal", "LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = sequenceCombinator.syntactic(synValidFromOffset, garbage.length) + assertEquals(synValidFromOffset.length, match.matchResult) + assertEquals(listOf("-0", "B"), match.tokens) + assertEquals(listOf("RegexTerminal", "LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + assertEquals(-1, sequenceCombinator.semantic(synValidFromOffset, garbage.length).matchResult) + assertEquals(-1, 
sequenceCombinator.semantic(semPrefixFromOffset, garbage.length).matchResult) + assertEquals(-1, sequenceCombinator.syntactic(semPrefixFromOffset, garbage.length).matchResult) + assertEquals(-1, sequenceCombinator.semantic(synPrefixFromOffset, garbage.length).matchResult) + assertEquals(-1, sequenceCombinator.syntactic(synPrefixFromOffset, garbage.length).matchResult) + assertEquals(-1, sequenceCombinator.semantic(invalidFromOffset, garbage.length).matchResult) + assertEquals(-1, sequenceCombinator.syntactic(invalidFromOffset, garbage.length).matchResult) + } + + fun testAlternativeCombinatorMatches() { + val on = LiteralChoiceTerminal("on") + val off = LiteralChoiceTerminal("off") + + val alternativeCombinator = AlternativeCombinator(on, off) + + val semValid = "on" + val semValid2 = "off" + val invalid = "bleh" + + val garbage = "XX" + + val semValidFromOffset = "${garbage}${semValid}" + val semValid2FromOffset = "${garbage}${semValid2}" + val invalidFromOffset = "${garbage}${invalid}" + + var match = alternativeCombinator.syntactic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("on"), match.tokens) + assertEquals(listOf("LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = alternativeCombinator.semantic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("on"), match.tokens) + assertEquals(listOf("LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = alternativeCombinator.syntactic(semValid2, 0) + assertEquals(semValid2.length, match.matchResult) + assertEquals(listOf("off"), match.tokens) + assertEquals(listOf("LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = alternativeCombinator.semantic(semValid2, 0) + assertEquals(semValid2.length, match.matchResult) + assertEquals(listOf("off"), match.tokens) + assertEquals(listOf("LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + assertEquals(-1, alternativeCombinator.syntactic(invalid, 
0).matchResult) + assertEquals(-1, alternativeCombinator.semantic(invalid, 0).matchResult) + + match = alternativeCombinator.semantic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("on"), match.tokens) + assertEquals(listOf("LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = alternativeCombinator.syntactic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("on"), match.tokens) + assertEquals(listOf("LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = alternativeCombinator.semantic(semValid2FromOffset, garbage.length) + assertEquals(semValid2FromOffset.length, match.matchResult) + assertEquals(listOf("off"), match.tokens) + assertEquals(listOf("LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = alternativeCombinator.syntactic(semValid2FromOffset, garbage.length) + assertEquals(semValid2FromOffset.length, match.matchResult) + assertEquals(listOf("off"), match.tokens) + assertEquals(listOf("LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + assertEquals(-1, alternativeCombinator.semantic(invalidFromOffset, garbage.length).matchResult) + assertEquals(-1, alternativeCombinator.syntactic(invalidFromOffset, garbage.length).matchResult) + } + + fun testOneOrMoreCombinatorMatches() { + val fizzOrBuzz = RegexTerminal("[a-z]{4}", "fizz|buzz") + + val oneOrMoreCombinator = OneOrMore(fizzOrBuzz) + + val semValid = "fizzbuzzfizz" + val synValid = "blehblehbleh" + val invalid = "Hello World" + val garbage = "XX" + + val semValidFromOffset = "${garbage}${semValid}" + val synValidFromOffset = "${garbage}${synValid}" + val invalidFromOffset = "${garbage}${invalid}" + + var match = oneOrMoreCombinator.syntactic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("fizz", "buzz", "fizz"), match.tokens) + assertEquals(listOf("RegexTerminal", 
"RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + match = oneOrMoreCombinator.semantic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("fizz", "buzz", "fizz"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + match = oneOrMoreCombinator.syntactic(synValid, 0) + assertEquals(synValid.length, match.matchResult) + assertEquals(listOf("bleh", "bleh", "bleh"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + assertEquals(-1, oneOrMoreCombinator.semantic(synValid, 0).matchResult) + + assertEquals(-1, oneOrMoreCombinator.syntactic(invalid, 0).matchResult) + assertEquals(-1, oneOrMoreCombinator.semantic(invalid, 0).matchResult) + + match = oneOrMoreCombinator.syntactic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("fizz", "buzz", "fizz"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + match = oneOrMoreCombinator.semantic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("fizz", "buzz", "fizz"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + match = oneOrMoreCombinator.syntactic(synValidFromOffset, garbage.length) + assertEquals(synValidFromOffset.length, match.matchResult) + assertEquals(listOf("bleh", "bleh", "bleh"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + assertEquals(-1, oneOrMoreCombinator.semantic(synValidFromOffset, garbage.length).matchResult) + + assertEquals(-1, oneOrMoreCombinator.syntactic(invalidFromOffset, garbage.length).matchResult) + assertEquals(-1, 
oneOrMoreCombinator.semantic(invalidFromOffset, garbage.length).matchResult) + } + + fun testZeroOrMoreCombinatorMatches() { + val fizzOrBuzz = RegexTerminal("[a-z]{4}", "fizz|buzz") + + val zeroOrMoreCombinator = ZeroOrMore(fizzOrBuzz) + + val semValid = "fizzbuzzfizz" + val synValid = "blehblehbleh" + val semValidEmpty = "" + val invalid = "Hello World" + val garbage = "XX" + + val semValidFromOffset = "${garbage}${semValid}" + val semValidEmptyFromOffset = "${garbage}${semValidEmpty}" + val synValidFromOffset = "${garbage}${synValid}" + val invalidFromOffset = "${garbage}${invalid}" + + var match = zeroOrMoreCombinator.syntactic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("fizz", "buzz", "fizz"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + match = zeroOrMoreCombinator.semantic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("fizz", "buzz", "fizz"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + match = zeroOrMoreCombinator.syntactic(synValid, 0) + assertEquals(synValid.length, match.matchResult) + assertEquals(listOf("bleh", "bleh", "bleh"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + match = zeroOrMoreCombinator.semantic(synValid, 0) + assertEquals(0, match.matchResult) + assertEquals(listOf(), match.tokens) + + match = zeroOrMoreCombinator.syntactic(semValidEmpty, 0) + assertEquals(semValidEmpty.length, match.matchResult) + assertEquals(listOf(), match.tokens) + + match = zeroOrMoreCombinator.semantic(semValidEmpty, 0) + assertEquals(semValidEmpty.length, match.matchResult) + assertEquals(listOf(), match.tokens) + + match = zeroOrMoreCombinator.syntactic(invalid, 0) + assertEquals(0, match.matchResult) + assertEquals(listOf(), match.tokens) + + 
match = zeroOrMoreCombinator.semantic(invalid, 0) + assertEquals(0, match.matchResult) + assertEquals(listOf(), match.tokens) + + match = zeroOrMoreCombinator.syntactic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("fizz", "buzz", "fizz"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + match = zeroOrMoreCombinator.semantic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("fizz", "buzz", "fizz"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + match = zeroOrMoreCombinator.syntactic(synValidFromOffset, garbage.length) + assertEquals(synValidFromOffset.length, match.matchResult) + assertEquals(listOf("bleh", "bleh", "bleh"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + match = zeroOrMoreCombinator.semantic(synValidFromOffset, garbage.length) + assertEquals(garbage.length, match.matchResult) + assertEquals(listOf(), match.tokens) + + match = zeroOrMoreCombinator.syntactic(semValidEmptyFromOffset, garbage.length) + assertEquals(garbage.length, match.matchResult) + assertEquals(listOf(), match.tokens) + + match = zeroOrMoreCombinator.semantic(semValidEmptyFromOffset, garbage.length) + assertEquals(garbage.length, match.matchResult) + assertEquals(listOf(), match.tokens) + + match = zeroOrMoreCombinator.syntactic(invalidFromOffset, garbage.length) + assertEquals(garbage.length, match.matchResult) + assertEquals(listOf(), match.tokens) + + match = zeroOrMoreCombinator.semantic(invalidFromOffset, garbage.length) + assertEquals(garbage.length, match.matchResult) + assertEquals(listOf(), match.tokens) + } + + fun testZeroOrOneCombinatorMatches() { + val fizzOrBuzz = RegexTerminal("[a-z]{4}", "fizz|buzz") + + 
val zeroOrOneCombinator = ZeroOrOne(fizzOrBuzz) + + val semValid = "fizz" + val synValid = "bleh" + val semValidEmpty = "" + val invalid = "Hello World" + val garbage = "XX" + + val semValidFromOffset = "${garbage}${semValid}" + val semValidEmptyFromOffset = "${garbage}${semValidEmpty}" + val synValidFromOffset = "${garbage}${synValid}" + val invalidFromOffset = "${garbage}${invalid}" + + var match = zeroOrOneCombinator.syntactic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("fizz"), match.tokens) + assertEquals(listOf("RegexTerminal"), TerminalTypes(match.terminals)) + + match = zeroOrOneCombinator.semantic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("fizz"), match.tokens) + assertEquals(listOf("RegexTerminal"), TerminalTypes(match.terminals)) + + match = zeroOrOneCombinator.syntactic(synValid, 0) + assertEquals(synValid.length, match.matchResult) + assertEquals(listOf("bleh"), match.tokens) + assertEquals(listOf("RegexTerminal"), TerminalTypes(match.terminals)) + + match = zeroOrOneCombinator.semantic(synValid, 0) + assertEquals(0, match.matchResult) + assertEquals(listOf(), match.tokens) + + match = zeroOrOneCombinator.syntactic(semValidEmpty, 0) + assertEquals(semValidEmpty.length, match.matchResult) + assertEquals(listOf(), match.tokens) + + match = zeroOrOneCombinator.semantic(semValidEmpty, 0) + assertEquals(semValidEmpty.length, match.matchResult) + assertEquals(listOf(), match.tokens) + + match = zeroOrOneCombinator.syntactic(invalid, 0) + assertEquals(0, match.matchResult) + assertEquals(listOf(), match.tokens) + + match = zeroOrOneCombinator.semantic(invalid, 0) + assertEquals(0, match.matchResult) + assertEquals(listOf(), match.tokens) + + match = zeroOrOneCombinator.syntactic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("fizz"), match.tokens) + assertEquals(listOf("RegexTerminal"), 
TerminalTypes(match.terminals)) + + match = zeroOrOneCombinator.semantic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("fizz"), match.tokens) + assertEquals(listOf("RegexTerminal"), TerminalTypes(match.terminals)) + + match = zeroOrOneCombinator.syntactic(synValidFromOffset, garbage.length) + assertEquals(synValidFromOffset.length, match.matchResult) + assertEquals(listOf("bleh"), match.tokens) + assertEquals(listOf("RegexTerminal"), TerminalTypes(match.terminals)) + + match = zeroOrOneCombinator.semantic(synValidFromOffset, garbage.length) + assertEquals(garbage.length, match.matchResult) + assertEquals(listOf(), match.tokens) + + match = zeroOrOneCombinator.syntactic(semValidEmptyFromOffset, garbage.length) + assertEquals(garbage.length, match.matchResult) + assertEquals(listOf(), match.tokens) + + match = zeroOrOneCombinator.semantic(semValidEmptyFromOffset, garbage.length) + assertEquals(garbage.length, match.matchResult) + assertEquals(listOf(), match.tokens) + + match = zeroOrOneCombinator.syntactic(invalidFromOffset, garbage.length) + assertEquals(garbage.length, match.matchResult) + assertEquals(listOf(), match.tokens) + + match = zeroOrOneCombinator.semantic(invalidFromOffset, garbage.length) + assertEquals(garbage.length, match.matchResult) + assertEquals(listOf(), match.tokens) + } + + fun testEOFCombinatorMatches() { + val eof = EOF() + + val semValid = "" + val invalid = "Hello World" + val garbage = "XX" + + val semValidFromOffset = "${garbage}${semValid}" + val invalidFromOffset = "${garbage}${invalid}" + + var match = eof.syntactic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf(), match.tokens) + + match = eof.semantic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf(), match.tokens) + + assertEquals(-1, eof.syntactic(invalid, 0).matchResult) + assertEquals(-1, eof.semantic(invalid, 0).matchResult) + + 
match = eof.syntactic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf(), match.tokens) + + match = eof.semantic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf(), match.tokens) + + assertEquals(-1, eof.syntactic(invalidFromOffset, garbage.length).matchResult) + assertEquals(-1, eof.semantic(invalidFromOffset, garbage.length).matchResult) + } + + fun testOptionalWhitespacePrefixMatches() { + val on = LiteralChoiceTerminal("on") + + val optionalWhitespacePrefix = OptionalWhitespacePrefix(on) + + val semValid = "on" + val semValid2 = "\t on" + val invalid = "off" + + val garbage = "XX" + + val semValidFromOffset = "${garbage}${semValid}" + val semValid2FromOffset = "${garbage}${semValid2}" + val invalidFromOffset = "${garbage}${invalid}" + + var match = optionalWhitespacePrefix.syntactic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("on"), match.tokens) + assertEquals(listOf("LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = optionalWhitespacePrefix.semantic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("on"), match.tokens) + assertEquals(listOf("LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = optionalWhitespacePrefix.syntactic(semValid2, 0) + assertEquals(semValid2.length, match.matchResult) + assertEquals(listOf("\t ", "on"), match.tokens) + assertEquals(listOf("WhitespaceTerminal", "LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = optionalWhitespacePrefix.semantic(semValid2, 0) + assertEquals(semValid2.length, match.matchResult) + assertEquals(listOf("\t ", "on"), match.tokens) + assertEquals(listOf("WhitespaceTerminal", "LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + assertEquals(-1, optionalWhitespacePrefix.syntactic(invalid, 0).matchResult) + assertEquals(-1, 
optionalWhitespacePrefix.semantic(invalid, 0).matchResult) + + match = optionalWhitespacePrefix.semantic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("on"), match.tokens) + assertEquals(listOf("LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = optionalWhitespacePrefix.syntactic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("on"), match.tokens) + assertEquals(listOf("LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = optionalWhitespacePrefix.semantic(semValid2FromOffset, garbage.length) + assertEquals(semValid2FromOffset.length, match.matchResult) + assertEquals(listOf("\t ", "on"), match.tokens) + assertEquals(listOf("WhitespaceTerminal", "LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + match = optionalWhitespacePrefix.syntactic(semValid2FromOffset, garbage.length) + assertEquals(semValid2FromOffset.length, match.matchResult) + assertEquals(listOf("\t ", "on"), match.tokens) + assertEquals(listOf("WhitespaceTerminal", "LiteralChoiceTerminal"), TerminalTypes(match.terminals)) + + assertEquals(-1, optionalWhitespacePrefix.semantic(invalidFromOffset, garbage.length).matchResult) + assertEquals(-1, optionalWhitespacePrefix.syntactic(invalidFromOffset, garbage.length).matchResult) + } + + fun testRepeatCombinatorMatchesNonZeroMin() { + val fizzOrBuzz = RegexTerminal("[a-z]{4}", "fizz|buzz") + + val repeatCombinator = Repeat(fizzOrBuzz, 2, 4) + + val semValid = "fizzbuzzfizz" + val synValid = "blehblehbleh" + val tooShort = "fizz" + val garbage = "XX" + + val semValidFromOffset = "${garbage}${semValid}" + val synValidFromOffset = "${garbage}${synValid}" + val tooShortFromOffset = "${garbage}${tooShort}" + + var match = repeatCombinator.syntactic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("fizz", "buzz", "fizz"), match.tokens) + 
assertEquals(listOf("RegexTerminal", "RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + match = repeatCombinator.semantic(semValid, 0) + assertEquals(semValid.length, match.matchResult) + assertEquals(listOf("fizz", "buzz", "fizz"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + match = repeatCombinator.syntactic(synValid, 0) + assertEquals(synValid.length, match.matchResult) + assertEquals(listOf("bleh", "bleh", "bleh"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + assertEquals(-1, repeatCombinator.semantic(synValid, 0).matchResult) + + match = repeatCombinator.syntactic(tooShort, 0) + assertEquals(-1, match.matchResult) + assertEquals(emptyList(), match.tokens) + assertEquals(4, match.longestMatch) + + match = repeatCombinator.semantic(tooShort, 0) + assertEquals(-1, match.matchResult) + assertEquals(emptyList(), match.tokens) + assertEquals(4, match.longestMatch) + + match = repeatCombinator.syntactic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("fizz", "buzz", "fizz"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + match = repeatCombinator.semantic(semValidFromOffset, garbage.length) + assertEquals(semValidFromOffset.length, match.matchResult) + assertEquals(listOf("fizz", "buzz", "fizz"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + match = repeatCombinator.syntactic(synValidFromOffset, garbage.length) + assertEquals(synValidFromOffset.length, match.matchResult) + assertEquals(listOf("bleh", "bleh", "bleh"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + assertEquals(-1, 
repeatCombinator.semantic(synValidFromOffset, garbage.length).matchResult) + + match = repeatCombinator.syntactic(tooShortFromOffset, garbage.length) + assertEquals(-1, match.matchResult) + assertEquals(emptyList(), match.tokens) + assertEquals(6, match.longestMatch) + + match = repeatCombinator.semantic(tooShortFromOffset, garbage.length) + assertEquals(-1, match.matchResult) + assertEquals(emptyList(), match.tokens) + assertEquals(6, match.longestMatch) + } + + fun testRepeatCombinatorMatchesZeroMinAndExtraAtEnd() { + val fizzOrBuzz = RegexTerminal("[a-z]{4}", "fizz|buzz") + + val repeatCombinator = Repeat(fizzOrBuzz, 0, 2) + + val semValid = "fizzbuzzfizz" + val synValid = "blehblehbleh" + val emptyString = "" + val garbage = "XX" + + val semValidFromOffset = "${garbage}${semValid}" + val synValidFromOffset = "${garbage}${synValid}" + val emptyStringFromOffset = "${garbage}${emptyString}" + + var match = repeatCombinator.syntactic(semValid, 0) + assertEquals(8, match.matchResult) + assertEquals(listOf("fizz", "buzz"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + match = repeatCombinator.semantic(semValid, 0) + assertEquals(8, match.matchResult) + assertEquals(listOf("fizz", "buzz"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + match = repeatCombinator.syntactic(synValid, 0) + assertEquals(8, match.matchResult) + assertEquals(listOf("bleh", "bleh"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + match = repeatCombinator.semantic(synValid, 0) + assertEquals(0, match.matchResult) + assertEquals(emptyList(), match.tokens) + assertEquals(0, match.longestMatch) + + match = repeatCombinator.syntactic(emptyString, 0) + assertEquals(0, match.matchResult) + assertEquals(emptyList(), match.tokens) + assertEquals(0, match.longestMatch) + + match = repeatCombinator.semantic(emptyString, 0) + 
assertEquals(0, match.matchResult) + assertEquals(emptyList(), match.tokens) + assertEquals(0, match.longestMatch) + + match = repeatCombinator.syntactic(semValidFromOffset, garbage.length) + assertEquals(10, match.matchResult) + assertEquals(listOf("fizz", "buzz"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + match = repeatCombinator.semantic(semValidFromOffset, garbage.length) + assertEquals(10, match.matchResult) + assertEquals(listOf("fizz", "buzz"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + match = repeatCombinator.syntactic(synValidFromOffset, garbage.length) + assertEquals(10, match.matchResult) + assertEquals(listOf("bleh", "bleh"), match.tokens) + assertEquals(listOf("RegexTerminal", "RegexTerminal"), TerminalTypes(match.terminals)) + + // Old engine reported longestMatch = 0 for these zero-rep matches (it never advanced past the + // start); the new engine reports the actual end offset of the empty match (= the start offset). 
+ match = repeatCombinator.semantic(synValidFromOffset, garbage.length) + assertEquals(2, match.matchResult) + assertEquals(emptyList(), match.tokens) + assertEquals(2, match.longestMatch) + + match = repeatCombinator.syntactic(emptyStringFromOffset, garbage.length) + assertEquals(2, match.matchResult) + assertEquals(emptyList(), match.tokens) + assertEquals(2, match.longestMatch) + + match = repeatCombinator.semantic(emptyStringFromOffset, garbage.length) + assertEquals(2, match.matchResult) + assertEquals(emptyList(), match.tokens) + assertEquals(2, match.longestMatch) + } +} diff --git a/src/test/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ParseTest.kt b/src/test/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ParseTest.kt new file mode 100644 index 00000000..fbafcf91 --- /dev/null +++ b/src/test/kotlin/net/sjrx/intellij/plugins/systemdunitfiles/semanticdata/optionvalues/grammar/ParseTest.kt @@ -0,0 +1,137 @@ +package net.sjrx.intellij.plugins.systemdunitfiles.semanticdata.optionvalues.grammar + +import org.junit.Assert.assertEquals +import org.junit.Assert.assertNotNull +import org.junit.Assert.assertTrue +import org.junit.Test + +/** + * Tests for the list-of-successes matcher `Combinator.parse()` / `validate()` (#467). + * + * These exercise the engine against grammars built from the SAME combinator classes the production + * validators use — nothing in the 200+ grammar definitions changed; we just grew a second matching + * method on the same combinators. The grammars here are hand-built so the expected outcomes are + * unambiguous and independent of any particular production grammar. 
+ */ +class ParseTest { + + private fun isValid(grammar: Combinator, value: String) = grammar.validate(value) == ParseOutcome.Valid + + @Test + fun testThreeOutcomes() { + // A FlexibleLiteralChoiceTerminal matches the SHAPE of the token leniently (so a wrong token can + // still be located/highlighted) and flags it valid only when it equals one of the choices. + val grammar = SequenceCombinator(FlexibleLiteralChoiceTerminal("on", "off"), EOF()) + + // Consumes the whole value with every token valid -> Valid. + assertTrue(isValid(grammar, "on")) + assertTrue(isValid(grammar, "off")) + + // Well-formed shape, but not one of the choices -> SemanticError pointing at the bad token. + val semantic = grammar.validate("bad") + assertTrue(semantic is ParseOutcome.SemanticError) + assertEquals("bad", (semantic as ParseOutcome.SemanticError).badToken.text) + + // Doesn't even match the shape at offset 0 -> SyntaxError, stuck at the start. + val syntax = grammar.validate("123") as? ParseOutcome.SyntaxError + assertNotNull(syntax) + assertEquals(0, syntax!!.furthest) + assertTrue(syntax.expected.any { it is FlexibleLiteralChoiceTerminal }) + } + + @Test + fun testSyntaxErrorLocalizesWhereItGetsStuck() { + // Trailing junk after a complete match: the value is consumed up to offset 2, then EOF can't + // match the rest. Returning Stuck as a value is what lets us report furthest = 2 and that EOF + // (end-of-input) was expected there. + val grammar = SequenceCombinator(FlexibleLiteralChoiceTerminal("on", "off"), EOF()) + + val syntax = grammar.validate("on off") as? ParseOutcome.SyntaxError + assertNotNull(syntax) + assertEquals(2, syntax!!.furthest) + assertTrue(syntax.expected.any { it is EOF }) + } + + @Test + fun testExpectedSetSeedsCompletionAtStart() { + // The frontier's "expected set" is exactly what completion needs: at the caret position, which + // tokens could legally come next? For the empty value at offset 0, the grammar expects one of + // its leading terminals. 
+ val grammar = SequenceCombinator( + AlternativeCombinator(LiteralChoiceTerminal("none"), FlexibleLiteralChoiceTerminal("on", "off")), + EOF() + ) + val outcome = grammar.validate("") as? ParseOutcome.SyntaxError + assertNotNull(outcome) + assertEquals(0, outcome!!.furthest) + assertTrue(outcome.expected.any { it is LiteralChoiceTerminal }) // "none" + assertTrue(outcome.expected.any { it is FlexibleLiteralChoiceTerminal }) // "on" / "off" + } + + @Test + fun testIntegerRangeGrammar() { + // Equivalent to the config_parse_ip_port grammar: a port in [0, 65536). + val grammar = SequenceCombinator(IntegerTerminal(0, 65536), EOF()) + assertTrue(isValid(grammar, "0")) + assertTrue(isValid(grammar, "65535")) + assertTrue(!isValid(grammar, "65536")) // out of range -> well-formed but invalid + assertTrue(!isValid(grammar, "-1")) + assertTrue(!isValid(grammar, "80x")) + + assertTrue(grammar.validate("65536") is ParseOutcome.SemanticError) // int matched, range failed + } + + @Test + fun testGreedyCaseTheOldEngineFails() { + // Built from the SAME combinator classes the old engine uses. Seq(ZeroOrMore("a"), "a") on "aa" + // fails under SyntacticMatch/SemanticMatch (the star eats both a's) but succeeds under parse(). + val grammar = SequenceCombinator(ZeroOrMore(LiteralChoiceTerminal("a")), LiteralChoiceTerminal("a"), EOF()) + + assertTrue(isValid(grammar, "a")) + assertTrue(isValid(grammar, "aa")) + assertTrue(isValid(grammar, "aaa")) + assertTrue(!isValid(grammar, "")) // needs at least one "a" + assertTrue(!isValid(grammar, "ab")) // trailing junk + + // Demonstrate the old engine really does fail "aa" (documents the difference, not just asserts ours). + val oldEngineFullMatch = grammar.SemanticMatch("aa", 0).matchResult + assertEquals(-1, oldEngineFullMatch) + } + + @Test + fun testSemanticErrorPicksTheParseThatStayedValidLongest() { + // Ambiguous, invalid value: "ab" parses two ways, each full, each with a different bad token. 
+ // - via the first branch: [a]=valid, [b]=invalid -> first bad at offset 1 + // - via the second branch: [ab]=invalid -> first bad at offset 0 + // validate() must report the bad token from the parse that stayed valid the longest (offset 1), + // NOT whichever the lazy stream yields first. We prove it's order-invariant by declaring the two + // alternatives in both orders and getting the same answer. + val aThenBadB = SequenceCombinator(LiteralChoiceTerminal("a"), FlexibleLiteralChoiceTerminal("x"), EOF()) + val badAB = SequenceCombinator(FlexibleLiteralChoiceTerminal("zz"), EOF()) + + for (grammar in listOf(AlternativeCombinator(aThenBadB, badAB), AlternativeCombinator(badAB, aThenBadB))) { + val outcome = grammar.validate("ab") + assertTrue(outcome is ParseOutcome.SemanticError) + val bad = (outcome as ParseOutcome.SemanticError).badToken + assertEquals("b", bad.text) + assertEquals(1, bad.start) + } + } + + @Test + fun testSemanticErrorTieBreaksOnDeclarationOrder() { + // Two enums over the same character shape: "baz" matches the shape of both but equals neither, so + // both full parses put their bad token at the SAME offset (0). The tie is broken by stream order + // = the earlier-declared alternative, so the reported token's terminal (and thus its quick-fix + // choices) is the first one. This pins the behaviour an author can steer by ordering. + val foo = SequenceCombinator(FlexibleLiteralChoiceTerminal("foo"), EOF()) + val bar = SequenceCombinator(FlexibleLiteralChoiceTerminal("bar"), EOF()) + + val fooFirst = (AlternativeCombinator(foo, bar).validate("baz") as ParseOutcome.SemanticError).badToken + assertEquals("baz", fooFirst.text) + assertTrue((fooFirst.terminal as FlexibleLiteralChoiceTerminal).choices.contains("foo")) + + val barFirst = (AlternativeCombinator(bar, foo).validate("baz") as ParseOutcome.SemanticError).badToken + assertTrue((barFirst.terminal as FlexibleLiteralChoiceTerminal).choices.contains("bar")) + } +}