From af14bb31845e8eb7442a94efad356454576386af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jake=C5=A1?= Date: Fri, 24 Apr 2026 15:21:48 +0200 Subject: [PATCH 1/5] Add PHPUnit workflow that runs the mysql-on-sqlite suite against Doltlite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Builds Doltlite from source, replaces the system libsqlite3 with it so pdo_sqlite resolves against the Doltlite library, and runs the mysql-on-sqlite PHPUnit suite without any filters. The libsqlite3 swap is done at /usr/local/lib and in the multiarch dir — LD_PRELOAD is unreliable because pdo_sqlite's DT_NEEDED binds the soname at link time. This commit is the scaffolding only. Doltlite has several deviations from stock SQLite that break the driver; the compatibility patches land in follow-up commits, each with its own verify assertion. --- .github/workflows/phpunit-tests-doltlite.yml | 103 +++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 .github/workflows/phpunit-tests-doltlite.yml diff --git a/.github/workflows/phpunit-tests-doltlite.yml b/.github/workflows/phpunit-tests-doltlite.yml new file mode 100644 index 00000000..ad245fd8 --- /dev/null +++ b/.github/workflows/phpunit-tests-doltlite.yml @@ -0,0 +1,103 @@ +name: PHPUnit Tests (Doltlite) + +on: + push: + branches: + - main + pull_request: + workflow_dispatch: + +jobs: + test: + name: PHP ${{ matrix.php }} / Doltlite + runs-on: ubuntu-latest + timeout-minutes: 30 + + strategy: + fail-fast: false + matrix: + php: [ '8.3' ] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Install Doltlite build dependencies + run: sudo apt-get update && sudo apt-get install -y build-essential zlib1g-dev tcl-dev + + - name: Check out Doltlite + uses: actions/checkout@v4 + with: + repository: dolthub/doltlite + path: doltlite-src + + - name: Build Doltlite shared library + run: | + mkdir -p build + cd build + # Same feature set we enable for 
stock-SQLite runs. PHP's pdo_sqlite + # pulls in sqlite3_column_table_name, which needs COLUMN_METADATA. + CFLAGS="-DSQLITE_ENABLE_COLUMN_METADATA -DSQLITE_ENABLE_FTS5 -DSQLITE_USE_URI -DSQLITE_ENABLE_JSON1" \ + ../configure + make -j"$(nproc)" doltlite-lib + ls -la libdoltlite* sqlite3.h + working-directory: doltlite-src + + - name: Set up PHP + uses: shivammathur/setup-php@v2 + with: + php-version: '${{ matrix.php }}' + tools: phpunit-polyfills + + - name: Install Doltlite as the system libsqlite3 + # Replace the system libsqlite3 the dynamic linker resolves for + # pdo_sqlite with the Doltlite build. This is more reliable than + # LD_PRELOAD, which loses to pdo_sqlite's DT_NEEDED on libsqlite3.so.0. + run: | + SRC="${GITHUB_WORKSPACE}/doltlite-src/build/libdoltlite.so" + test -f "$SRC" + + # Drop the build into /usr/local/lib and point libsqlite3 sonames at it, + # both in /usr/local/lib (takes precedence in ld.so.conf) and in the + # multiarch dir so anything hard-coded to that path still works. 
+ sudo install -m 0755 "$SRC" /usr/local/lib/libdoltlite.so + + sudo ln -sf /usr/local/lib/libdoltlite.so /usr/local/lib/libsqlite3.so + sudo ln -sf /usr/local/lib/libdoltlite.so /usr/local/lib/libsqlite3.so.0 + + MULTIARCH_DIR="$(dirname "$(readlink -f /lib/x86_64-linux-gnu/libsqlite3.so.0)")" + sudo rm -f "${MULTIARCH_DIR}/libsqlite3.so.0" + sudo ln -s /usr/local/lib/libdoltlite.so "${MULTIARCH_DIR}/libsqlite3.so.0" + + sudo ldconfig + + echo "ldd pdo_sqlite:" + ldd "$(php -r 'echo ini_get("extension_dir");')/pdo_sqlite.so" + + - name: Verify Doltlite is active + run: | + VERSION=$(php -r 'echo (new PDO("sqlite::memory:"))->query("SELECT SQLITE_VERSION()")->fetch()[0];') + ENGINE=$(php -r 'echo (new PDO("sqlite::memory:"))->query("SELECT doltlite_engine()")->fetch()[0];') + echo "SQLITE_VERSION() = ${VERSION}" + echo "doltlite_engine() = ${ENGINE}" + if [ "${ENGINE}" != "prolly" ]; then + echo "::error::Doltlite is not active (expected doltlite_engine() = 'prolly', got '${ENGINE}')" + exit 1 + fi + + - name: Install Composer dependencies (root) + uses: ramsey/composer-install@v3 + with: + ignore-cache: "yes" + composer-options: "--optimize-autoloader" + + - name: Install Composer dependencies (mysql-on-sqlite) + uses: ramsey/composer-install@v3 + with: + working-directory: packages/mysql-on-sqlite + ignore-cache: "yes" + composer-options: "--optimize-autoloader" + + - name: Run PHPUnit tests against Doltlite + run: php ./vendor/bin/phpunit -c ./phpunit.xml.dist + working-directory: packages/mysql-on-sqlite From 60b4b76d8f075197fbb5e7f13aea994abf476aec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jake=C5=A1?= Date: Fri, 24 Apr 2026 15:22:01 +0200 Subject: [PATCH 2/5] Patch Doltlite to keep composite-PK tables as rowid tables Doltlite silently sets TF_WithoutRowid on every table declared with a primary key, so `SELECT rowid FROM t` fails with "no such column: rowid". 
The mysql-on-sqlite driver uses `ORDER BY ROWID` as a stable tie-break when reading its internal information_schema tables, so dropping rowid breaks info-schema lookups across the board. Patch by sed: replace the single `tabOpts |= TF_WithoutRowid;` line in src/build.c with a no-op, leaving table creation otherwise untouched. Verified with a PHP one-liner that a composite-PK table still returns a rowid. --- .github/workflows/phpunit-tests-doltlite.yml | 34 +++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/.github/workflows/phpunit-tests-doltlite.yml b/.github/workflows/phpunit-tests-doltlite.yml index ad245fd8..9cb3dbcb 100644 --- a/.github/workflows/phpunit-tests-doltlite.yml +++ b/.github/workflows/phpunit-tests-doltlite.yml @@ -31,6 +31,25 @@ jobs: repository: dolthub/doltlite path: doltlite-src + - name: Patch Doltlite for mysql-on-sqlite compatibility + # Narrow, verifiable source-level patches applied before build: + # + # 1) src/build.c — neutralize the auto-conversion of composite/TEXT-PK + # tables to WITHOUT ROWID, so the driver's `ORDER BY ROWID` + # tie-breaks in info-schema reads keep working. 
+ run: | + BUILD=doltlite-src/src/build.c + + # --- Patch 1: disable WITHOUT ROWID auto-conversion --- + N=$(grep -cF ' tabOpts |= TF_WithoutRowid;' "$BUILD") + if [ "$N" != "1" ]; then + echo "::error::build.c patch: expected exactly 1 match, found $N"; exit 1 + fi + sed -i 's@^ tabOpts |= TF_WithoutRowid;$@ /* patched off in CI: do not auto-convert composite-PK tables */ (void)0;@' "$BUILD" + + echo "--- Patched lines ---" + grep -n 'patched off in CI' "$BUILD" + - name: Build Doltlite shared library run: | mkdir -p build @@ -74,7 +93,7 @@ jobs: echo "ldd pdo_sqlite:" ldd "$(php -r 'echo ini_get("extension_dir");')/pdo_sqlite.so" - - name: Verify Doltlite is active + - name: Verify Doltlite is active and patches took effect run: | VERSION=$(php -r 'echo (new PDO("sqlite::memory:"))->query("SELECT SQLITE_VERSION()")->fetch()[0];') ENGINE=$(php -r 'echo (new PDO("sqlite::memory:"))->query("SELECT doltlite_engine()")->fetch()[0];') @@ -84,6 +103,19 @@ jobs: echo "::error::Doltlite is not active (expected doltlite_engine() = 'prolly', got '${ENGINE}')" exit 1 fi + # 1) build.c patch: composite-PK tables keep rowid accessible + # instead of auto-converting to WITHOUT ROWID. + ROWID=$(php -r ' + $db = new PDO("sqlite::memory:"); + $db->exec("CREATE TABLE t (a INT, b INT, PRIMARY KEY(a, b))"); + $db->exec("INSERT INTO t VALUES (1, 2)"); + echo $db->query("SELECT rowid FROM t")->fetch()[0]; + ' 2>&1) + echo "composite-PK rowid = ${ROWID}" + if ! 
[[ "${ROWID}" =~ ^[0-9]+$ ]]; then + echo "::error::rowid-preservation patch did not take effect: ${ROWID}" + exit 1 + fi - name: Install Composer dependencies (root) uses: ramsey/composer-install@v3 From f98745a13779288f85bc689a92de10f0bc95bc97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jake=C5=A1?= Date: Fri, 24 Apr 2026 15:22:18 +0200 Subject: [PATCH 3/5] Patch Doltlite to preserve pIdxKey->eqSeen across mutmap lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit prollyBtCursorIndexMoveto's tree-scan loop sets pIdxKey->eqSeen when it finds an exact prefix match, and OP_SeekGE with BTREE_SEEK_EQ relies on that flag to decide whether the seek landed on an exact row. The subsequent findMatchingMutMapEntry call internally runs sqlite3VdbeRecordCompare against each candidate mutmap entry, and each probe resets eqSeen to 0 before the comparison — so after the lookup the flag is whatever the last mutmap probe left behind, not what the tree scan saw. The observable symptom: two sequential DELETEs on a composite NOCASE PK inside a SAVEPOINT. The first DELETE seeds the mutmap; the second goes through prollyBtCursorIndexMoveto, finds the matching tree row, sets eqSeen=1, then loses that flag when checking the mutmap. OP_SeekGE reads eqSeen=0, bails to seek_not_found, and the second DELETE silently does nothing. Patch saves eqSeen before the mutmap lookup and restores it when the lookup returns no override — checked-in as a .patch file and applied before build. 
--- .../eqSeen-preservation.patch | 37 +++++++++++++++++ .github/workflows/phpunit-tests-doltlite.yml | 41 ++++++++++++++++++- 2 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 .github/doltlite-patches/eqSeen-preservation.patch diff --git a/.github/doltlite-patches/eqSeen-preservation.patch b/.github/doltlite-patches/eqSeen-preservation.patch new file mode 100644 index 00000000..6a6781ec --- /dev/null +++ b/.github/doltlite-patches/eqSeen-preservation.patch @@ -0,0 +1,37 @@ +Preserve pIdxKey->eqSeen across findMatchingMutMapEntry + +findMatchingMutMapEntry's internal sqlite3VdbeRecordCompare probes reset +and then re-set pIdxKey->eqSeen on every iteration, clobbering the flag +that the tree-scan loop set to signal an exact prefix match. OP_SeekGE +with BTREE_SEEK_EQ relies on that eqSeen flag to decide whether an +exact match was found, so without this save/restore the second same- +table DELETE inside a SAVEPOINT silently bails to seek_not_found and +the DELETE is lost. + +--- a/src/prolly_btree.c ++++ b/src/prolly_btree.c +@@ -4445,6 +4445,14 @@ + || (pPending && pPending!=pCur->pMutMap + && !prollyMutMapIsEmpty(pPending))) + && !(treeFound && treeCmp==0) ){ ++ /* findMatchingMutMapEntry's internal sqlite3VdbeRecordCompare ++ ** calls reset and re-set pIdxKey->eqSeen on every probe, ++ ** clobbering the flag that the tree-scan loop above set. ++ ** OP_SeekGE with BTREE_SEEK_EQ relies on that eqSeen to detect ++ ** an exact prefix match; without save/restore, the second ++ ** same-table DELETE inside a SAVEPOINT silently bails out to ++ ** seek_not_found. 
*/ ++ int savedEqSeen = pIdxKey->eqSeen; + rc = findMatchingMutMapEntry((ProllyMutMap*)pCur->pMutMap, + pCur->pKeyInfo, + pIdxKey, pSortKey, nSortKey, +@@ -4462,6 +4470,9 @@ + return rc; + } + } ++ if( !mutE ){ ++ pIdxKey->eqSeen = savedEqSeen; ++ } + if( mutE ){ + + const u8 *pMutVal = mutE->pVal; diff --git a/.github/workflows/phpunit-tests-doltlite.yml b/.github/workflows/phpunit-tests-doltlite.yml index 9cb3dbcb..28686c3b 100644 --- a/.github/workflows/phpunit-tests-doltlite.yml +++ b/.github/workflows/phpunit-tests-doltlite.yml @@ -37,8 +37,22 @@ jobs: # 1) src/build.c — neutralize the auto-conversion of composite/TEXT-PK # tables to WITHOUT ROWID, so the driver's `ORDER BY ROWID` # tie-breaks in info-schema reads keep working. + # + # 2) src/prolly_btree.c — fix a write-visibility bug in + # prollyBtCursorIndexMoveto. When a non-empty mutmap exists, + # the function calls findMatchingMutMapEntry which internally + # calls sqlite3VdbeRecordCompare with `pIdxKey->eqSeen = 0` + # on every comparison, clobbering the eqSeen flag that the + # tree scan loop above it set. OP_SeekGE with BTREE_SEEK_EQ + # relies on eqSeen to detect prefix matches, so the second + # same-table DELETE inside a SAVEPOINT lands on an exact + # match in the tree but SeekGE reads eqSeen=0 and bails to + # seek_not_found, skipping the actual delete. The patch + # saves eqSeen before findMatchingMutMapEntry and restores + # it when no mutmap entry overrides the tree result. 
run: | BUILD=doltlite-src/src/build.c + MOVETO=doltlite-src/src/prolly_btree.c # --- Patch 1: disable WITHOUT ROWID auto-conversion --- N=$(grep -cF ' tabOpts |= TF_WithoutRowid;' "$BUILD") @@ -47,8 +61,12 @@ jobs: fi sed -i 's@^ tabOpts |= TF_WithoutRowid;$@ /* patched off in CI: do not auto-convert composite-PK tables */ (void)0;@' "$BUILD" + # --- Patch 2: preserve pIdxKey->eqSeen across findMatchingMutMapEntry --- + patch -d doltlite-src -p1 --no-backup-if-mismatch \ + < .github/doltlite-patches/eqSeen-preservation.patch + echo "--- Patched lines ---" - grep -n 'patched off in CI' "$BUILD" + grep -n 'patched off in CI\|savedEqSeen' "$BUILD" "$MOVETO" - name: Build Doltlite shared library run: | @@ -116,6 +134,27 @@ jobs: echo "::error::rowid-preservation patch did not take effect: ${ROWID}" exit 1 fi + # 2) prolly_btree.c patch: two sequential DELETEs inside a + # SAVEPOINT on a composite NOCASE PK no longer silently lose + # the second DELETE. + REMAINING=$(php -r ' + $db = new PDO("sqlite::memory:"); + $db->exec("CREATE TABLE t (ts TEXT NOT NULL COLLATE NOCASE, tn TEXT NOT NULL COLLATE NOCASE, cn TEXT NOT NULL COLLATE NOCASE, PRIMARY KEY (ts, tn, cn))"); + $db->exec("INSERT INTO t VALUES (\"s\", \"t\", \"id\")"); + $db->exec("INSERT INTO t VALUES (\"s\", \"t\", \"alpha\")"); + $db->exec("INSERT INTO t VALUES (\"s\", \"t\", \"beta\")"); + $db->exec("SAVEPOINT sp"); + $db->exec("DELETE FROM t WHERE ts = \"s\" AND tn = \"t\" AND cn = \"alpha\""); + $db->exec("DELETE FROM t WHERE ts = \"s\" AND tn = \"t\" AND cn = \"beta\""); + $db->exec("RELEASE SAVEPOINT sp"); + $rows = $db->query("SELECT cn FROM t ORDER BY cn")->fetchAll(PDO::FETCH_COLUMN); + echo implode(",", $rows); + ' 2>&1) + echo "post-SAVEPOINT remaining rows = ${REMAINING}" + if [ "${REMAINING}" != "id" ]; then + echo "::error::eqSeen-preservation patch did not take effect: expected only 'id', got '${REMAINING}'" + exit 1 + fi - name: Install Composer dependencies (root) uses: ramsey/composer-install@v3 
From e81b946e0c7e551a2dd5df638bcfb611a8837be4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jake=C5=A1?= Date: Fri, 24 Apr 2026 15:22:37 +0200 Subject: [PATCH 4/5] Patch Doltlite mergeScan to skip tree entries covered by a mutmap DELETE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit prollyBtCursorIndexMoveto can reposition the merge cursor directly at a mutmap INSERT entry via setCursorToMutMapEntryPhys, which jumps mmIdx past any mutmap entries that sort earlier. A subsequent mergeScan iteration that sees `cmp*dir < 0` — the tree entry is ahead of mutmap[mmIdx] — then emits the tree row without noticing that the mutmap holds a DELETE for that tree key at an earlier order index. The tree row is logically deleted but the scan walks over it anyway; SQLite eventually follows its rowid and hits the deleted slot, tripping the "database disk image is malformed" check in sqlite3VdbeFinishMoveto. Reproduces on an INSERT + DELETE + UPDATE sequence inside one transaction: the INSERT fills the mutmap, the DELETE adds a tombstone for an earlier row, the UPDATE's scan emits that earlier row from the tree, then crashes. Patch consults the mutmap before emitting a tree entry; if a DELETE exists for that key, skip and advance the tree cursor. No behavior change in the common case (no matching mutmap entry), constant- factor overhead otherwise. Checked in as a .patch file. 
--- .../mergeScan-check-tree-delete.patch | 61 +++++++++++++++++++ .github/workflows/phpunit-tests-doltlite.yml | 30 ++++++++- 2 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 .github/doltlite-patches/mergeScan-check-tree-delete.patch diff --git a/.github/doltlite-patches/mergeScan-check-tree-delete.patch b/.github/doltlite-patches/mergeScan-check-tree-delete.patch new file mode 100644 index 00000000..7706f9a7 --- /dev/null +++ b/.github/doltlite-patches/mergeScan-check-tree-delete.patch @@ -0,0 +1,61 @@ +Check mutmap for tree-key DELETE when emitting a tree entry in mergeScan + +prollyBtCursorIndexMoveto can position the merge cursor directly at a +mutmap INSERT entry via setCursorToMutMapEntryPhys, which jumps mmIdx +past any sort-earlier mutmap entries. The subsequent mergeScan +iteration, on seeing `cmp*dir < 0` (tree entry ahead of mutmap[mmIdx]), +emits the tree entry without noticing that the mutmap contains a +DELETE for that tree key at an earlier order index. The tree key was +logically deleted but stays visible to the scan — SQLite then follows +its rowid and calls TableMoveto to a rowid that mutmap says is +deleted, which hits the "database disk image is malformed" check in +sqlite3VdbeFinishMoveto. + +Fix: when about to emit a tree entry, look up the current tree key in +the mutmap; if a DELETE record exists, skip the tree entry and advance +the tree cursor. No behavior change in the common case (no matching +mutmap entry → emit tree as before), constant-factor overhead +otherwise. 
+ +Reproduces deterministically on: + + CREATE TABLE c (ka TEXT, kb TEXT, kc TEXT, v TEXT, + PRIMARY KEY (ka, kb, kc)); + INSERT INTO c VALUES ('db','t','a',''); + BEGIN IMMEDIATE; + INSERT INTO c VALUES ('db','t','b',''); + DELETE FROM c WHERE ka='db' AND kb='t' AND kc='a'; + UPDATE c SET v = 'x' WHERE ka='db' AND kb='t'; -- malformed w/o fix + +--- a/src/prolly_btree.c ++++ b/src/prolly_btree.c +@@ -3631,6 +3631,29 @@ + } + cmp = mergeCompare(pCur, e); + if( cmp*dir < 0 ){ ++ /* Tree entry is ahead of mutmap[mmIdx] in scan direction. ++ ** Check whether the mutmap has a DELETE entry for the tree ++ ** key at an order index the iteration has already walked ++ ** past (e.g. after setCursorToMutMapEntryPhys jumped mmIdx ++ ** directly to a later INSERT). Without this check the scan ++ ** would emit a logically-deleted tree row and SQLite would ++ ** later TableMoveto a rowid that mutmap says is gone. */ ++ ProllyMutMapEntry *delE = 0; ++ int delRc; ++ if( pCur->curIntKey ){ ++ delRc = prollyMutMapFindRc(pCur->pMutMap, 0, 0, ++ prollyCursorIntKey(&pCur->pCur), &delE); ++ }else{ ++ const u8 *pK; int nK; ++ prollyCursorKey(&pCur->pCur, &pK, &nK); ++ delRc = prollyMutMapFindRc(pCur->pMutMap, pK, nK, 0, &delE); ++ } ++ if( delRc!=SQLITE_OK ) return delRc; ++ if( delE && delE->op==PROLLY_EDIT_DELETE ){ ++ int advRc = advanceTreeCursor(pCur, dir); ++ if( advRc!=SQLITE_OK ) return advRc; ++ continue; ++ } + pCur->mergeSrc = MERGE_SRC_TREE; + if( pRes ) *pRes = 0; + return SQLITE_OK; diff --git a/.github/workflows/phpunit-tests-doltlite.yml b/.github/workflows/phpunit-tests-doltlite.yml index 28686c3b..fe0d313d 100644 --- a/.github/workflows/phpunit-tests-doltlite.yml +++ b/.github/workflows/phpunit-tests-doltlite.yml @@ -50,6 +50,11 @@ jobs: # seek_not_found, skipping the actual delete. The patch # saves eqSeen before findMatchingMutMapEntry and restores # it when no mutmap entry overrides the tree result. 
+ # + # 3) src/prolly_btree.c — in mergeScan, when emitting a tree + # entry check the mutmap for a DELETE on that tree key; if + # found, skip and advance. Without this, INSERT+DELETE+UPDATE + # in one transaction trips "database disk image is malformed". run: | BUILD=doltlite-src/src/build.c MOVETO=doltlite-src/src/prolly_btree.c @@ -65,8 +70,12 @@ jobs: patch -d doltlite-src -p1 --no-backup-if-mismatch \ < .github/doltlite-patches/eqSeen-preservation.patch + # --- Patch 3: check mutmap for tree-key DELETE in mergeScan --- + patch -d doltlite-src -p1 --no-backup-if-mismatch \ + < .github/doltlite-patches/mergeScan-check-tree-delete.patch + echo "--- Patched lines ---" - grep -n 'patched off in CI\|savedEqSeen' "$BUILD" "$MOVETO" + grep -n 'patched off in CI\|savedEqSeen\|walked past' "$BUILD" "$MOVETO" - name: Build Doltlite shared library run: | @@ -155,6 +164,25 @@ jobs: echo "::error::eqSeen-preservation patch did not take effect: expected only 'id', got '${REMAINING}'" exit 1 fi + # 3) mergeScan patch: UPDATE inside a transaction after an + # INSERT + DELETE no longer trips "database disk image is + # malformed". 
+ VAL=$(php -r ' + $db = new PDO("sqlite::memory:"); + $db->exec("CREATE TABLE c (ka TEXT, kb TEXT, kc TEXT, v TEXT, PRIMARY KEY (ka, kb, kc))"); + $db->exec("INSERT INTO c VALUES (\"db\", \"t\", \"a\", \"\")"); + $db->exec("BEGIN IMMEDIATE"); + $db->exec("INSERT INTO c VALUES (\"db\", \"t\", \"b\", \"\")"); + $db->exec("DELETE FROM c WHERE ka = \"db\" AND kb = \"t\" AND kc = \"a\""); + $db->exec("UPDATE c SET v = \"x\" WHERE ka = \"db\" AND kb = \"t\""); + $db->exec("COMMIT"); + echo $db->query("SELECT v FROM c")->fetch()[0]; + ' 2>&1) + echo "post-UPDATE value = ${VAL}" + if [ "${VAL}" != "x" ]; then + echo "::error::mergeScan patch did not take effect: expected 'x', got '${VAL}'" + exit 1 + fi - name: Install Composer dependencies (root) uses: ramsey/composer-install@v3 From 0be2a68514d896b5a486e317fbb0a8d7712f88a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Jake=C5=A1?= Date: Fri, 24 Apr 2026 15:22:56 +0200 Subject: [PATCH 5/5] Patch Doltlite to preserve original bytes on lossy-collation index entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit prollyBtCursorInsert stored no value for non-INTKEY inserts where every record field fits into the sort key (splitKey=0), relying on getCursorPayload to reconstruct the record from the sort key bytes on read. That round-trip is lossless for BINARY but the sort-key encoder in sortkey.c folds A-Z to a-z under NOCASE and strips trailing spaces under RTRIM, so when SQLite picks the auto-index as a covering index for a plain column read (e.g. `SELECT pkcol FROM t` on a composite-PK rowid table with a NOCASE pkcol), OP_Column emits the folded bytes instead of the original column value. Reproduces testAlterTableModifyColumnComplexChange — the driver rebuilds the table via CREATE/INSERT/RENAME and then reads back 'johnny' where it stored 'Johnny'. Patch: when any pKeyInfo collation is non-identity (NOCASE or RTRIM), preserve pPayload->pKey as the value alongside the sort key. 
getCursorPayload prefers the stored value over reconstruction, so reads return the original bytes. No behavior change for BINARY- only records. --- ...ve-original-value-when-sortkey-lossy.patch | 72 +++++++++++++++++++ .github/workflows/phpunit-tests-doltlite.yml | 31 +++++++- 2 files changed, 100 insertions(+), 3 deletions(-) create mode 100644 .github/doltlite-patches/preserve-original-value-when-sortkey-lossy.patch diff --git a/.github/doltlite-patches/preserve-original-value-when-sortkey-lossy.patch b/.github/doltlite-patches/preserve-original-value-when-sortkey-lossy.patch new file mode 100644 index 00000000..c315bb1d --- /dev/null +++ b/.github/doltlite-patches/preserve-original-value-when-sortkey-lossy.patch @@ -0,0 +1,72 @@ +Preserve the original record when the sort-key encoding is lossy + +For a non-INTKEY cursor, prollyBtCursorInsert normally picks between +two storage shapes: + + - splitKey=1 — sort key holds a prefix of the record and the rest of + the record is stored as the value (WITHOUT ROWID tables, secondary + indexes); + - splitKey=0 — sort key holds every field and no value is stored. + +On read, getCursorPayload prefers the stored value; if none is +present it reconstructs a SQLite record from the sort key bytes. That +round-trip is lossless for BINARY collations but the sort-key encoder +in sortkey.c lower-cases A-Z under NOCASE and strips trailing spaces +under RTRIM. + +When SQLite picks the auto-index as a covering index for a plain +column read — e.g. `SELECT pkcol FROM t` on a composite-PK rowid +table with a NOCASE pkcol — OP_Column reads the reconstructed (folded) +bytes from getCursorPayload and returns them in place of the original +column value. 
Reproduces on: + + CREATE TABLE t (ID INT, pkcol TEXT COLLATE NOCASE, extra TEXT, + PRIMARY KEY (ID, pkcol)); + INSERT INTO t VALUES (1, 'Johnny', 'x'); + SELECT pkcol FROM t; -- 'johnny' (wrong) without fix + SELECT * FROM t; -- 'Johnny' (right) both ways + +Fix: when any field in pKeyInfo uses a collation whose sort-key +encoding is not a bijection over the original bytes (NOCASE, RTRIM), +preserve the original record as the value alongside the sort key. +getCursorPayload then prefers the original bytes over reconstruction. +No behavior change when all collations are BINARY: the value side +stays empty and storage footprint is unchanged. + +--- a/src/prolly_btree.c ++++ b/src/prolly_btree.c +@@ -4716,7 +4716,33 @@ + pCur->pKeyInfo, + &pSortKey, &nSortKey); + if( rc==SQLITE_OK ){ +- if( splitKey ){ ++ /* When every field of the record is folded into the sort key ++ ** (splitKey==0) we would normally store no value and, on read, ++ ** reconstruct the SQLite record from the sort key. That is ++ ** lossless for BINARY collations but the sort-key encoder ++ ** lower-cases A-Z under NOCASE and strips trailing spaces ++ ** under RTRIM. When SQLite picks the auto-index as a covering ++ ** index for a plain column read (e.g. `SELECT pkcol FROM t` ++ ** on a composite-PK rowid table with NOCASE pkcol), OP_Column ++ ** reads the reconstructed (folded) bytes and returns them to ++ ** the caller in place of the original column value. Preserve ++ ** the original record as the value whenever any field in ++ ** pKeyInfo uses a lossy collation so the read path prefers ++ ** the original bytes over the reconstructed ones. 
*/ ++ int lossy = 0; ++ if( pCur->pKeyInfo ){ ++ int i; ++ for( i = 0; i < pCur->pKeyInfo->nAllField; i++ ){ ++ CollSeq *pColl = pCur->pKeyInfo->aColl[i]; ++ if( pColl && pColl->zName ++ && ( sqlite3StrICmp(pColl->zName, "NOCASE")==0 ++ || sqlite3StrICmp(pColl->zName, "RTRIM")==0 ) ){ ++ lossy = 1; ++ break; ++ } ++ } ++ } ++ if( splitKey || lossy ){ + rc = prollyMutMapInsert(pCur->pMutMap, + pSortKey, nSortKey, 0, + (const u8*)pPayload->pKey, (int)pPayload->nKey); diff --git a/.github/workflows/phpunit-tests-doltlite.yml b/.github/workflows/phpunit-tests-doltlite.yml index fe0d313d..940e3a48 100644 --- a/.github/workflows/phpunit-tests-doltlite.yml +++ b/.github/workflows/phpunit-tests-doltlite.yml @@ -32,7 +32,7 @@ jobs: path: doltlite-src - name: Patch Doltlite for mysql-on-sqlite compatibility - # Narrow, verifiable source-level patches applied before build: + # Four narrow, verifiable source-level patches applied before build: # # 1) src/build.c — neutralize the auto-conversion of composite/TEXT-PK # tables to WITHOUT ROWID, so the driver's `ORDER BY ROWID` @@ -55,6 +55,13 @@ jobs: # entry check the mutmap for a DELETE on that tree key; if # found, skip and advance. Without this, INSERT+DELETE+UPDATE # in one transaction trips "database disk image is malformed". + # + # 4) src/prolly_btree.c — preserve the original record bytes as + # the index value when any pKeyInfo collation is lossy + # (NOCASE, RTRIM). Without this, auto-index covering reads + # reconstruct a SQLite record from the sort key and return + # folded bytes (e.g. 'johnny' instead of 'Johnny') for plain + # column reads on composite-PK rowid tables. 
run: | BUILD=doltlite-src/src/build.c MOVETO=doltlite-src/src/prolly_btree.c @@ -74,8 +81,12 @@ jobs: patch -d doltlite-src -p1 --no-backup-if-mismatch \ < .github/doltlite-patches/mergeScan-check-tree-delete.patch + # --- Patch 4: preserve original record bytes on lossy-collation index entries --- + patch -d doltlite-src -p1 --no-backup-if-mismatch \ + < .github/doltlite-patches/preserve-original-value-when-sortkey-lossy.patch + echo "--- Patched lines ---" - grep -n 'patched off in CI\|savedEqSeen\|walked past' "$BUILD" "$MOVETO" + grep -n 'patched off in CI\|savedEqSeen\|walked past\|lossy = 1' "$BUILD" "$MOVETO" - name: Build Doltlite shared library run: | @@ -120,7 +131,7 @@ jobs: echo "ldd pdo_sqlite:" ldd "$(php -r 'echo ini_get("extension_dir");')/pdo_sqlite.so" - - name: Verify Doltlite is active and patches took effect + - name: Verify Doltlite is active and all four patches took effect run: | VERSION=$(php -r 'echo (new PDO("sqlite::memory:"))->query("SELECT SQLITE_VERSION()")->fetch()[0];') ENGINE=$(php -r 'echo (new PDO("sqlite::memory:"))->query("SELECT doltlite_engine()")->fetch()[0];') @@ -183,6 +194,20 @@ jobs: echo "::error::mergeScan patch did not take effect: expected 'x', got '${VAL}'" exit 1 fi + # 4) lossy-collation patch: a covering-index read on a + # composite-PK rowid table with a NOCASE text column no + # longer returns folded bytes. + NAME=$(php -r ' + $db = new PDO("sqlite::memory:"); + $db->exec("CREATE TABLE t (id INT, pkcol TEXT COLLATE NOCASE, extra TEXT, PRIMARY KEY (id, pkcol))"); + $db->exec("INSERT INTO t VALUES (1, \"Johnny\", \"x\")"); + echo $db->query("SELECT pkcol FROM t")->fetch()[0]; + ' 2>&1) + echo "covering-index pkcol = ${NAME}" + if [ "${NAME}" != "Johnny" ]; then + echo "::error::lossy-collation patch did not take effect: expected 'Johnny', got '${NAME}'" + exit 1 + fi - name: Install Composer dependencies (root) uses: ramsey/composer-install@v3