diff --git a/.github/doltlite-patches/eqSeen-preservation.patch b/.github/doltlite-patches/eqSeen-preservation.patch new file mode 100644 index 00000000..6a6781ec --- /dev/null +++ b/.github/doltlite-patches/eqSeen-preservation.patch @@ -0,0 +1,37 @@ +Preserve pIdxKey->eqSeen across findMatchingMutMapEntry + +findMatchingMutMapEntry's internal sqlite3VdbeRecordCompare probes reset +and then re-set pIdxKey->eqSeen on every iteration, clobbering the flag +that the tree-scan loop set to signal an exact prefix match. OP_SeekGE +with BTREE_SEEK_EQ relies on that eqSeen flag to decide whether an +exact match was found, so without this save/restore the second same- +table DELETE inside a SAVEPOINT silently bails to seek_not_found and +the DELETE is lost. + +--- a/src/prolly_btree.c ++++ b/src/prolly_btree.c +@@ -4445,6 +4445,14 @@ + || (pPending && pPending!=pCur->pMutMap + && !prollyMutMapIsEmpty(pPending))) + && !(treeFound && treeCmp==0) ){ ++ /* findMatchingMutMapEntry's internal sqlite3VdbeRecordCompare ++ ** calls reset and re-set pIdxKey->eqSeen on every probe, ++ ** clobbering the flag that the tree-scan loop above set. ++ ** OP_SeekGE with BTREE_SEEK_EQ relies on that eqSeen to detect ++ ** an exact prefix match; without save/restore, the second ++ ** same-table DELETE inside a SAVEPOINT silently bails to ++ ** seek_not_found. 
*/ ++ int savedEqSeen = pIdxKey->eqSeen; + rc = findMatchingMutMapEntry((ProllyMutMap*)pCur->pMutMap, + pCur->pKeyInfo, + pIdxKey, pSortKey, nSortKey, +@@ -4462,6 +4470,9 @@ + return rc; + } + } ++ if( !mutE ){ ++ pIdxKey->eqSeen = savedEqSeen; ++ } + if( mutE ){ + + const u8 *pMutVal = mutE->pVal; diff --git a/.github/doltlite-patches/mergeScan-check-tree-delete.patch b/.github/doltlite-patches/mergeScan-check-tree-delete.patch new file mode 100644 index 00000000..7706f9a7 --- /dev/null +++ b/.github/doltlite-patches/mergeScan-check-tree-delete.patch @@ -0,0 +1,61 @@ +Check mutmap for tree-key DELETE when emitting a tree entry in mergeScan + +prollyBtCursorIndexMoveto can position the merge cursor directly at a +mutmap INSERT entry via setCursorToMutMapEntryPhys, which jumps mmIdx +past any sort-earlier mutmap entries. The subsequent mergeScan +iteration, on seeing `cmp*dir < 0` (tree entry ahead of mutmap[mmIdx]), +emits the tree entry without noticing that the mutmap contains a +DELETE for that tree key at an earlier order index. The tree key was +logically deleted but stays visible to the scan — SQLite then follows +its rowid and calls TableMoveto to a rowid that mutmap says is +deleted, which hits the "database disk image is malformed" check in +sqlite3VdbeFinishMoveto. + +Fix: when about to emit a tree entry, look up the current tree key in +the mutmap; if a DELETE record exists, skip the tree entry and advance +the tree cursor. No behavior change in the common case (no matching +mutmap entry → emit tree as before), constant-factor overhead +otherwise. 
+ +Reproduces deterministically on: + + CREATE TABLE c (ka TEXT, kb TEXT, kc TEXT, v TEXT, + PRIMARY KEY (ka, kb, kc)); + INSERT INTO c VALUES ('db','t','a',''); + BEGIN IMMEDIATE; + INSERT INTO c VALUES ('db','t','b',''); + DELETE FROM c WHERE ka='db' AND kb='t' AND kc='a'; + UPDATE c SET v = 'x' WHERE ka='db' AND kb='t'; -- malformed w/o fix + +--- a/src/prolly_btree.c ++++ b/src/prolly_btree.c +@@ -3631,6 +3631,29 @@ + } + cmp = mergeCompare(pCur, e); + if( cmp*dir < 0 ){ ++ /* Tree entry is ahead of mutmap[mmIdx] in scan direction. ++ ** Check whether the mutmap has a DELETE entry for the tree ++ ** key at an order index the iteration has already walked ++ ** past (e.g. after setCursorToMutMapEntryPhys jumped mmIdx ++ ** directly to a later INSERT). Without this check the scan ++ ** would emit a logically-deleted tree row and SQLite would ++ ** later TableMoveto a rowid that mutmap says is gone. */ ++ ProllyMutMapEntry *delE = 0; ++ int delRc; ++ if( pCur->curIntKey ){ ++ delRc = prollyMutMapFindRc(pCur->pMutMap, 0, 0, ++ prollyCursorIntKey(&pCur->pCur), &delE); ++ }else{ ++ const u8 *pK; int nK; ++ prollyCursorKey(&pCur->pCur, &pK, &nK); ++ delRc = prollyMutMapFindRc(pCur->pMutMap, pK, nK, 0, &delE); ++ } ++ if( delRc!=SQLITE_OK ) return delRc; ++ if( delE && delE->op==PROLLY_EDIT_DELETE ){ ++ int advRc = advanceTreeCursor(pCur, dir); ++ if( advRc!=SQLITE_OK ) return advRc; ++ continue; ++ } + pCur->mergeSrc = MERGE_SRC_TREE; + if( pRes ) *pRes = 0; + return SQLITE_OK; diff --git a/.github/doltlite-patches/preserve-original-value-when-sortkey-lossy.patch b/.github/doltlite-patches/preserve-original-value-when-sortkey-lossy.patch new file mode 100644 index 00000000..c315bb1d --- /dev/null +++ b/.github/doltlite-patches/preserve-original-value-when-sortkey-lossy.patch @@ -0,0 +1,72 @@ +Preserve the original record when the sort-key encoding is lossy + +For a non-INTKEY cursor, prollyBtCursorInsert normally picks between +two storage shapes: + + - splitKey=1 — 
sort key holds a prefix of the record and the rest of + the record is stored as the value (WITHOUT ROWID tables, secondary + indexes); + - splitKey=0 — sort key holds every field and no value is stored. + +On read, getCursorPayload prefers the stored value; if none is +present it reconstructs a SQLite record from the sort key bytes. That +round-trip is lossless for BINARY collations but the sort-key encoder +in sortkey.c lower-cases A-Z under NOCASE and strips trailing spaces +under RTRIM. + +When SQLite picks the auto-index as a covering index for a plain +column read — e.g. `SELECT pkcol FROM t` on a composite-PK rowid +table with a NOCASE pkcol — OP_Column reads the reconstructed (folded) +bytes from getCursorPayload and returns them in place of the original +column value. Reproduces on: + + CREATE TABLE t (ID INT, pkcol TEXT COLLATE NOCASE, extra TEXT, + PRIMARY KEY (ID, pkcol)); + INSERT INTO t VALUES (1, 'Johnny', 'x'); + SELECT pkcol FROM t; -- 'johnny' (wrong) without fix + SELECT * FROM t; -- 'Johnny' (right) both ways + +Fix: when any field in pKeyInfo uses a collation whose sort-key +encoding is not a bijection over the original bytes (NOCASE, RTRIM), +preserve the original record as the value alongside the sort key. +getCursorPayload then prefers the original bytes over reconstruction. +No behavior change when all collations are BINARY: the value side +stays empty and storage footprint is unchanged. + +--- a/src/prolly_btree.c ++++ b/src/prolly_btree.c +@@ -4716,7 +4716,33 @@ + pCur->pKeyInfo, + &pSortKey, &nSortKey); + if( rc==SQLITE_OK ){ +- if( splitKey ){ ++ /* When every field of the record is folded into the sort key ++ ** (splitKey==0) we would normally store no value and, on read, ++ ** reconstruct the SQLite record from the sort key. That is ++ ** lossless for BINARY collations but the sort-key encoder ++ ** lower-cases A-Z under NOCASE and strips trailing spaces ++ ** under RTRIM. 
When SQLite picks the auto-index as a covering ++ ** index for a plain column read (e.g. `SELECT pkcol FROM t` ++ ** on a composite-PK rowid table with NOCASE pkcol), OP_Column ++ ** reads the reconstructed (folded) bytes and returns them to ++ ** the caller in place of the original column value. Preserve ++ ** the original record as the value whenever any field in ++ ** pKeyInfo uses a lossy collation so the read path prefers ++ ** the original bytes over the reconstructed ones. */ ++ int lossy = 0; ++ if( pCur->pKeyInfo ){ ++ int i; ++ for( i = 0; i < pCur->pKeyInfo->nAllField; i++ ){ ++ CollSeq *pColl = pCur->pKeyInfo->aColl[i]; ++ if( pColl && pColl->zName ++ && ( sqlite3StrICmp(pColl->zName, "NOCASE")==0 ++ || sqlite3StrICmp(pColl->zName, "RTRIM")==0 ) ){ ++ lossy = 1; ++ break; ++ } ++ } ++ } ++ if( splitKey || lossy ){ + rc = prollyMutMapInsert(pCur->pMutMap, + pSortKey, nSortKey, 0, + (const u8*)pPayload->pKey, (int)pPayload->nKey); diff --git a/.github/workflows/phpunit-tests-doltlite.yml b/.github/workflows/phpunit-tests-doltlite.yml new file mode 100644 index 00000000..940e3a48 --- /dev/null +++ b/.github/workflows/phpunit-tests-doltlite.yml @@ -0,0 +1,227 @@ +name: PHPUnit Tests (Doltlite) + +on: + push: + branches: + - main + pull_request: + workflow_dispatch: + +jobs: + test: + name: PHP ${{ matrix.php }} / Doltlite + runs-on: ubuntu-latest + timeout-minutes: 30 + + strategy: + fail-fast: false + matrix: + php: [ '8.3' ] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Install Doltlite build dependencies + run: sudo apt-get update && sudo apt-get install -y build-essential zlib1g-dev tcl-dev + + - name: Check out Doltlite + uses: actions/checkout@v4 + with: + repository: dolthub/doltlite + path: doltlite-src + + - name: Patch Doltlite for mysql-on-sqlite compatibility + # Four narrow, verifiable source-level patches applied before build: + # + # 1) src/build.c — neutralize the auto-conversion of composite/TEXT-PK + # 
tables to WITHOUT ROWID, so the driver's `ORDER BY ROWID` + # tie-breaks in info-schema reads keep working. + # + # 2) src/prolly_btree.c — fix a write-visibility bug in + # prollyBtCursorIndexMoveto. When a non-empty mutmap exists, + # the function calls findMatchingMutMapEntry which internally + # calls sqlite3VdbeRecordCompare with `pIdxKey->eqSeen = 0` + # on every comparison, clobbering the eqSeen flag that the + # tree scan loop above it set. OP_SeekGE with BTREE_SEEK_EQ + # relies on eqSeen to detect prefix matches, so the second + # same-table DELETE inside a SAVEPOINT lands on an exact + # match in the tree but SeekGE reads eqSeen=0 and bails to + # seek_not_found, skipping the actual delete. The patch + # saves eqSeen before findMatchingMutMapEntry and restores + # it when no mutmap entry overrides the tree result. + # + # 3) src/prolly_btree.c — in mergeScan, when emitting a tree + # entry check the mutmap for a DELETE on that tree key; if + # found, skip and advance. Without this, INSERT+DELETE+UPDATE + # in one transaction trips "database disk image is malformed". + # + # 4) src/prolly_btree.c — preserve the original record bytes as + # the index value when any pKeyInfo collation is lossy + # (NOCASE, RTRIM). Without this, auto-index covering reads + # reconstruct a SQLite record from the sort key and return + # folded bytes (e.g. 'johnny' instead of 'Johnny') for plain + # column reads on composite-PK rowid tables. 
+ run: | + BUILD=doltlite-src/src/build.c + MOVETO=doltlite-src/src/prolly_btree.c + + # --- Patch 1: disable WITHOUT ROWID auto-conversion --- + N=$(grep -cF ' tabOpts |= TF_WithoutRowid;' "$BUILD") + if [ "$N" != "1" ]; then + echo "::error::build.c patch: expected exactly 1 match, found $N"; exit 1 + fi + sed -i 's@^ tabOpts |= TF_WithoutRowid;$@ /* patched off in CI: do not auto-convert composite-PK tables */ (void)0;@' "$BUILD" + + # --- Patch 2: preserve pIdxKey->eqSeen across findMatchingMutMapEntry --- + patch -d doltlite-src -p1 --no-backup-if-mismatch \ + < .github/doltlite-patches/eqSeen-preservation.patch + + # --- Patch 3: check mutmap for tree-key DELETE in mergeScan --- + patch -d doltlite-src -p1 --no-backup-if-mismatch \ + < .github/doltlite-patches/mergeScan-check-tree-delete.patch + + # --- Patch 4: preserve original record bytes on lossy-collation index entries --- + patch -d doltlite-src -p1 --no-backup-if-mismatch \ + < .github/doltlite-patches/preserve-original-value-when-sortkey-lossy.patch + + echo "--- Patched lines ---" + grep -n 'patched off in CI\|savedEqSeen\|already walked\|lossy = 1' "$BUILD" "$MOVETO" # grep is line-based: each marker must sit on a single patched source line + + - name: Build Doltlite shared library + run: | + mkdir -p build + cd build + # Same feature set we enable for stock-SQLite runs. PHP's pdo_sqlite + # pulls in sqlite3_column_table_name, which needs COLUMN_METADATA. + CFLAGS="-DSQLITE_ENABLE_COLUMN_METADATA -DSQLITE_ENABLE_FTS5 -DSQLITE_USE_URI -DSQLITE_ENABLE_JSON1" \ + ../configure + make -j"$(nproc)" doltlite-lib + ls -la libdoltlite* sqlite3.h + working-directory: doltlite-src + + - name: Set up PHP + uses: shivammathur/setup-php@v2 + with: + php-version: '${{ matrix.php }}' + tools: phpunit-polyfills + + - name: Install Doltlite as the system libsqlite3 + # Replace the system libsqlite3 the dynamic linker resolves for + # pdo_sqlite with the Doltlite build. This is more reliable than + # LD_PRELOAD, which loses to pdo_sqlite's DT_NEEDED on libsqlite3.so.0. 
+ run: | + SRC="${GITHUB_WORKSPACE}/doltlite-src/build/libdoltlite.so" + test -f "$SRC" + + # Drop the build into /usr/local/lib and point libsqlite3 sonames at it, + # both in /usr/local/lib (takes precedence in ld.so.conf) and in the + # multiarch dir so anything hard-coded to that path still works. + sudo install -m 0755 "$SRC" /usr/local/lib/libdoltlite.so + + sudo ln -sf /usr/local/lib/libdoltlite.so /usr/local/lib/libsqlite3.so + sudo ln -sf /usr/local/lib/libdoltlite.so /usr/local/lib/libsqlite3.so.0 + + MULTIARCH_DIR="$(dirname "$(readlink -f /lib/x86_64-linux-gnu/libsqlite3.so.0)")" + sudo rm -f "${MULTIARCH_DIR}/libsqlite3.so.0" + sudo ln -s /usr/local/lib/libdoltlite.so "${MULTIARCH_DIR}/libsqlite3.so.0" + + sudo ldconfig + + echo "ldd pdo_sqlite:" + ldd "$(php -r 'echo ini_get("extension_dir");')/pdo_sqlite.so" + + - name: Verify Doltlite is active and all four patches took effect + run: | + VERSION=$(php -r 'echo (new PDO("sqlite::memory:"))->query("SELECT SQLITE_VERSION()")->fetch()[0];') + ENGINE=$(php -r 'echo (new PDO("sqlite::memory:"))->query("SELECT doltlite_engine()")->fetch()[0];') + echo "SQLITE_VERSION() = ${VERSION}" + echo "doltlite_engine() = ${ENGINE}" + if [ "${ENGINE}" != "prolly" ]; then + echo "::error::Doltlite is not active (expected doltlite_engine() = 'prolly', got '${ENGINE}')" + exit 1 + fi + # 1) build.c patch: composite-PK tables keep rowid accessible + # instead of auto-converting to WITHOUT ROWID. + ROWID=$(php -r ' + $db = new PDO("sqlite::memory:"); + $db->exec("CREATE TABLE t (a INT, b INT, PRIMARY KEY(a, b))"); + $db->exec("INSERT INTO t VALUES (1, 2)"); + echo $db->query("SELECT rowid FROM t")->fetch()[0]; + ' 2>&1) + echo "composite-PK rowid = ${ROWID}" + if ! 
[[ "${ROWID}" =~ ^[0-9]+$ ]]; then + echo "::error::rowid-preservation patch did not take effect: ${ROWID}" + exit 1 + fi + # 2) prolly_btree.c patch: two sequential DELETEs inside a + # SAVEPOINT on a composite NOCASE PK no longer silently lose + # the second DELETE. + REMAINING=$(php -r ' + $db = new PDO("sqlite::memory:"); + $db->exec("CREATE TABLE t (ts TEXT NOT NULL COLLATE NOCASE, tn TEXT NOT NULL COLLATE NOCASE, cn TEXT NOT NULL COLLATE NOCASE, PRIMARY KEY (ts, tn, cn))"); + $db->exec("INSERT INTO t VALUES (\"s\", \"t\", \"id\")"); + $db->exec("INSERT INTO t VALUES (\"s\", \"t\", \"alpha\")"); + $db->exec("INSERT INTO t VALUES (\"s\", \"t\", \"beta\")"); + $db->exec("SAVEPOINT sp"); + $db->exec("DELETE FROM t WHERE ts = \"s\" AND tn = \"t\" AND cn = \"alpha\""); + $db->exec("DELETE FROM t WHERE ts = \"s\" AND tn = \"t\" AND cn = \"beta\""); + $db->exec("RELEASE SAVEPOINT sp"); + $rows = $db->query("SELECT cn FROM t ORDER BY cn")->fetchAll(PDO::FETCH_COLUMN); + echo implode(",", $rows); + ' 2>&1) + echo "post-SAVEPOINT remaining rows = ${REMAINING}" + if [ "${REMAINING}" != "id" ]; then + echo "::error::eqSeen-preservation patch did not take effect: expected only 'id', got '${REMAINING}'" + exit 1 + fi + # 3) mergeScan patch: UPDATE inside a transaction after an + # INSERT + DELETE no longer trips "database disk image is + # malformed". 
+ VAL=$(php -r ' + $db = new PDO("sqlite::memory:"); + $db->exec("CREATE TABLE c (ka TEXT, kb TEXT, kc TEXT, v TEXT, PRIMARY KEY (ka, kb, kc))"); + $db->exec("INSERT INTO c VALUES (\"db\", \"t\", \"a\", \"\")"); + $db->exec("BEGIN IMMEDIATE"); + $db->exec("INSERT INTO c VALUES (\"db\", \"t\", \"b\", \"\")"); + $db->exec("DELETE FROM c WHERE ka = \"db\" AND kb = \"t\" AND kc = \"a\""); + $db->exec("UPDATE c SET v = \"x\" WHERE ka = \"db\" AND kb = \"t\""); + $db->exec("COMMIT"); + echo $db->query("SELECT v FROM c")->fetch()[0]; + ' 2>&1) + echo "post-UPDATE value = ${VAL}" + if [ "${VAL}" != "x" ]; then + echo "::error::mergeScan patch did not take effect: expected 'x', got '${VAL}'" + exit 1 + fi + # 4) lossy-collation patch: a covering-index read on a + # composite-PK rowid table with a NOCASE text column no + # longer returns folded bytes. + NAME=$(php -r ' + $db = new PDO("sqlite::memory:"); + $db->exec("CREATE TABLE t (id INT, pkcol TEXT COLLATE NOCASE, extra TEXT, PRIMARY KEY (id, pkcol))"); + $db->exec("INSERT INTO t VALUES (1, \"Johnny\", \"x\")"); + echo $db->query("SELECT pkcol FROM t")->fetch()[0]; + ' 2>&1) + echo "covering-index pkcol = ${NAME}" + if [ "${NAME}" != "Johnny" ]; then + echo "::error::lossy-collation patch did not take effect: expected 'Johnny', got '${NAME}'" + exit 1 + fi + + - name: Install Composer dependencies (root) + uses: ramsey/composer-install@v3 + with: + ignore-cache: "yes" + composer-options: "--optimize-autoloader" + + - name: Install Composer dependencies (mysql-on-sqlite) + uses: ramsey/composer-install@v3 + with: + working-directory: packages/mysql-on-sqlite + ignore-cache: "yes" + composer-options: "--optimize-autoloader" + + - name: Run PHPUnit tests against Doltlite + run: php ./vendor/bin/phpunit -c ./phpunit.xml.dist + working-directory: packages/mysql-on-sqlite