Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ sqlite-memory bridges these concepts, allowing any SQLite-powered application to
- **Hybrid Search**: Combines vector similarity (cosine distance) with FTS5 full-text search for superior retrieval
- **Smart Chunking**: Markdown-aware parsing preserves semantic boundaries
- **Intelligent Sync**: Content-hash change detection skips unchanged files, atomically replaces modified ones, and cleans up deleted ones
- **Transactional Safety**: Every sync operation runs inside a SAVEPOINT transaction - either fully succeeds or fully rolls back, no partially-indexed content
- **Transactional Safety**: Text/file ingests run inside SAVEPOINT transactions, and directory sync uses transactional cleanup plus per-file transactional updates so failed files do not leave partial rows behind
- **Efficient Storage**: Binary embeddings with configurable dimensions
- **Embedding Cache**: Automatically caches computed embeddings, so re-indexing the same text skips redundant API calls and computation
- **Flexible Embedding**: Use local models (llama.cpp) or [vectors.space](https://vectors.space) remote API
Expand All @@ -61,6 +61,9 @@ sqlite-memory bridges these concepts, allowing any SQLite-powered application to

## Getting Started

> [!IMPORTANT]
> Databases created with sqlite-memory versions earlier than `1.0.0` must be rebuilt before use with `1.0.0+`, because the internal schema changed.

### Prerequisites

- SQLite
Expand All @@ -74,7 +77,7 @@ sqlite-memory bridges these concepts, allowing any SQLite-powered application to
```sql
-- Load extensions (sync is optional)
.load ./vector
.load ./sync
.load ./cloudsync
.load ./memory

-- Configure embedding model (choose one):
Expand All @@ -84,8 +87,8 @@ SELECT memory_set_model('local', '/path/to/nomic-embed-text-v1.5.Q8_0.gguf');

-- Option 2: Remote embedding via vectors.space (requires free API key from https://vectors.space)
-- The provider name 'openai' selects the vectors.space OpenAI-compatible endpoint.
-- SELECT memory_set_model('openai', 'text-embedding-3-small');
-- SELECT memory_set_apikey('your-vectorspace-api-key');
-- SELECT memory_set_model('openai', 'text-embedding-3-small');

-- Add some knowledge
SELECT memory_add_text('SQLite is a C-language library that implements a small, fast,
Expand Down Expand Up @@ -160,7 +163,7 @@ All `memory_add_*` functions use content-hash change detection to avoid redundan
1. **Cleanup**: Removes database entries for files that no longer exist on disk
2. **Scan**: Recursively processes all matching files - adding new ones, replacing modified ones, and skipping unchanged ones

Every sync operation is wrapped in a SQLite SAVEPOINT transaction. If anything fails mid-sync (embedding error, disk issue, etc.), the entire operation rolls back cleanly. There is no risk of partially-indexed files or orphaned entries.
`memory_add_text()` and `memory_add_file()` each run inside a SQLite SAVEPOINT transaction. `memory_add_directory()` performs its cleanup pass transactionally and then processes each file in its own transaction. If one file fails, that file rolls back cleanly and previously-committed files remain valid; there are no partially-indexed rows or orphaned chunk/FTS entries for the failed file.

This makes all sync functions safe to call repeatedly - for example, on a cron schedule or at agent startup - with minimal overhead.

Expand Down Expand Up @@ -258,8 +261,8 @@ FROM dbmem_content;
-- Delete by context
SELECT memory_delete_context('old-project');

-- Delete specific memory
SELECT memory_delete(1234567890);
-- Delete specific memory by hash
SELECT memory_delete('9e3779b97f4a7c15');

-- Clear all memories
SELECT memory_clear();
Expand All @@ -279,8 +282,11 @@ cd sqlite-memory
# Build (full build with local + remote engines)
make

# Run tests
# Run parser/core unit tests + extension loading smoke test
make test

# Run the full SQL extension unit suite
make test DEFINES="-DTEST_SQLITE_EXTENSION"
```

### Build Configurations
Expand Down
1 change: 1 addition & 0 deletions src/dbmem-embed.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ void dbmem_local_engine_free (dbmem_local_engine_t *engine);

dbmem_remote_engine_t *dbmem_remote_engine_init (void *ctx, const char *provider, const char *model, char err_msg[DBMEM_ERRBUF_SIZE]);
int dbmem_remote_compute_embedding (dbmem_remote_engine_t *engine, const char *text, int text_len, embedding_result_t *result);
int dbmem_remote_engine_set_apikey (dbmem_remote_engine_t *engine, const char *api_key, char err_msg[DBMEM_ERRBUF_SIZE]);
void dbmem_remote_engine_free (dbmem_remote_engine_t *engine);

// Custom provider (always available, defined in sqlite-memory.c)
Expand Down
14 changes: 10 additions & 4 deletions src/dbmem-lembed.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,15 @@ void dbmem_logger (enum ggml_log_level level, const char *text, void *user_data)

// MARK: -

// Forward an error message to the engine's owning context.
// Tolerates a NULL engine or an engine with no context attached;
// in either case the message is dropped silently.
static void dbmem_local_set_error(dbmem_local_engine_t *engine, const char *message) {
    if (engine == NULL) return;
    if (engine->context == NULL) return;
    dbmem_context_set_error(engine->context, message);
}

dbmem_local_engine_t *dbmem_local_engine_init (void *ctx, const char *model_path, char err_msg[DBMEM_ERRBUF_SIZE]) {
dbmem_local_engine_t *engine = (dbmem_local_engine_t *)dbmemory_zeroalloc(sizeof(dbmem_local_engine_t));
if (!engine) return NULL;
engine->context = (dbmem_context *)ctx;

// set logger
llama_log_set(dbmem_logger, engine);
Expand Down Expand Up @@ -212,7 +218,7 @@ int dbmem_local_compute_embedding (dbmem_local_engine_t *engine, const char *tex
// Tokenize
int n_tokens = llama_tokenize(engine->vocab, text, text_len, engine->tokens, engine->tokens_capacity, true, true);
if (n_tokens < 0) {
dbmem_context_set_error(engine->context, "Tokenization failed (text too long?)");
dbmem_local_set_error(engine, "Tokenization failed (text too long?)");
return -1;
}

Expand Down Expand Up @@ -242,7 +248,7 @@ int dbmem_local_compute_embedding (dbmem_local_engine_t *engine, const char *tex
// Encode
int ret = llama_encode(engine->ctx, batch);
if (ret != 0) {
dbmem_context_set_error(engine->context, "Llama_encode failed");
dbmem_local_set_error(engine, "Llama_encode failed");
return -1;
}

Expand All @@ -255,7 +261,7 @@ int dbmem_local_compute_embedding (dbmem_local_engine_t *engine, const char *tex
}

if (!emb_ptr) {
dbmem_context_set_error(engine->context, "Failed to get embeddings");
dbmem_local_set_error(engine, "Failed to get embeddings");
return -1;
}

Expand Down Expand Up @@ -301,5 +307,5 @@ void dbmem_local_engine_free (dbmem_local_engine_t *engine) {
}

llama_backend_free();
dbmemory_free(engine);
}

6 changes: 3 additions & 3 deletions src/dbmem-parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
typedef struct {
size_t start; // Byte offset in source buffer
size_t end; // Byte end in source buffer
int is_heading; // True if this section starts with a heading block
char *text; // Stripped plain text (allocated)
size_t text_len; // Length of stripped text
} section_t;
Expand Down Expand Up @@ -113,8 +114,6 @@ static size_t find_split (const char *text, size_t len, size_t max_chars) {

// Push a section to dynamic array
static int section_push (parse_ctx_t *ctx, size_t start, size_t end, int is_heading) {
UNUSED_PARAM(is_heading);

if (ctx->sec_count >= ctx->sec_cap) {
size_t new_cap = ctx->sec_cap ? ctx->sec_cap * 2 : 16;
section_t *tmp = (section_t *)dbmemory_realloc(ctx->sections, new_cap * sizeof(section_t));
Expand All @@ -126,6 +125,7 @@ static int section_push (parse_ctx_t *ctx, size_t start, size_t end, int is_head
section_t *s = &ctx->sections[ctx->sec_count++];
s->start = start;
s->end = end;
s->is_heading = is_heading;
s->text = NULL;
s->text_len = 0;

Expand Down Expand Up @@ -607,7 +607,7 @@ static int parse_sections (const char *buffer, size_t buffer_size, bool skip_sem
for (size_t i = 0; i < ctx->sec_count; i++) {
section_t *s = &ctx->sections[i];
// First section or heading starts new section
if (write_idx == 0) {
if (write_idx == 0 || s->is_heading) {
ctx->sections[write_idx++] = *s;
} else {
// Extend previous section to include this one
Expand Down
58 changes: 53 additions & 5 deletions src/dbmem-rembed.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ static size_t cacert_len = sizeof(cacert_pem) - 1;

#ifndef DBMEM_OMIT_CURL
static size_t dbmem_remote_receive_data(void *contents, size_t size, size_t nmemb, void *xdata);
static struct curl_slist *dbmem_remote_build_headers (const char *api_key);
#endif

struct dbmem_remote_engine_t {
Expand Down Expand Up @@ -67,6 +68,27 @@ struct dbmem_remote_engine_t {
#include <stdbool.h>
#include <stddef.h>

#ifndef DBMEM_OMIT_CURL
// Build the HTTP header list used for remote embedding requests:
// an Authorization bearer header derived from api_key, followed by a
// JSON content type. Returns a freshly allocated curl_slist owned by
// the caller, or NULL on allocation failure (nothing is leaked).
// NOTE(review): an api_key longer than ~490 bytes is silently truncated
// by snprintf — confirm keys are always shorter than the 512-byte buffer.
static struct curl_slist *dbmem_remote_build_headers (const char *api_key) {
    char bearer[512];
    snprintf(bearer, sizeof(bearer), "Authorization: Bearer %s", api_key);

    struct curl_slist *list = curl_slist_append(NULL, bearer);
    if (list == NULL) return NULL;

    struct curl_slist *extended = curl_slist_append(list, "Content-Type: application/json");
    if (extended == NULL) {
        curl_slist_free_all(list);
        return NULL;
    }

    return extended;
}
#endif

static bool text_needs_json_escape (const char *text, size_t *len) {
size_t original_len = *len;
size_t required_len = 0;
Expand Down Expand Up @@ -263,11 +285,7 @@ dbmem_remote_engine_t *dbmem_remote_engine_init (void *ctx, const char *provider
#endif

// set up headers
char auth_header[512];
snprintf(auth_header, sizeof(auth_header), "Authorization: Bearer %s", api_key);
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, auth_header);
if (headers) headers = curl_slist_append(headers, "Content-Type: application/json");
struct curl_slist *headers = dbmem_remote_build_headers(api_key);
if (!headers) {
snprintf(err_msg, DBMEM_ERRBUF_SIZE, "Failed to allocate HTTP headers");
curl_easy_cleanup(curl);
Expand Down Expand Up @@ -522,6 +540,36 @@ int dbmem_remote_compute_embedding (dbmem_remote_engine_t *engine, const char *t
return 0;
}

// Replace the API key used by a remote embedding engine.
//
// With curl support compiled in, the key is baked into a fresh HTTP header
// list that replaces the engine's current one; without curl, a private copy
// of the key string is stored on the engine instead. On failure the engine's
// previous state is left untouched.
//
// Returns SQLITE_OK on success, SQLITE_MISUSE for a NULL engine or key, and
// SQLITE_NOMEM on allocation failure. err_msg, when non-NULL, receives a
// human-readable description of the failure.
int dbmem_remote_engine_set_apikey (dbmem_remote_engine_t *engine, const char *api_key, char err_msg[DBMEM_ERRBUF_SIZE]) {
    if (engine == NULL || api_key == NULL) {
        if (err_msg) snprintf(err_msg, DBMEM_ERRBUF_SIZE, "Invalid remote engine or API key");
        return SQLITE_MISUSE;
    }

#ifndef DBMEM_OMIT_CURL
    struct curl_slist *fresh = dbmem_remote_build_headers(api_key);
    if (fresh == NULL) {
        if (err_msg) snprintf(err_msg, DBMEM_ERRBUF_SIZE, "Failed to allocate HTTP headers");
        return SQLITE_NOMEM;
    }

    // Point the curl handle at the new list before releasing the old one,
    // so the handle never references freed memory.
    curl_easy_setopt(engine->curl, CURLOPT_HTTPHEADER, fresh);
    if (engine->headers) curl_slist_free_all(engine->headers);
    engine->headers = fresh;
#else
    char *dup = dbmem_strdup(api_key);
    if (dup == NULL) {
        if (err_msg) snprintf(err_msg, DBMEM_ERRBUF_SIZE, "Unable to duplicate API key (insufficient memory)");
        return SQLITE_NOMEM;
    }

    if (engine->api_key) dbmemory_free(engine->api_key);
    engine->api_key = dup;
#endif

    return SQLITE_OK;
}

void dbmem_remote_engine_free (dbmem_remote_engine_t *engine) {
if (!engine) return;

Expand Down
Loading
Loading