From ccfc0c0e3fbf10dcb4bdf6ffad76bee500ce373e Mon Sep 17 00:00:00 2001 From: Alex Gaetano Padula Date: Tue, 9 Jun 2026 18:22:09 -0400 Subject: [PATCH] align cpp binding with db.h abi and bump to 2.5.2 add init and finalize for custom allocators, raise open file limit, cancel background work, the six built in comparators, and object store connector factories for filesystem and s3 including the full s3 config struct kept header inline so it links against a library built without s3 expose the write amplification counters on column family and database stats, carry the comparator context string through create update save and load, and add the busy error code free caller owned buffers with tidesdb free instead of std free so the binding stays correct under a custom allocator add tests for all of the new surface and fix two stale baseline tests the default max concurrent flushes is zero by design and the tombstone stats test needed a large write buffer so compaction would not purge the tombstones before they could be observed --- CMakeLists.txt | 2 +- include/tidesdb/tidesdb.hpp | 205 ++++++++++++++++++++++- src/tidesdb.cpp | 68 +++++++- tests/tidesdb_test.cpp | 322 +++++++++++++++++++++++++++++++++++- 4 files changed, 587 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c4e6da..7c79a37 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.16) -project(tidesdb_cpp VERSION 2.5.1 LANGUAGES CXX) +project(tidesdb_cpp VERSION 2.5.2 LANGUAGES CXX) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) diff --git a/include/tidesdb/tidesdb.hpp b/include/tidesdb/tidesdb.hpp index 301f6ae..71ab606 100644 --- a/include/tidesdb/tidesdb.hpp +++ b/include/tidesdb/tidesdb.hpp @@ -114,7 +114,8 @@ enum class ErrorCode InvalidDB = TDB_ERR_INVALID_DB, Unknown = TDB_ERR_UNKNOWN, Locked = TDB_ERR_LOCKED, - Readonly = TDB_ERR_READONLY + Readonly = TDB_ERR_READONLY, + Busy = TDB_ERR_BUSY }; /** @@ -163,6 +164,8 @@ class Exception : public std::runtime_error return "database is locked"; case TDB_ERR_READONLY: return "database is read-only"; + case TDB_ERR_BUSY: + return "database is busy"; default: return "unknown error"; } @@ -172,6 +175,172 @@ class Exception : public std::runtime_error ErrorCode code_; }; +/** + * @brief Initialize TidesDB with optional custom memory allocation functions + * + * Calling this is optional: the first @ref TidesDB open lazily initializes the + * library with the system allocator. Call init() explicitly *before* opening any + * database only when you need to install custom allocators (e.g. embedding inside + * Redis or another host that owns memory). Pass nullptr for any function to keep + * the corresponding system allocator. Must be called at most once before a + * matching @ref finalize. + * + * @param mallocFn Custom malloc (or nullptr for system malloc) + * @param callocFn Custom calloc (or nullptr for system calloc) + * @param reallocFn Custom realloc (or nullptr for system realloc) + * @param freeFn Custom free (or nullptr for system free) + * @return true if this call performed initialization, false if TidesDB was + * already initialized (the allocators are left unchanged) + */ +bool init(tidesdb_malloc_fn mallocFn = nullptr, tidesdb_calloc_fn callocFn = nullptr, + tidesdb_realloc_fn reallocFn = nullptr, tidesdb_free_fn freeFn = nullptr); + +/** + * @brief Finalize TidesDB and reset the allocator + * + * Call after all TidesDB operations are complete (all databases closed). After + * this returns, @ref init may be called again with different allocators. + */ +void finalize(); + +/** + * @brief Raise this process's open-file ceiling toward @p desired descriptors + * + * Lets a database keep more SSTables open. Must be called BEFORE opening the + * database -- the engine sizes its open-SSTable cap to fit the ceiling at open + * time. This is an explicit, opt-in operator action; TidesDB never raises the + * limit itself. A failed or partial raise is non-fatal. + * + * @param desired Target descriptor count; <= 0 just reports the current ceiling + * @return The open-file ceiling in effect after the attempt + */ +long raiseOpenFileLimit(long desired); + +/** + * @brief Built-in comparator function pointers + * + * These match the comparators TidesDB registers by name. Use the matching name + * string in @ref ColumnFamilyConfig::comparatorName to select one for a column + * family, or pass the function pointer directly to + * @ref TidesDB::registerComparator when wrapping one under a different name. + */ +namespace comparators +{ +/** Binary byte-by-byte comparison (registered name "memcmp", the default). */ +inline const tidesdb_comparator_fn memcmp = tidesdb_comparator_memcmp; +/** Null-terminated string comparison (registered name "lexicographic"). */ +inline const tidesdb_comparator_fn lexicographic = tidesdb_comparator_lexicographic; +/** Unsigned 64-bit integer comparison (registered name "uint64"). */ +inline const tidesdb_comparator_fn uint64 = tidesdb_comparator_uint64; +/** Signed 64-bit integer comparison (registered name "int64"). */ +inline const tidesdb_comparator_fn int64 = tidesdb_comparator_int64; +/** Reverse binary comparison (registered name "reverse"). */ +inline const tidesdb_comparator_fn reverseMemcmp = tidesdb_comparator_reverse_memcmp; +/** Case-insensitive ASCII comparison (registered name "case_insensitive"). */ +inline const tidesdb_comparator_fn caseInsensitive = tidesdb_comparator_case_insensitive; +} // namespace comparators + +/** + * @brief Object store connector factories + * + * A connector returned by these factories is handed to @ref Config::objectStore. + * Ownership transfers to the database on a successful open -- the database frees + * the connector when it closes, so callers must not destroy it themselves. If + * @ref TidesDB construction throws, the connector is leaked by the C library, so + * only build a connector immediately before opening. + */ +namespace objstore +{ +/** + * @brief Create a filesystem-backed object store connector + * + * Stores objects as files under @p rootDir mirroring the key path structure. + * Always available. Intended for testing and local replication. + * @param rootDir Directory to store objects in + * @return Connector handle, or nullptr on error + */ +inline tidesdb_objstore_t* filesystem(const std::string& rootDir) +{ + return tidesdb_objstore_fs_create(rootDir.c_str()); +} + +/** + * @brief Create an S3-compatible connector (AWS S3, MinIO, etc.) + * + * The underlying libtidesdb must have been built with TIDESDB_WITH_S3=ON; + * otherwise this symbol is unresolved at link time. Empty @p prefix or + * @p region are forwarded as nullptr. + * + * @param endpoint S3 endpoint (e.g. "s3.amazonaws.com" or "minio.local:9000") + * @param bucket Bucket name + * @param prefix Key prefix (e.g. "production/db1/"), or empty for none + * @param accessKey AWS access key ID + * @param secretKey AWS secret access key + * @param region AWS region (e.g. "us-east-1"), or empty for MinIO + * @param useSsl true for HTTPS, false for HTTP + * @param usePathStyle true for path-style URLs (MinIO), false for virtual-hosted (AWS) + * @return Connector handle, or nullptr on error + */ +inline tidesdb_objstore_t* s3(const std::string& endpoint, const std::string& bucket, + const std::string& prefix, const std::string& accessKey, + const std::string& secretKey, const std::string& region, bool useSsl, + bool usePathStyle) +{ + return tidesdb_objstore_s3_create(endpoint.c_str(), bucket.c_str(), + prefix.empty() ? nullptr : prefix.c_str(), accessKey.c_str(), + secretKey.c_str(), region.empty() ? nullptr : region.c_str(), + useSsl ? 1 : 0, usePathStyle ? 1 : 0); +} + +/** + * @brief Full S3 connector configuration including TLS and multipart tuning + * + * The all-default values are secure (TLS verification on, no custom CA) and use + * the library's built-in multipart sizes. + */ +struct S3Config +{ + std::string endpoint; // S3 endpoint (required) + std::string bucket; // Bucket name (required) + std::string prefix; // Key prefix, or empty for none + std::string accessKey; // AWS access key ID (required) + std::string secretKey; // AWS secret access key (required) + std::string region; // AWS region, or empty for the default + bool useSsl = true; // true for HTTPS, false for HTTP + bool usePathStyle = false; // true for path-style URLs (MinIO) + std::string tlsCaPath; // Custom CA bundle path, or empty for the system bundle + bool tlsInsecureSkipVerify = false; // true disables TLS verification (test only, insecure) + std::size_t multipartThreshold = 0; // Multipart upload threshold in bytes (0 = default) + std::size_t multipartPartSize = 0; // Multipart chunk size in bytes (0 = default) +}; + +/** + * @brief Create an S3-compatible connector from a full configuration struct + * + * Exposes TLS and multipart settings the positional @ref s3 overload cannot. + * Requires a libtidesdb built with TIDESDB_WITH_S3=ON. + * @param config Connector configuration (fields are copied; need not outlive the call) + * @return Connector handle, or nullptr on error + */ +inline tidesdb_objstore_t* s3(const S3Config& config) +{ + tidesdb_objstore_s3_config_t c{}; + c.endpoint = config.endpoint.c_str(); + c.bucket = config.bucket.c_str(); + c.prefix = config.prefix.empty() ? nullptr : config.prefix.c_str(); + c.access_key = config.accessKey.c_str(); + c.secret_key = config.secretKey.c_str(); + c.region = config.region.empty() ? nullptr : config.region.c_str(); + c.use_ssl = config.useSsl ? 1 : 0; + c.use_path_style = config.usePathStyle ? 1 : 0; + c.tls_ca_path = config.tlsCaPath.empty() ? nullptr : config.tlsCaPath.c_str(); + c.tls_insecure_skip_verify = config.tlsInsecureSkipVerify ? 1 : 0; + c.multipart_threshold = config.multipartThreshold; + c.multipart_part_size = config.multipartPartSize; + return tidesdb_objstore_s3_create_config(&c); +} +} // namespace objstore + // Forward declarations class TidesDB; class Transaction; @@ -196,6 +365,7 @@ struct ColumnFamilyConfig SyncMode syncMode = SyncMode::Interval; std::uint64_t syncIntervalUs = 128000; std::string comparatorName; + std::string comparatorCtxStr; // Context string persisted alongside comparatorName int skipListMaxLevel = 12; float skipListProbability = 0.25f; IsolationLevel defaultIsolationLevel = IsolationLevel::ReadCommitted; @@ -322,6 +492,17 @@ struct Stats levelTombstoneCounts; // Per-level tombstone counts (parallels levelKeyCounts) double maxSstDensity = 0.0; // Worst per-SSTable tombstone density observed (0.0 to 1.0) int maxSstDensityLevel = 0; // 1-based level where maxSstDensity was observed (0 if none) + // Write-amplification counters (lifetime since open, on-disk framed bytes). Divide the + // write totals by userBytesWritten for this CF's write amplification. walBytesWritten is + // zero in unified memtable mode (the shared WAL volume is reported db-wide in + // DbStats::uwalBytesWritten). The *Count fields count output SSTables. + std::uint64_t walBytesWritten = 0; // Framed bytes appended to this CF's WAL + std::uint64_t flushBytesWritten = 0; // On-disk bytes this CF's flushes wrote to L0 + std::uint64_t compactionBytesWritten = 0; // On-disk bytes this CF's compactions wrote + std::uint64_t compactionBytesRead = 0; // On-disk bytes this CF's compactions read as input + std::uint64_t userBytesWritten = 0; // Logical key+value bytes committed (WA denominator) + std::uint64_t flushCount = 0; // Flushed SSTables produced by this CF + std::uint64_t compactionCount = 0; // Compaction output SSTables produced by this CF }; /** @@ -360,6 +541,18 @@ struct DbStats std::uint64_t totalUploads = 0; std::uint64_t totalUploadFailures = 0; bool replicaMode = false; + // Write-amplification counters (lifetime since open, on-disk framed bytes). uwalBytesWritten + // is the shared unified WAL volume (zero when unified mode is off); the remaining fields are + // summed across all column families. db-wide WA = (uwal + wal + flush + compaction) / user + // bytes. The *Count fields count output SSTables, not logical runs. + std::uint64_t uwalBytesWritten = 0; // Framed bytes appended to the shared unified WAL + std::uint64_t walBytesWritten = 0; // Per-CF WAL bytes summed across all CFs + std::uint64_t flushBytesWritten = 0; // Flush output bytes summed across all CFs + std::uint64_t compactionBytesWritten = 0; // Compaction output bytes summed across all CFs + std::uint64_t compactionBytesRead = 0; // Compaction input bytes summed across all CFs + std::uint64_t userBytesWritten = 0; // Logical committed bytes summed across all CFs + std::uint64_t flushCount = 0; // Flushed SSTables summed across all CFs + std::uint64_t compactionCount = 0; // Compaction output SSTables summed across all CFs }; /** @@ -830,6 +1023,16 @@ class TidesDB */ void promoteToPrimary(); + /** + * @brief Cancel background compaction db-wide for a fast shutdown + * + * In-flight merges bail out safely and queued compaction is skipped; flushes + * are unaffected so durability is preserved. Blocks (bounded) until compaction + * is idle. The cancellation is sticky for the session and reset on the next + * open -- intended to be called right before closing the database. + */ + void cancelBackgroundWork(); + /** * @brief Get default database configuration * @return Default Config struct diff --git a/src/tidesdb.cpp b/src/tidesdb.cpp index b9cbe8a..6af6a3e 100644 --- a/src/tidesdb.cpp +++ b/src/tidesdb.cpp @@ -38,6 +38,25 @@ void checkResult(int result, const std::string& context) } // anonymous namespace +bool init(tidesdb_malloc_fn mallocFn, tidesdb_calloc_fn callocFn, tidesdb_realloc_fn reallocFn, + tidesdb_free_fn freeFn) +{ + // tidesdb_init returns 0 on success and -1 if TidesDB was already initialized. + // The -1 is not an error condition (it just means the allocators are unchanged), + // so we report it as a bool rather than throwing. + return tidesdb_init(mallocFn, callocFn, reallocFn, freeFn) == 0; +} + +void finalize() +{ + tidesdb_finalize(); +} + +long raiseOpenFileLimit(long desired) +{ + return tidesdb_raise_open_file_limit(desired); +} + ColumnFamilyConfig ColumnFamilyConfig::defaultConfig() { tidesdb_column_family_config_t cConfig = tidesdb_default_column_family_config(); @@ -58,6 +77,7 @@ ColumnFamilyConfig ColumnFamilyConfig::defaultConfig() config.syncMode = static_cast(cConfig.sync_mode); config.syncIntervalUs = cConfig.sync_interval_us; config.comparatorName = cConfig.comparator_name; + config.comparatorCtxStr = cConfig.comparator_ctx_str; config.skipListMaxLevel = cConfig.skip_list_max_level; config.skipListProbability = cConfig.skip_list_probability; config.defaultIsolationLevel = static_cast(cConfig.default_isolation_level); @@ -96,6 +116,7 @@ ColumnFamilyConfig ColumnFamilyConfig::loadFromIni(const std::string& iniFile, config.syncMode = static_cast(cConfig.sync_mode); config.syncIntervalUs = cConfig.sync_interval_us; config.comparatorName = cConfig.comparator_name; + config.comparatorCtxStr = cConfig.comparator_ctx_str; config.skipListMaxLevel = cConfig.skip_list_max_level; config.skipListProbability = cConfig.skip_list_probability; config.defaultIsolationLevel = static_cast(cConfig.default_isolation_level); @@ -150,6 +171,11 @@ void ColumnFamilyConfig::saveToIni(const std::string& iniFile, const std::string TDB_MAX_COMPARATOR_NAME - 1); } std::memset(cConfig.comparator_ctx_str, 0, TDB_MAX_COMPARATOR_CTX); + if (!config.comparatorCtxStr.empty()) + { + std::strncpy(cConfig.comparator_ctx_str, config.comparatorCtxStr.c_str(), + TDB_MAX_COMPARATOR_CTX - 1); + } cConfig.comparator_fn_cached = nullptr; cConfig.comparator_ctx_cached = nullptr; cConfig.commit_hook_fn = nullptr; @@ -230,6 +256,14 @@ Stats ColumnFamily::getStats() const stats.maxSstDensity = cStats->max_sst_density; stats.maxSstDensityLevel = cStats->max_sst_density_level; + stats.walBytesWritten = cStats->wal_bytes_written; + stats.flushBytesWritten = cStats->flush_bytes_written; + stats.compactionBytesWritten = cStats->compaction_bytes_written; + stats.compactionBytesRead = cStats->compaction_bytes_read; + stats.userBytesWritten = cStats->user_bytes_written; + stats.flushCount = cStats->flush_count; + stats.compactionCount = cStats->compaction_count; + if (cStats->num_levels > 0 && cStats->level_tombstone_counts != nullptr) { stats.levelTombstoneCounts.resize(cStats->num_levels); @@ -257,6 +291,7 @@ Stats ColumnFamily::getStats() const cfConfig.syncMode = static_cast(cStats->config->sync_mode); cfConfig.syncIntervalUs = cStats->config->sync_interval_us; cfConfig.comparatorName = cStats->config->comparator_name; + cfConfig.comparatorCtxStr = cStats->config->comparator_ctx_str; cfConfig.skipListMaxLevel = cStats->config->skip_list_max_level; cfConfig.skipListProbability = cStats->config->skip_list_probability; cfConfig.defaultIsolationLevel = @@ -425,6 +460,11 @@ void ColumnFamily::updateRuntimeConfig(const ColumnFamilyConfig& config, bool pe TDB_MAX_COMPARATOR_NAME - 1); } std::memset(cConfig.comparator_ctx_str, 0, TDB_MAX_COMPARATOR_CTX); + if (!config.comparatorCtxStr.empty()) + { + std::strncpy(cConfig.comparator_ctx_str, config.comparatorCtxStr.c_str(), + TDB_MAX_COMPARATOR_CTX - 1); + } cConfig.comparator_fn_cached = nullptr; cConfig.comparator_ctx_cached = nullptr; cConfig.commit_hook_fn = nullptr; @@ -603,7 +643,7 @@ std::vector Transaction::get(ColumnFamily& cf, std::string_view ke checkResult(result, "failed to get value"); std::vector valueVec(valueData, valueData + valueSize); - std::free(valueData); + tidesdb_free(valueData); return valueVec; } @@ -616,7 +656,7 @@ std::vector Transaction::get(ColumnFamily& cf, const std::vector valueVec(valueData, valueData + valueSize); - std::free(valueData); + tidesdb_free(valueData); return valueVec; } @@ -808,6 +848,11 @@ void TidesDB::createColumnFamily(const std::string& name, const ColumnFamilyConf TDB_MAX_COMPARATOR_NAME - 1); } std::memset(cConfig.comparator_ctx_str, 0, TDB_MAX_COMPARATOR_CTX); + if (!config.comparatorCtxStr.empty()) + { + std::strncpy(cConfig.comparator_ctx_str, config.comparatorCtxStr.c_str(), + TDB_MAX_COMPARATOR_CTX - 1); + } cConfig.comparator_fn_cached = nullptr; cConfig.comparator_ctx_cached = nullptr; cConfig.commit_hook_fn = config.commitHookFn; @@ -854,9 +899,9 @@ std::vector TidesDB::listColumnFamilies() for (int i = 0; i < count; ++i) { cfNames.emplace_back(names[i]); - std::free(names[i]); + tidesdb_free(names[i]); } - std::free(names); + tidesdb_free(names); return cfNames; } @@ -917,6 +962,15 @@ DbStats TidesDB::getDbStats() stats.totalUploadFailures = cStats.total_upload_failures; stats.replicaMode = cStats.replica_mode != 0; + stats.uwalBytesWritten = cStats.uwal_bytes_written; + stats.walBytesWritten = cStats.wal_bytes_written; + stats.flushBytesWritten = cStats.flush_bytes_written; + stats.compactionBytesWritten = cStats.compaction_bytes_written; + stats.compactionBytesRead = cStats.compaction_bytes_read; + stats.userBytesWritten = cStats.user_bytes_written; + stats.flushCount = cStats.flush_count; + stats.compactionCount = cStats.compaction_count; + return stats; } @@ -988,6 +1042,12 @@ void TidesDB::promoteToPrimary() checkResult(result, "failed to promote to primary"); } +void TidesDB::cancelBackgroundWork() +{ + int result = tidesdb_cancel_background_work(db_); + checkResult(result, "failed to cancel background work"); +} + ObjectStoreConfig ObjectStoreConfig::defaultConfig() { tidesdb_objstore_config_t cCfg = tidesdb_objstore_default_config(); diff --git a/tests/tidesdb_test.cpp b/tests/tidesdb_test.cpp index ca129c5..12338b6 100644 --- a/tests/tidesdb_test.cpp +++ b/tests/tidesdb_test.cpp @@ -1709,7 +1709,11 @@ TEST_F(TidesDBTest, TombstoneStatsAfterDeletes) tidesdb::TidesDB db(getConfig()); auto cfConfig = tidesdb::ColumnFamilyConfig::defaultConfig(); - cfConfig.writeBufferSize = 1024; // small buffer to make flushes cheap + // Use a large write buffer so the puts and the deletes each flush to a single + // L0 SSTable. With a tiny buffer the keys spread across many L0 files, crossing + // l1_file_count_trigger and letting background compaction purge the delete + // tombstones before they can be observed here. + cfConfig.writeBufferSize = 64 * 1024 * 1024; db.createColumnFamily("ts_cf", cfConfig); auto cf = db.getColumnFamily("ts_cf"); @@ -1867,9 +1871,319 @@ TEST_F(TidesDBTest, MaxConcurrentFlushesDefault) { auto defaultConfig = tidesdb::TidesDB::defaultConfig(); - // Proves the value is sourced from tidesdb_default_config() rather than zero-initialized - ASSERT_GT(defaultConfig.maxConcurrentFlushes, 0) - << "default max_concurrent_flushes should be sourced from C library defaults"; + // 0 is the documented sentinel returned by tidesdb_default_config(): at open time + // the engine pins max_concurrent_flushes to the resolved num_flush_threads. Assert + // the sentinel is carried through faithfully rather than altered by the wrapper. + ASSERT_EQ(defaultConfig.maxConcurrentFlushes, 0) + << "default max_concurrent_flushes should match the C library sentinel (0 = auto)"; + + // The sentinel must be accepted by open and resolved internally without error. + auto cfg = getConfig(); + cfg.maxConcurrentFlushes = defaultConfig.maxConcurrentFlushes; + ASSERT_NO_THROW({ tidesdb::TidesDB db(cfg); }); +} + +TEST_F(TidesDBTest, ErrorCodeBusy) +{ + // Verify the Busy error code maps correctly + ASSERT_EQ(static_cast(tidesdb::ErrorCode::Busy), TDB_ERR_BUSY); + ASSERT_EQ(static_cast(tidesdb::ErrorCode::Busy), -14); + + std::string msg = tidesdb::Exception::errorMessage(TDB_ERR_BUSY); + ASSERT_EQ(msg, "database is busy"); +} + +// Counting allocator state shared with non-capturing lambdas (statics, not captures) +static std::atomic g_allocMallocCalls{0}; +static std::atomic g_allocFreeCalls{0}; + +TEST_F(TidesDBTest, InitFinalizeCustomAllocator) +{ + // Reset any lazy initialization performed by earlier tests so init() is deterministic. + tidesdb::finalize(); + + g_allocMallocCalls = 0; + g_allocFreeCalls = 0; + + auto countingMalloc = [](std::size_t size) -> void* + { + g_allocMallocCalls.fetch_add(1); + return std::malloc(size); + }; + auto countingCalloc = [](std::size_t n, std::size_t size) -> void* + { return std::calloc(n, size); }; + auto countingRealloc = [](void* p, std::size_t size) -> void* { return std::realloc(p, size); }; + auto countingFree = [](void* p) + { + if (p) g_allocFreeCalls.fetch_add(1); + std::free(p); + }; + + // Non-capturing lambdas convert to plain C function pointers. + ASSERT_TRUE(tidesdb::init(countingMalloc, countingCalloc, countingRealloc, countingFree)); + // A second init is a no-op while already initialized. + ASSERT_FALSE(tidesdb::init()); + + { + tidesdb::TidesDB db(getConfig()); + auto cfConfig = tidesdb::ColumnFamilyConfig::defaultConfig(); + db.createColumnFamily("alloc_cf", cfConfig); + auto cf = db.getColumnFamily("alloc_cf"); + auto txn = db.beginTransaction(); + txn.put(cf, "k", "v", -1); + txn.commit(); + } + + EXPECT_GT(g_allocMallocCalls.load(), 0); + EXPECT_GT(g_allocFreeCalls.load(), 0); + + // Reset and re-establish a clean default-allocator init for subsequent tests. + tidesdb::finalize(); + ASSERT_TRUE(tidesdb::init()); +} + +TEST_F(TidesDBTest, RaiseOpenFileLimit) +{ + // desired <= 0 just reports the current ceiling + long current = tidesdb::raiseOpenFileLimit(0); + ASSERT_GT(current, 0); + + // Requesting the current ceiling should report a non-negative ceiling back + long raised = tidesdb::raiseOpenFileLimit(current); + ASSERT_GE(raised, 0); +} + +TEST_F(TidesDBTest, CancelBackgroundWork) +{ + tidesdb::TidesDB db(getConfig()); + + auto cfConfig = tidesdb::ColumnFamilyConfig::defaultConfig(); + cfConfig.writeBufferSize = 1024; // small buffer to spawn flushes/compaction + db.createColumnFamily("cbw_cf", cfConfig); + + auto cf = db.getColumnFamily("cbw_cf"); + + for (int batch = 0; batch < 3; ++batch) + { + auto txn = db.beginTransaction(); + for (int i = 0; i < 50; ++i) + { + txn.put(cf, "k" + std::to_string(batch) + "_" + std::to_string(i), "value", -1); + } + txn.commit(); + cf.flushMemtable(); + } + + // Sticky cancellation of background compaction -- should return cleanly + ASSERT_NO_THROW(db.cancelBackgroundWork()); +} + +TEST_F(TidesDBTest, BuiltInComparators) +{ + tidesdb::TidesDB db(getConfig()); + + for (const std::string& name : {std::string("memcmp"), std::string("lexicographic"), + std::string("reverse"), std::string("case_insensitive")}) + { + auto cfConfig = tidesdb::ColumnFamilyConfig::defaultConfig(); + cfConfig.comparatorName = name; + db.createColumnFamily("cmp_" + name, cfConfig); + + auto cf = db.getColumnFamily("cmp_" + name); + { + auto txn = db.beginTransaction(); + txn.put(cf, "alpha", "1", -1); + txn.put(cf, "beta", "2", -1); + txn.commit(); + } + + auto txn = db.beginTransaction(); + auto value = txn.get(cf, "alpha"); + ASSERT_EQ(std::string(value.begin(), value.end()), "1"); + + auto stats = cf.getStats(); + if (stats.config.has_value()) + { + ASSERT_EQ(stats.config->comparatorName, name); + } + } +} + +TEST_F(TidesDBTest, ReverseComparatorOrdering) +{ + tidesdb::TidesDB db(getConfig()); + + auto cfConfig = tidesdb::ColumnFamilyConfig::defaultConfig(); + cfConfig.comparatorName = "reverse"; + db.createColumnFamily("rev_cf", cfConfig); + + auto cf = db.getColumnFamily("rev_cf"); + { + auto txn = db.beginTransaction(); + txn.put(cf, "a", "1", -1); + txn.put(cf, "b", "2", -1); + txn.put(cf, "c", "3", -1); + txn.commit(); + } + + // Reverse comparator: the first key in iteration order is the largest + auto txn = db.beginTransaction(); + auto iter = txn.newIterator(cf); + iter.seekToFirst(); + ASSERT_TRUE(iter.valid()); + auto key = iter.key(); + ASSERT_EQ(std::string(key.begin(), key.end()), "c"); +} + +TEST_F(TidesDBTest, ComparatorFunctionPointers) +{ + tidesdb::TidesDB db(getConfig()); + + // The exposed built-in function pointers are non-null + ASSERT_NE(tidesdb::comparators::memcmp, nullptr); + ASSERT_NE(tidesdb::comparators::uint64, nullptr); + ASSERT_NE(tidesdb::comparators::reverseMemcmp, nullptr); + + // Register a built-in function pointer under a custom name and read it back + db.registerComparator("my_reverse", tidesdb::comparators::reverseMemcmp); + + tidesdb_comparator_fn fn = nullptr; + void* ctx = nullptr; + db.getComparator("my_reverse", &fn, &ctx); + ASSERT_EQ(fn, tidesdb::comparators::reverseMemcmp); +} + +TEST_F(TidesDBTest, ComparatorCtxStrIniRoundTrip) +{ + auto cfConfig = tidesdb::ColumnFamilyConfig::defaultConfig(); + cfConfig.comparatorName = "uint64"; + cfConfig.comparatorCtxStr = "endian=little"; + + std::string iniPath = testDbPath_ + "_cfg.ini"; + fs::remove(iniPath); + + tidesdb::ColumnFamilyConfig::saveToIni(iniPath, "cf_section", cfConfig); + auto loaded = tidesdb::ColumnFamilyConfig::loadFromIni(iniPath, "cf_section"); + + ASSERT_EQ(loaded.comparatorName, "uint64"); + ASSERT_EQ(loaded.comparatorCtxStr, "endian=little"); + + fs::remove(iniPath); +} + +TEST_F(TidesDBTest, WriteAmplificationStats) +{ + tidesdb::TidesDB db(getConfig()); + + auto cfConfig = tidesdb::ColumnFamilyConfig::defaultConfig(); + cfConfig.writeBufferSize = 1024; // small buffer to force a flush + db.createColumnFamily("wa_cf", cfConfig); + + auto cf = db.getColumnFamily("wa_cf"); + { + auto txn = db.beginTransaction(); + for (int i = 0; i < 200; ++i) + { + txn.put(cf, "key" + std::to_string(i), std::string(64, 'x'), -1); + } + txn.commit(); + } + cf.flushMemtable(); + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + + auto stats = cf.getStats(); + + ASSERT_GT(stats.userBytesWritten, 0u); + ASSERT_GT(stats.flushBytesWritten, 0u); + ASSERT_GE(stats.flushCount, 1u); + ASSERT_GE(stats.compactionBytesWritten, 0u); + ASSERT_GE(stats.compactionBytesRead, 0u); + ASSERT_GE(stats.walBytesWritten, 0u); +} + +TEST_F(TidesDBTest, DbStatsWriteAmplification) +{ + tidesdb::TidesDB db(getConfig()); + + auto cfConfig = tidesdb::ColumnFamilyConfig::defaultConfig(); + cfConfig.writeBufferSize = 1024; + db.createColumnFamily("dbwa_cf", cfConfig); + + auto cf = db.getColumnFamily("dbwa_cf"); + { + auto txn = db.beginTransaction(); + for (int i = 0; i < 200; ++i) + { + txn.put(cf, "key" + std::to_string(i), std::string(64, 'y'), -1); + } + txn.commit(); + } + cf.flushMemtable(); + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + + auto stats = db.getDbStats(); + + ASSERT_GT(stats.userBytesWritten, 0u); + ASSERT_GT(stats.flushBytesWritten, 0u); + // Per-CF WAL volume is reported (unified mode is off, so uwal stays zero) + ASSERT_GE(stats.walBytesWritten, 0u); + ASSERT_EQ(stats.uwalBytesWritten, 0u); +} + +TEST_F(TidesDBTest, FilesystemObjectStore) +{ + std::string objDir = testDbPath_ + "_objstore"; + fs::remove_all(objDir); + + { + tidesdb::Config config = getConfig(); + config.objectStore = tidesdb::objstore::filesystem(objDir); + ASSERT_NE(config.objectStore, nullptr); + config.objectStoreConfig = tidesdb::ObjectStoreConfig::defaultConfig(); + + // Object store mode requires (and auto-enables) unified memtable mode + tidesdb::TidesDB db(config); + + auto cfConfig = tidesdb::ColumnFamilyConfig::defaultConfig(); + db.createColumnFamily("os_cf", cfConfig); + + auto cf = db.getColumnFamily("os_cf"); + { + auto txn = db.beginTransaction(); + txn.put(cf, "okey", "ovalue", -1); + txn.commit(); + } + cf.flushMemtable(); + std::this_thread::sleep_for(std::chrono::milliseconds(300)); + + auto dbStats = db.getDbStats(); + ASSERT_TRUE(dbStats.objectStoreEnabled); + + { + auto txn = db.beginTransaction(); + auto value = txn.get(cf, "okey"); + ASSERT_EQ(std::string(value.begin(), value.end()), "ovalue"); + } + } + + fs::remove_all(objDir); +} + +TEST_F(TidesDBTest, S3ConfigStructDefaults) +{ + // Construct the S3 config struct (does not link the S3 connector symbol). + tidesdb::objstore::S3Config cfg; + cfg.endpoint = "s3.amazonaws.com"; + cfg.bucket = "my-bucket"; + cfg.accessKey = "ak"; + cfg.secretKey = "sk"; + cfg.region = "us-east-1"; + + ASSERT_TRUE(cfg.useSsl); + ASSERT_FALSE(cfg.usePathStyle); + ASSERT_FALSE(cfg.tlsInsecureSkipVerify); + ASSERT_EQ(cfg.multipartThreshold, 0u); + ASSERT_EQ(cfg.multipartPartSize, 0u); } int main(int argc, char** argv)