diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 319752482..07b4f6fc5 100755
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -129,7 +129,16 @@ else ()
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g")
elseif (CMAKE_BUILD_TYPE STREQUAL "Release")
- set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2")
+ # -flto + MinGW gcc + statically-linked antlr4_static produces
+ # unresolved-reference errors at link time (LTO intermediate objects
+ # can't see the .a's vtable thunks). -march=native is also a poor
+ # default for CI binaries shipped to other machines. Windows builds
+ # therefore get plain -O3; every other platform keeps -march=native -flto.
+ if (MINGW OR WIN32)
+ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
+ else ()
+ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -march=native -flto")
+ endif ()
elseif (CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O2 -g")
elseif (CMAKE_BUILD_TYPE STREQUAL "MinSizeRel")
diff --git a/cpp/pom.xml b/cpp/pom.xml
index 5415212f0..153e75dc2 100644
--- a/cpp/pom.xml
+++ b/cpp/pom.xml
@@ -99,8 +99,8 @@
plugin's generate goal throw an NPE.
-->
-
-
+
+
diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt
index 93342c113..895c1ddba 100644
--- a/cpp/src/CMakeLists.txt
+++ b/cpp/src/CMakeLists.txt
@@ -37,6 +37,9 @@ message("cmake using: ENABLE_LZOKAY=${ENABLE_LZOKAY}")
option(ENABLE_ZLIB "Enable Zlib compression" ON)
message("cmake using: ENABLE_ZLIB=${ENABLE_ZLIB}")
+# ENABLE_SIMD is defined in the top-level CMakeLists.txt
+message("cmake using: ENABLE_SIMD=${ENABLE_SIMD}")
+
message("Running in src directory")
if (${COV_ENABLED})
add_compile_options(-fprofile-arcs -ftest-coverage)
@@ -89,6 +92,13 @@ if (ENABLE_ANTLR4)
message("Adding ANTLR4 include directory")
endif()
+if (ENABLE_SIMD)
+ add_definitions(-DENABLE_SIMD)
+ list(APPEND PROJECT_INCLUDE_DIR
+ ${CMAKE_SOURCE_DIR}/third_party/simde-0.8.4-rc3
+ )
+endif()
+
include_directories(${PROJECT_INCLUDE_DIR})
# Mark every translation unit that is compiled into the tsfile library so that
@@ -144,10 +154,17 @@ add_library(tsfile SHARED)
if (${COV_ENABLED})
message("Enable code cov...")
+ # Apple clang ships coverage runtime via --coverage; libgcov isn't a
+ # standalone library on macOS. Use --coverage there.
+ if (APPLE)
+ set(COV_LINK_LIB --coverage)
+ else()
+ set(COV_LINK_LIB -lgcov)
+ endif()
if (ENABLE_ANTLR4)
- target_link_libraries(tsfile common_obj compress_obj cwrapper_obj file_obj read_obj write_obj parser_obj -lgcov)
+ target_link_libraries(tsfile common_obj compress_obj cwrapper_obj file_obj read_obj write_obj parser_obj ${COV_LINK_LIB})
else()
- target_link_libraries(tsfile common_obj compress_obj cwrapper_obj file_obj read_obj write_obj -lgcov)
+ target_link_libraries(tsfile common_obj compress_obj cwrapper_obj file_obj read_obj write_obj ${COV_LINK_LIB})
endif()
else()
message("Disable code cov...")
@@ -171,4 +188,4 @@ set_target_properties(tsfile PROPERTIES SOVERSION ${LIBTSFILE_SO_VERSION})
install(TARGETS tsfile
RUNTIME DESTINATION ${LIBRARY_OUTPUT_PATH}
LIBRARY DESTINATION ${LIBRARY_OUTPUT_PATH}
- ARCHIVE DESTINATION ${LIBRARY_OUTPUT_PATH})
\ No newline at end of file
+ ARCHIVE DESTINATION ${LIBRARY_OUTPUT_PATH})
diff --git a/cpp/src/common/CMakeLists.txt b/cpp/src/common/CMakeLists.txt
index 4406cb219..60e0fdccf 100644
--- a/cpp/src/common/CMakeLists.txt
+++ b/cpp/src/common/CMakeLists.txt
@@ -22,21 +22,15 @@ aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} common_SRC_LIST)
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/allocator common_allocator_SRC_LIST)
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/container common_container_SRC_LIST)
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/tsblock common_tsblock_SRC_LIST)
-aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/mutex common_mutex_SRC_LIST)
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/datatype common_datatype_SRC_LIST)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
-add_library(common_obj OBJECT ${common_SRC_LIST}
+add_library(common_obj OBJECT ${common_SRC_LIST}
${common_allocator_SRC_LIST}
${common_container_SRC_LIST}
- ${common_tsblock_SRC_LIST}
- ${common_mutex_SRC_LIST}
+ ${common_tsblock_SRC_LIST}
${common_datatype_SRC_LIST})
-if (ENABLE_ANTLR4)
- target_compile_definitions(common_obj PRIVATE ENABLE_ANTLR4)
-endif()
-
# install header files recursively
file(GLOB_RECURSE HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/*.h")
copy_to_dir(${HEADERS} "common_obj")
\ No newline at end of file
diff --git a/cpp/src/common/allocator/alloc_base.h b/cpp/src/common/allocator/alloc_base.h
index c89aed077..dd2e0ab61 100644
--- a/cpp/src/common/allocator/alloc_base.h
+++ b/cpp/src/common/allocator/alloc_base.h
@@ -82,35 +82,43 @@ class ModStat {
}
void init();
void destroy();
- INLINE void update_alloc(AllocModID mid, int32_t size) {
+ INLINE void update_alloc(AllocModID mid, int64_t size) {
#ifdef ENABLE_MEM_STAT
ASSERT(mid < __LAST_MOD_ID);
ATOMIC_FAA(get_item(mid), size);
#endif
}
-    void update_free(AllocModID mid, uint32_t size) {
+    // Subtracts `size` bytes from the counter of module `mid`. The body is
+    // compiled out unless ENABLE_MEM_STAT is defined.
+    void update_free(AllocModID mid, uint64_t size) {
#ifdef ENABLE_MEM_STAT
        ASSERT(mid < __LAST_MOD_ID);
-        ATOMIC_FAA(get_item(mid), 0 - size);
+        // Convert to the counter's signed type before negating so the delta
+        // is a negative int64_t rather than a wrapped unsigned value.
+        ATOMIC_FAA(get_item(mid), -static_cast<int64_t>(size));
#endif
    }
void print_stat();
+    // Returns the current counter of module `mid`. Yields 0 when
+    // ENABLE_MEM_STAT is not defined, when stat_arr_ is still NULL, or when
+    // `mid` is outside [0, __LAST_MOD_ID).
+    int64_t get_stat(int8_t mid) {
+#ifdef ENABLE_MEM_STAT
+        // mid is a signed int8_t: reject negatives as well, otherwise
+        // get_item() would index before the start of stat_arr_.
+        if (stat_arr_ != NULL && mid >= 0 && mid < __LAST_MOD_ID)
+            // Adding 0 leaves the counter untouched; only its value is used.
+            return ATOMIC_FAA(get_item(mid), 0LL);
+#endif
+        return 0;
+    }
+
#ifdef ENABLE_TEST
- int32_t TEST_get_stat(int8_t mid) { return ATOMIC_FAA(get_item(mid), 0); }
+ int64_t TEST_get_stat(int8_t mid) { return ATOMIC_FAA(get_item(mid), 0LL); }
#endif
private:
- INLINE int32_t* get_item(int8_t mid) {
- return &(stat_arr_[mid * (ITEM_SIZE / sizeof(int32_t))]);
+ INLINE int64_t* get_item(int8_t mid) {
+ return &(stat_arr_[mid * (ITEM_SIZE / sizeof(int64_t))]);
}
private:
static const int32_t ITEM_SIZE = CACHE_LINE_SIZE;
static const int32_t ITEM_COUNT = __LAST_MOD_ID;
- int32_t* stat_arr_;
+ int64_t* stat_arr_;
- STATIC_ASSERT((ITEM_SIZE % sizeof(int32_t) == 0), ModStat_ITEM_SIZE_ERROR);
+ STATIC_ASSERT((ITEM_SIZE % sizeof(int64_t) == 0), ModStat_ITEM_SIZE_ERROR);
};
/* base allocator */
diff --git a/cpp/src/common/allocator/byte_stream.h b/cpp/src/common/allocator/byte_stream.h
index 435a1f6fd..ad8dbb90d 100644
--- a/cpp/src/common/allocator/byte_stream.h
+++ b/cpp/src/common/allocator/byte_stream.h
@@ -24,6 +24,7 @@
#include
#include
+#include
#include
#include
@@ -33,51 +34,51 @@
namespace common {
+// std::atomic<T> as the actual storage so the MSVC fallback no longer needs
+// `reinterpret_cast<std::atomic<T>*>(T*)` — that cast is UB because the
+// underlying object was never constructed as a std::atomic<T>. When the
+// caller asks for non-atomic mode we still go through the atomic interface
+// but with memory_order_relaxed, which on x86/ARM compiles to a plain
+// load/store. std::atomic<T> is non-copyable, so neither is
+// OptionalAtomic<T>; existing callers either construct in place or use
+// shallow_clone_from / store.
template <typename T>
class OptionalAtomic {
   public:
    OptionalAtomic(T t, bool enable_atomic = false)
        : val_(t), enable_atomic_(enable_atomic) {}
+    OptionalAtomic(const OptionalAtomic&) = delete;
+    OptionalAtomic& operator=(const OptionalAtomic&) = delete;
+    OptionalAtomic(OptionalAtomic&&) = delete;
+    OptionalAtomic& operator=(OptionalAtomic&&) = delete;
+
    FORCE_INLINE T load() const {
-        if (UNLIKELY(enable_atomic_)) {
-            return ATOMIC_LOAD(&val_);
-        } else {
-            return val_;
-        }
+        return val_.load(UNLIKELY(enable_atomic_) ? std::memory_order_seq_cst
+                                                  : std::memory_order_relaxed);
    }
    FORCE_INLINE void store(const T t) {
-        if (UNLIKELY(enable_atomic_)) {
-            ATOMIC_STORE(&val_, t);
-        } else {
-            val_ = t;
-        }
+        val_.store(t, UNLIKELY(enable_atomic_) ? std::memory_order_seq_cst
+                                               : std::memory_order_relaxed);
    }
    FORCE_INLINE T atomic_faa(const T increment) {
-        if (UNLIKELY(enable_atomic_)) {
-            return ATOMIC_FAA(&val_, increment);
-        } else {
-            T old_val = val_;
-            val_ = val_ + increment;
-            return old_val;
-        }
+        return val_.fetch_add(increment, UNLIKELY(enable_atomic_)
+                                             ? std::memory_order_seq_cst
+                                             : std::memory_order_relaxed);
    }
    FORCE_INLINE T atomic_aaf(const T increment) {
-        if (UNLIKELY(enable_atomic_)) {
-            return ATOMIC_AAF(&val_, increment);
-        } else {
-            val_ = val_ + increment;
-            return val_;
-        }
+        // fetch_add yields the previous value; add `increment` once more to
+        // return the value after the addition.
+        return val_.fetch_add(increment, UNLIKELY(enable_atomic_)
+                                             ? std::memory_order_seq_cst
+                                             : std::memory_order_relaxed) +
+               increment;
    }
    FORCE_INLINE bool enable_atomic() const { return enable_atomic_; }
   private:
-    T val_;
+    // Storage is always a std::atomic<T>; enable_atomic_ only selects
+    // between seq_cst and relaxed ordering in the accessors above.
+    std::atomic<T> val_;
    bool enable_atomic_;
};
@@ -231,6 +232,23 @@ FORCE_INLINE double bytes_to_double(uint8_t bytes[8]) {
// TODO define a WrappedByteStream class
+// Returns the smallest power of two that is >= n, with two special cases:
+// n <= 1 yields 1, and any n above 0x80000000 (the largest power of two a
+// uint32_t can hold) is clamped down to 0x80000000. Used to normalize
+// ByteStream page sizes so that `& page_mask_` is equivalent to
+// `% page_size_`. The clamp exists because the previous
+// `while (ps < n) ps <<= 1` would shift past 2^31 and overflow to 0,
+// looping forever.
+FORCE_INLINE uint32_t round_up_pow2(uint32_t n) {
+    const uint32_t max_pow2 = 0x80000000u;
+    if (n <= 1) {
+        return 1;
+    }
+    if (n > max_pow2) {
+        return max_pow2;
+    }
+    // Smear the highest set bit of (n - 1) into every lower position
+    // (shifts of 1, 2, 4, 8, 16), then add one to reach the power of two.
+    uint32_t smeared = n - 1;
+    for (uint32_t shift = 1; shift < 32; shift <<= 1) {
+        smeared |= smeared >> shift;
+    }
+    return smeared + 1;
+}
+
// auto extend buffer for serialization
class ByteStream {
private:
@@ -253,6 +271,8 @@ class ByteStream {
};
public:
+ static const uint32_t DEFAULT_PAGE_SIZE = 1024;
+
ByteStream(uint32_t page_size, AllocModID mid, bool enable_atomic = false,
BaseAllocator& allocator = g_base_allocator)
: allocator_(allocator),
@@ -262,11 +282,16 @@ class ByteStream {
total_size_(0, enable_atomic),
read_pos_(0),
marked_read_pos_(0),
- page_size_(page_size),
+ // page_mask_ is used as a bitmask in the hot read/write paths
+ // (`x & page_mask_` instead of `x % page_size_`), which only
+ // matches modulo arithmetic when page_size_ is a power of two.
+ // Round up so callers passing non-power-of-2 sizes still get a
+ // correctly-sized page, at the cost of <2x memory in the worst
+ // case (e.g. 1000 → 1024).
+ page_size_(round_up_pow2(page_size)),
+ page_mask_(round_up_pow2(page_size) - 1),
mid_(mid),
- wrapped_page_(false, nullptr) {
- // assert(page_size >= 16); // commented out by gxh on 2023.03.09
- }
+ wrapped_page_(false, nullptr) {}
// for wrap plain buffer to ByteStream
ByteStream(AllocModID mid = MOD_DEFAULT)
@@ -278,6 +303,7 @@ class ByteStream {
read_pos_(0),
marked_read_pos_(0),
page_size_(0),
+ page_mask_(0),
mid_(mid),
wrapped_page_(false, nullptr) {}
@@ -290,7 +316,10 @@ class ByteStream {
wrapped_page_.next_.store(nullptr);
wrapped_page_.buf_ = (uint8_t*)buf;
- page_size_ = buf_len;
+ // page_mask_ is used as a bitmask; only correct for power-of-2
+ // page sizes (see ByteStream ctor comment).
+ page_size_ = round_up_pow2(static_cast(buf_len));
+ page_mask_ = page_size_ - 1;
head_.store(&wrapped_page_);
tail_.store(&wrapped_page_);
total_size_.store(buf_len);
@@ -305,14 +334,14 @@ class ByteStream {
void clear_wrapped_buf() { wrapped_page_.buf_ = nullptr; }
/* ================ Part 1: basic ================ */
- FORCE_INLINE uint32_t remaining_size() const {
+ FORCE_INLINE uint64_t remaining_size() const {
ASSERT(total_size_.load() >= read_pos_);
return total_size_.load() - read_pos_;
}
FORCE_INLINE bool has_remaining() const { return remaining_size() > 0; }
FORCE_INLINE void mark_read_pos() { marked_read_pos_ = read_pos_; }
- FORCE_INLINE uint32_t get_mark_len() const {
+ FORCE_INLINE uint64_t get_mark_len() const {
ASSERT(marked_read_pos_ <= read_pos_);
return read_pos_ - marked_read_pos_;
}
@@ -339,30 +368,46 @@ class ByteStream {
// never used TODO
void shallow_clone_from(ByteStream& other) {
this->page_size_ = other.page_size_;
+ this->page_mask_ = other.page_mask_;
this->mid_ = other.mid_;
this->head_.store(other.head_.load());
this->tail_.store(other.tail_.load());
this->total_size_.store(other.total_size_.load());
}
- FORCE_INLINE uint32_t total_size() const { return total_size_.load(); }
- FORCE_INLINE uint32_t read_pos() const { return read_pos_; };
+ FORCE_INLINE uint64_t total_size() const { return total_size_.load(); }
+ FORCE_INLINE uint64_t read_pos() const { return read_pos_; };
+    // Bytes of backing storage held by this stream, as opposed to bytes
+    // written into it. A wrapped stream reports total_size_; an owning
+    // stream reports page_size_ for every page reachable from head_, so
+    // callers doing memory-pressure accounting see whole pages even when
+    // the most recent page is only partly filled.
+    FORCE_INLINE uint64_t allocated_bytes() const {
+        if (is_wrapped()) {
+            return total_size_.load();
+        }
+        uint64_t page_count = 0;
+        for (Page* page = head_.load(); page != nullptr;
+             page = page->next_.load()) {
+            ++page_count;
+        }
+        return page_count * page_size_;
+    }
/**
* Seek the read cursor to an absolute offset. Re-anchors read_page_ for
* multi-page streams.
*/
- void set_read_pos(uint32_t pos) {
+ void set_read_pos(uint64_t pos) {
ASSERT(pos <= total_size());
read_pos_ = pos;
Page* p = head_.load();
- uint32_t skipped = 0;
+ uint64_t skipped = 0;
while (p != nullptr && skipped + page_size_ <= pos) {
skipped += page_size_;
p = p->next_.load();
}
read_page_ = p;
}
- FORCE_INLINE void wrapped_buf_advance_read_pos(uint32_t size) {
+ FORCE_INLINE void wrapped_buf_advance_read_pos(uint64_t size) {
if (size + read_pos_ > total_size_.load()) {
read_pos_ = total_size_.load();
} else {
@@ -380,10 +425,10 @@ class ByteStream {
std::cout << "write_buf error " << ret << std::endl;
return ret;
}
- uint32_t remainder = page_size_ - (total_size_.load() % page_size_);
+ uint32_t remainder = page_size_ - (total_size_.load() & page_mask_);
uint32_t copy_len =
remainder < (len - write_len) ? remainder : (len - write_len);
- memcpy(tail_.load()->buf_ + total_size_.load() % page_size_,
+ memcpy(tail_.load()->buf_ + (total_size_.load() & page_mask_),
buf + write_len, copy_len);
total_size_.atomic_aaf(copy_len);
write_len += copy_len;
@@ -404,11 +449,11 @@ class ByteStream {
if (RET_FAIL(check_space())) {
return ret;
}
- uint32_t remainder = page_size_ - (read_pos_ % page_size_);
+ uint32_t remainder = page_size_ - (read_pos_ & page_mask_);
uint32_t copy_len = remainder < want_len_limited - read_len
? remainder
: want_len_limited - read_len;
- memcpy(buf + read_len, read_page_->buf_ + (read_pos_ % page_size_),
+ memcpy(buf + read_len, read_page_->buf_ + (read_pos_ & page_mask_),
copy_len);
read_len += copy_len;
read_pos_ += copy_len;
@@ -460,16 +505,17 @@ class ByteStream {
return b;
}
b.buf_ =
- (char*)(tail_.load()->buf_ + (total_size_.load() % page_size_));
- b.len_ = page_size_ - (total_size_.load() % page_size_);
+ (char*)(tail_.load()->buf_ + (total_size_.load() & page_mask_));
+ b.len_ = page_size_ - (total_size_.load() & page_mask_);
return b;
}
void buffer_used(uint32_t used_bytes) {
ASSERT(used_bytes >= 1);
// would not span page
- ASSERT((total_size_.load() / page_size_) ==
- ((total_size_.load() + used_bytes - 1) / page_size_));
+ ASSERT(page_size_ == 0 ||
+ (total_size_.load() / page_size_) ==
+ ((total_size_.load() + used_bytes - 1) / page_size_));
total_size_.atomic_aaf(used_bytes);
}
@@ -485,7 +531,7 @@ class ByteStream {
if (RET_FAIL(prepare_space())) {
return ret;
}
- uint32_t remainder = page_size_ - (total_size_.load() % page_size_);
+ uint32_t remainder = page_size_ - (total_size_.load() & page_mask_);
uint32_t step =
remainder < (len - advanced) ? remainder : (len - advanced);
total_size_.atomic_aaf(step);
@@ -504,6 +550,7 @@ class ByteStream {
Page* cur_;
Page* end_;
int64_t total_size_;
+ int64_t consumed_ = 0;
BufferIterator(const ByteStream& bs) : host_(bs) {
cur_ = bs.head_.load();
end_ = bs.tail_.load();
@@ -514,13 +561,17 @@ class ByteStream {
Buffer b;
if (cur_ != nullptr) {
b.buf_ = (char*)cur_->buf_;
- if (cur_ == end_ &&
- host_.total_size_.load() % host_.page_size_ != 0) {
- b.len_ = host_.total_size_.load() % host_.page_size_;
+ if (cur_ == end_) {
+ // Last page: clamp to remaining total_size_. For wrapped
+ // streams page_size_ may have been rounded up past the
+ // user buffer (see wrap_from), so we must not return
+ // page_size_ as the length here.
+ b.len_ = static_cast(total_size_ - consumed_);
} else {
b.len_ = host_.page_size_;
}
ASSERT(b.len_ > 0);
+ consumed_ += b.len_;
cur_ = cur_->next_.load();
}
return b;
@@ -566,7 +617,7 @@ class ByteStream {
// get tail position atomically
Page* host_end = nullptr;
- uint32_t host_total_size = 0;
+ uint64_t host_total_size = 0;
while (true) {
host_end = host_.tail_.load();
host_total_size = host_.total_size_.load();
@@ -577,7 +628,7 @@ class ByteStream {
while (true) {
if (cur_ == host_end) {
- if (host_total_size % host_.page_size_ == 0) {
+ if ((host_total_size & host_.page_mask_) == 0) {
if (read_offset_within_cur_page_ == host_.page_size_) {
return b;
} else {
@@ -591,15 +642,15 @@ class ByteStream {
}
} else {
if (read_offset_within_cur_page_ ==
- (host_total_size % host_.page_size_)) {
+ (host_total_size & host_.page_mask_)) {
return b;
} else {
b.buf_ = ((char*)(cur_->buf_)) +
read_offset_within_cur_page_;
- b.len_ = (host_total_size % host_.page_size_) -
+ b.len_ = (host_total_size & host_.page_mask_) -
read_offset_within_cur_page_;
read_offset_within_cur_page_ =
- (host_total_size % host_.page_size_);
+ (host_total_size & host_.page_mask_);
total_end_offset_ += b.len_;
return b;
}
@@ -629,7 +680,7 @@ class ByteStream {
FORCE_INLINE int prepare_space() {
int ret = common::E_OK;
if (UNLIKELY(tail_.load() == nullptr ||
- total_size_.load() % page_size_ == 0)) {
+ (total_size_.load() & page_mask_) == 0)) {
Page* p = nullptr;
if (RET_FAIL(alloc_page(p))) {
return ret;
@@ -646,7 +697,7 @@ class ByteStream {
}
if (UNLIKELY(read_page_ == nullptr)) {
read_page_ = head_.load();
- } else if (UNLIKELY(read_pos_ % page_size_ == 0)) {
+ } else if (UNLIKELY((read_pos_ & page_mask_) == 0)) {
read_page_ = read_page_->next_.load();
}
if (UNLIKELY(read_page_ == nullptr)) {
@@ -682,10 +733,14 @@ class ByteStream {
OptionalAtomic head_;
OptionalAtomic tail_;
Page* read_page_; // only one thread is allow to reader this ByteStream
- OptionalAtomic total_size_; // total size in byte
- uint32_t read_pos_; // current reader position
- uint32_t marked_read_pos_; // current reader position
+ OptionalAtomic total_size_; // total size in byte
+ // 64-bit so streams that legitimately grow past 4 GiB don't truncate
+ // the read cursor (e.g. concatenated chunk buffers in the writer's
+ // write_stream_ before the next flush).
+ uint64_t read_pos_; // current reader position
+ uint64_t marked_read_pos_; // reader position saved by mark_read_pos()
uint32_t page_size_;
+ uint32_t page_mask_; // page_size_ - 1, for bitwise AND instead of modulo
AllocModID mid_;
public:
@@ -1185,6 +1240,7 @@ class SerializationUtil {
// indicates that memory has been allocated and must be freed.
FORCE_INLINE static int read_var_char_ptr(std::string*& str,
ByteStream& in) {
+ str = nullptr;
int ret = common::E_OK;
int32_t len = 0;
int32_t read_len = 0;
@@ -1192,7 +1248,6 @@ class SerializationUtil {
return ret;
} else {
if (len == storage::NO_STR_TO_READ) {
- str = nullptr;
return ret;
} else {
char* tmp_buf =
diff --git a/cpp/src/common/allocator/mem_alloc.cc b/cpp/src/common/allocator/mem_alloc.cc
index 524287e75..b7c5c09c1 100644
--- a/cpp/src/common/allocator/mem_alloc.cc
+++ b/cpp/src/common/allocator/mem_alloc.cc
@@ -95,7 +95,7 @@ void* mem_alloc(uint32_t size, AllocModID mid) {
auto high4b = static_cast(header >> 32);
*reinterpret_cast(raw) = high4b;
*reinterpret_cast(raw + 4) = low4b;
- ModStat::get_instance().update_alloc(mid, static_cast(size));
+ ModStat::get_instance().update_alloc(mid, static_cast(size));
return raw + header_size;
}
@@ -158,7 +158,7 @@ void* mem_realloc(void* ptr, uint32_t size) {
*reinterpret_cast(p) = high4b;
*reinterpret_cast(p + 4) = low4b;
ModStat::get_instance().update_alloc(
- mid, int32_t(size) - int32_t(original_size));
+ mid, int64_t(size) - int64_t(original_size));
return p + ALIGNMENT;
}
@@ -166,9 +166,9 @@ void ModStat::init() {
if (stat_arr_ != NULL) {
return;
}
- stat_arr_ = (int32_t*)(::malloc(ITEM_SIZE * ITEM_COUNT));
+ stat_arr_ = (int64_t*)(::malloc(ITEM_SIZE * ITEM_COUNT));
for (int8_t i = 0; i < __LAST_MOD_ID; i++) {
- int32_t* item = get_item(i);
+ int64_t* item = get_item(i);
*item = 0;
}
}
@@ -183,14 +183,14 @@ void ModStat::print_stat() {
struct Entry {
const char* name;
- int32_t val;
+ int64_t val;
};
Entry entries[__LAST_MOD_ID];
int count = 0;
int64_t total = 0;
for (int i = 0; i < __LAST_MOD_ID; i++) {
- int32_t val = ATOMIC_FAA(get_item(i), 0);
+ int64_t val = ATOMIC_FAA(get_item(i), 0LL);
total += val;
if (val != 0) {
entries[count++] = {g_mod_names[i], val};
diff --git a/cpp/src/common/allocator/page_arena.h b/cpp/src/common/allocator/page_arena.h
index 9b8ce5ef6..c0dfbebb9 100644
--- a/cpp/src/common/allocator/page_arena.h
+++ b/cpp/src/common/allocator/page_arena.h
@@ -47,6 +47,19 @@ class PageArena {
FORCE_INLINE void destroy() { reset(); }
void reset();
+ // Returns the number of bytes actually consumed across all pages.
+ // This is the precise M_meta size: metadata structs are not data-encoded,
+ // so arena used bytes == metadata memory exactly.
+ int64_t get_total_used_bytes() const {
+ int64_t total = 0;
+ Page* p = dummy_head_.next_;
+ while (p) {
+ total += p->cur_alloc_ - reinterpret_cast(p + 1);
+ p = p->next_;
+ }
+ return total;
+ }
+
#ifdef ENABLE_TEST
int TEST_get_page_count() const {
int count = 0;
diff --git a/cpp/src/common/config/config.h b/cpp/src/common/config/config.h
index e2b2039a7..5cf968688 100644
--- a/cpp/src/common/config/config.h
+++ b/cpp/src/common/config/config.h
@@ -36,7 +36,7 @@ typedef struct ConfigValue {
TSEncoding time_encoding_type_;
TSDataType time_data_type_;
CompressionType time_compress_type_;
- int32_t chunk_group_size_threshold_;
+ int64_t chunk_group_size_threshold_;
int32_t record_count_for_next_mem_check_;
bool encrypt_flag_ = false;
TSEncoding boolean_encoding_type_;
@@ -46,14 +46,21 @@ typedef struct ConfigValue {
TSEncoding double_encoding_type_;
TSEncoding string_encoding_type_;
CompressionType default_compression_type_;
+ bool parallel_read_enabled_;
bool parallel_write_enabled_;
- int32_t write_thread_count_;
- // When true, aligned writer enforces page size limit strictly by
- // interleaving time/value writes and sealing pages together when any side
- // becomes full.
- // When false, aligned writer may disable some page-size checks to improve
- // write performance.
- bool strict_page_size_ = true;
+ // Size of the single global worker pool (common::g_thread_pool_) shared by
+ // the parallel write and parallel read paths. The pool is (re)created from
+ // this value in init_common(). Like sync_on_close_/encrypt_flag_ it keeps
+ // its in-class default rather than being reset by init_config_value(), so a
+ // set_thread_count() call made before libtsfile_init() actually sizes the
+ // pool instead of being clobbered by the init-time defaults.
+ int32_t thread_count_ = 6;
+ // Durability knob: when true (default), TsFileIOWriter::end_file() issues
+ // an fsync() before closing so that a process / OS crash cannot leave a
+ // partially-flushed file behind. Disabling this trades durability for
+ // throughput: writes return success as soon as data is in the page cache.
+ // Only set to false if the caller drives its own fsync policy.
+ bool sync_on_close_ = true;
} ConfigValue;
extern void init_config_value();
@@ -62,10 +69,14 @@ extern CompressionType get_default_compressor();
// In the future, configuration items need to be dynamically adjusted according
// to the level
extern void set_config_value();
-extern void config_set_page_max_point_count(uint32_t page_max_point_count);
-extern void config_set_max_degree_of_index_node(
+// Public config setters: validate at the entry point and return
+// E_INVALID_ARG when the requested value is outside the supported range.
+// On rejection the underlying field is left untouched so the writer keeps
+// running with whatever value it had before — callers that don't check the
+// return are no worse off than they were before validation existed.
+extern int config_set_page_max_point_count(uint32_t page_max_point_count);
+extern int config_set_max_degree_of_index_node(
uint32_t max_degree_of_index_node);
-extern void config_set_strict_page_size(bool strict_page_size);
} // namespace common
diff --git a/cpp/src/common/container/bit_map.cc b/cpp/src/common/container/bit_map.cc
index 407605e56..3b1af6ab2 100644
--- a/cpp/src/common/container/bit_map.cc
+++ b/cpp/src/common/container/bit_map.cc
@@ -31,14 +31,15 @@ BitMap::~BitMap() {
}
}
-int BitMap::init(uint32_t item_size, bool init_as_zero) {
+int BitMap::init(uint32_t item_size, bool init_as_zero, AllocModID mod_id) {
uint32_t size = (item_size + 7) / 8;
- bitmap_ = static_cast(mem_alloc(size, MOD_TSBLOCK));
+ bitmap_ = static_cast(mem_alloc(size, mod_id));
// need set to 0, otherwise there will be wrong data
const char initial_char = init_as_zero ? 0x00 : 0xFF;
memset(bitmap_, initial_char, size);
size_ = size;
init_as_zero_ = init_as_zero;
+ has_set_bits_ = !init_as_zero;
return common::E_OK;
}
diff --git a/cpp/src/common/container/bit_map.h b/cpp/src/common/container/bit_map.h
index 757ab1fb1..90ed0e0b6 100644
--- a/cpp/src/common/container/bit_map.h
+++ b/cpp/src/common/container/bit_map.h
@@ -25,16 +25,13 @@
#include
#endif
+#include "common/allocator/alloc_base.h"
#include "utils/errno_define.h"
#include "utils/util_define.h"
namespace common {
-// Cross-platform bit-twiddling helpers. GCC/Clang use their builtins; MSVC
-// uses the equivalent intrinsics from ; any other compiler falls
-// back to a portable loop.
namespace bitops {
-// Population count of an 8-bit value.
FORCE_INLINE int popcount8(uint8_t v) {
#if defined(__GNUC__) || defined(__clang__)
return __builtin_popcount(v);
@@ -49,7 +46,7 @@ FORCE_INLINE int popcount8(uint8_t v) {
return c;
#endif
}
-// Count trailing zero bits. The argument must be non-zero.
+
FORCE_INLINE int ctz_nonzero(uint32_t v) {
#if defined(__GNUC__) || defined(__clang__)
return __builtin_ctz(v);
@@ -66,23 +63,13 @@ FORCE_INLINE int ctz_nonzero(uint32_t v) {
return c;
#endif
}
-// Count trailing zero bits of a 64-bit value. The argument must be non-zero.
-FORCE_INLINE int ctz64_nonzero(uint64_t v) {
+
+FORCE_INLINE int ctz_nonzero(uint64_t v) {
#if defined(__GNUC__) || defined(__clang__)
return __builtin_ctzll(v);
#elif defined(_MSC_VER)
unsigned long idx;
-#if defined(_M_X64) || defined(_M_ARM64)
_BitScanForward64(&idx, v);
-#else
- // 32-bit MSVC has no _BitScanForward64.
- if (static_cast(v) != 0) {
- _BitScanForward(&idx, static_cast(v));
- } else {
- _BitScanForward(&idx, static_cast(v >> 32));
- idx += 32;
- }
-#endif
return static_cast(idx);
#else
int c = 0;
@@ -97,13 +84,19 @@ FORCE_INLINE int ctz64_nonzero(uint64_t v) {
class BitMap {
public:
- BitMap() : bitmap_(nullptr), size_(0), init_as_zero_(true) {}
+ BitMap()
+ : bitmap_(nullptr),
+ size_(0),
+ init_as_zero_(true),
+ has_set_bits_(false) {}
~BitMap();
- int init(uint32_t item_size, bool init_as_zero = true);
+ int init(uint32_t item_size, bool init_as_zero = true,
+ AllocModID mod_id = MOD_TSBLOCK);
FORCE_INLINE void reset() {
const char initial_char = init_as_zero_ ? 0x00 : 0xFF;
memset(bitmap_, initial_char, size_);
+ has_set_bits_ = !init_as_zero_;
}
FORCE_INLINE void set(uint32_t index) {
@@ -113,6 +106,7 @@ class BitMap {
char* start_addr = bitmap_ + offset;
uint8_t bit_mask = get_bit_mask(index);
*start_addr = (*start_addr) | (bit_mask);
+ has_set_bits_ = true;
}
FORCE_INLINE void clear(uint32_t index) {
@@ -124,7 +118,26 @@ class BitMap {
*start_addr = (*start_addr) & (~bit_mask);
}
- FORCE_INLINE void clear_all() { memset(bitmap_, 0x00, size_); }
+ FORCE_INLINE void clear_all() {
+ memset(bitmap_, 0x00, size_);
+ has_set_bits_ = false;
+ }
+
+    // Overwrites the first `bytes` bytes of this BitMap's buffer with
+    // externally-owned data from `src` and keeps has_set_bits_ in sync.
+    // Callers that memcpy straight into get_bitmap() bypass that
+    // bookkeeping, so may_have_set_bits() can go stale and downstream
+    // readers will falsely treat the bitmap as empty.
+    FORCE_INLINE void copy_from(const char* src, uint32_t bytes) {
+        ASSERT(bytes <= size_);
+        memcpy(bitmap_, src, bytes);
+        // The copied data is not scanned: any non-empty copy is assumed to
+        // carry set bits. A false positive only costs a bit of per-cell
+        // testing in writers; it never causes silent data loss.
+        has_set_bits_ = has_set_bits_ || (bytes != 0);
+    }
FORCE_INLINE bool test(uint32_t index) {
uint32_t offset = index >> 3;
@@ -135,7 +148,6 @@ class BitMap {
return (*start_addr & bit_mask);
}
- // Count the number of bits set to 1 (i.e., number of null entries).
FORCE_INLINE uint32_t count_set_bits() const {
uint32_t count = 0;
const uint8_t* p = reinterpret_cast(bitmap_);
@@ -145,26 +157,21 @@ class BitMap {
return count;
}
- // Find the next set bit (null position) at or after @from,
- // within [0, total_bits). Returns total_bits if none found.
- // Skips zero bytes in bulk so cost is proportional to the number
- // of null bytes, not total rows.
FORCE_INLINE uint32_t next_set_bit(uint32_t from,
uint32_t total_bits) const {
if (from >= total_bits) return total_bits;
const uint8_t* p = reinterpret_cast(bitmap_);
uint32_t byte_idx = from >> 3;
- // Check remaining bits in the first (partial) byte
uint8_t byte_val = p[byte_idx] >> (from & 7);
if (byte_val) {
- return from + bitops::ctz_nonzero(byte_val);
+ return from + bitops::ctz_nonzero(static_cast(byte_val));
}
- // Scan subsequent full bytes, skipping zeros
const uint32_t byte_end = (total_bits + 7) >> 3;
for (++byte_idx; byte_idx < byte_end; ++byte_idx) {
if (p[byte_idx]) {
uint32_t pos =
- (byte_idx << 3) + bitops::ctz_nonzero(p[byte_idx]);
+ (byte_idx << 3) +
+ bitops::ctz_nonzero(static_cast(p[byte_idx]));
return pos < total_bits ? pos : total_bits;
}
}
@@ -175,6 +182,10 @@ class BitMap {
FORCE_INLINE char* get_bitmap() { return bitmap_; }
+ // Fast check: returns false only when guaranteed no bits are set.
+ // May return true even when no bits are actually set (conservative).
+ FORCE_INLINE bool may_have_set_bits() const { return has_set_bits_; }
+
private:
FORCE_INLINE uint8_t get_bit_mask(uint32_t index) {
return 1 << (index & 7);
@@ -184,6 +195,7 @@ class BitMap {
char* bitmap_;
uint32_t size_;
bool init_as_zero_;
+ bool has_set_bits_;
};
} // namespace common
diff --git a/cpp/src/common/container/byte_buffer.h b/cpp/src/common/container/byte_buffer.h
index 88006dac6..4e2dfab15 100644
--- a/cpp/src/common/container/byte_buffer.h
+++ b/cpp/src/common/container/byte_buffer.h
@@ -107,11 +107,11 @@ class ByteBuffer {
// for variable len value
FORCE_INLINE char* read(uint32_t offset, uint32_t* len) {
+ ASSERT(offset + variable_type_len_ <= real_data_size_);
uint32_t tmp;
- // Directly memcpy to avoid potential alignment issues when casting
- // int32_t array pointer
std::memcpy(&tmp, data_ + offset, sizeof(tmp));
*len = tmp;
+ ASSERT(offset + variable_type_len_ + *len <= real_data_size_);
char* p = &data_[offset + variable_type_len_];
return p;
}
@@ -128,4 +128,4 @@ class ByteBuffer {
};
} // namespace common
-#endif // COMMON_CONTAINER_BYTE_BUFFER_H
\ No newline at end of file
+#endif // COMMON_CONTAINER_BYTE_BUFFER_H
diff --git a/cpp/src/common/device_id.cc b/cpp/src/common/device_id.cc
index b35a8593f..e88cdac8a 100644
--- a/cpp/src/common/device_id.cc
+++ b/cpp/src/common/device_id.cc
@@ -144,7 +144,7 @@ int StringArrayDeviceID::deserialize(common::ByteStream& read_stream) {
segments_.clear();
for (uint32_t i = 0; i < num_segments; ++i) {
- std::string* segment;
+ std::string* segment = nullptr;
if (RET_FAIL(common::SerializationUtil::read_var_char_ptr(
segment, read_stream))) {
delete segment;
diff --git a/cpp/src/common/global.cc b/cpp/src/common/global.cc
index b49b55657..cc6c5117f 100644
--- a/cpp/src/common/global.cc
+++ b/cpp/src/common/global.cc
@@ -19,31 +19,31 @@
#include "global.h"
+#ifdef ENABLE_THREADS
+#include "common/thread_pool.h"
+#endif
+
#ifndef _WIN32
#include
+#include // strncasecmp
#endif
#include
+#include // strlen
-#include
-
-#ifdef ENABLE_THREADS
-#include "common/thread_pool.h"
-#endif
#include "utils/injection.h"
-#include "utils/util_define.h" // strncasecmp and other platform-compat shims
+#include "utils/util_define.h" // strncasecmp -> _strnicmp shim on Windows
namespace common {
ColumnSchema g_time_column_schema;
+ConfigValue g_config_value_;
#ifdef ENABLE_THREADS
-ThreadPool* g_write_thread_pool_ = nullptr;
+ThreadPool* g_thread_pool_ = nullptr;
#endif
-ConfigValue g_config_value_;
void init_config_value() {
- g_config_value_.tsblock_mem_inc_step_size_ = 8000; // 8k
- g_config_value_.tsblock_max_memory_ = 64000; // 64k
- // g_config_value_.tsblock_max_memory_ = 32;
+ g_config_value_.tsblock_mem_inc_step_size_ = 8000; // 8k
+ g_config_value_.tsblock_max_memory_ = 2 * 1024 * 1024; // 2 MB
g_config_value_.page_writer_max_point_num_ = 10000;
g_config_value_.page_writer_max_memory_bytes_ = 128 * 1024; // 128 k
g_config_value_.max_degree_of_index_node_ = 256;
@@ -64,19 +64,21 @@ void init_config_value() {
g_config_value_.float_encoding_type_ = GORILLA;
g_config_value_.double_encoding_type_ = GORILLA;
g_config_value_.string_encoding_type_ = PLAIN;
- // Default compression type is LZ4
-#ifdef ENABLE_LZ4
+ // Choose a default compressor that was actually compiled in: Snappy when
+ // ENABLE_SNAPPY is defined, otherwise LZ4 when ENABLE_LZ4 is defined,
+ // otherwise UNCOMPRESSED. The default must never name a codec the build
+ // left out (presumably the compressor factory could not create it).
+#ifdef ENABLE_SNAPPY
+ g_config_value_.default_compression_type_ = SNAPPY;
+#elif defined(ENABLE_LZ4)
g_config_value_.default_compression_type_ = LZ4;
#else
g_config_value_.default_compression_type_ = UNCOMPRESSED;
#endif
- unsigned int hw_cores = std::thread::hardware_concurrency();
- if (hw_cores == 0) hw_cores = 1; // fallback if detection fails
- g_config_value_.parallel_write_enabled_ = (hw_cores > 1);
- g_config_value_.write_thread_count_ =
- static_cast(std::min(hw_cores, 64u));
- // Enforce aligned page size limits strictly by default.
- g_config_value_.strict_page_size_ = true;
+ g_config_value_.parallel_read_enabled_ = true;
+ g_config_value_.parallel_write_enabled_ = true;
+ // thread_count_ keeps its in-class default (see config.h) so a
+ // set_thread_count() before libtsfile_init() is not reset here.
}
extern TSEncoding get_value_encoder(TSDataType data_type) {
@@ -113,16 +115,20 @@ extern CompressionType get_default_compressor() {
return g_config_value_.default_compression_type_;
}
-void config_set_page_max_point_count(uint32_t page_max_point_count) {
+// Stores page_max_point_count in g_config_value_.page_writer_max_point_num_.
+// Returns E_INVALID_ARG for 0 (the setting is left unchanged), else E_OK.
+int config_set_page_max_point_count(uint32_t page_max_point_count) {
+ if (page_max_point_count == 0) {
+ return E_INVALID_ARG;
+ }
g_config_value_.page_writer_max_point_num_ = page_max_point_count;
+ return E_OK;
}
-void config_set_max_degree_of_index_node(uint32_t max_degree_of_index_node) {
+// Stores the value in g_config_value_.max_degree_of_index_node_.
+// Returns E_INVALID_ARG for values below 2 (setting unchanged), else E_OK.
+int config_set_max_degree_of_index_node(uint32_t max_degree_of_index_node) {
+ if (max_degree_of_index_node < 2u) {
+ return E_INVALID_ARG;
+ }
g_config_value_.max_degree_of_index_node_ = max_degree_of_index_node;
-}
-
-void config_set_strict_page_size(bool strict_page_size) {
- g_config_value_.strict_page_size_ = strict_page_size;
+ return E_OK;
}
void set_config_value() {}
@@ -145,17 +151,35 @@ int init_common() {
g_time_column_schema.compression_ = UNCOMPRESSED;
g_time_column_schema.column_name_ = storage::TIME_COLUMN_NAME;
#ifdef ENABLE_THREADS
- // (Re)create the global write thread pool with the configured size.
- delete g_write_thread_pool_;
- size_t pool_size =
- g_config_value_.write_thread_count_ > 0
- ? static_cast(g_config_value_.write_thread_count_)
- : size_t{1};
- g_write_thread_pool_ = new ThreadPool(pool_size);
+ // (Re)create the single global worker pool with the configured size. All
+ // parallel write/read paths submit here; torn down in libtsfile_destroy().
+ delete g_thread_pool_;
+ size_t pool_size = g_config_value_.thread_count_ > 0
+ ? static_cast(g_config_value_.thread_count_)
+ : size_t{1};
+ g_thread_pool_ = new ThreadPool(pool_size);
#endif
return ret;
}
+// Sets the size of the global worker pool. Returns E_INVALID_ARG (and changes
+// nothing) when count is outside [1, 64]; otherwise records the count in
+// g_config_value_.thread_count_ and returns E_OK.
+int set_thread_count(int32_t count) {
+ if (count < 1 || count > 64) return E_INVALID_ARG;
+ g_config_value_.thread_count_ = count;
+#ifdef ENABLE_THREADS
+ // If the global pool already exists (libtsfile_init has run) rebuild it at
+ // the new size so the change takes effect immediately instead of only at
+ // the next libtsfile_init(). This joins all current workers and recreates
+ // them, so the caller must ensure no read/write is concurrently using the
+ // pool — intended for setup / benchmark reconfiguration, not mid-operation
+ // resizing.
+ if (g_thread_pool_ != nullptr) {
+ // Clear the global before rebuilding: if constructing the replacement
+ // throws, g_thread_pool_ is null rather than a dangling pointer.
+ ThreadPool* old_pool = g_thread_pool_;
+ g_thread_pool_ = nullptr;
+ delete old_pool;
+ g_thread_pool_ = new ThreadPool(static_cast<size_t>(count));
+ }
+#endif
+ return E_OK;
+}
+
bool is_timestamp_column_name(const char* time_col_name) {
// both "time" and "timestamp" refer to timestamp column.
int32_t len = strlen(time_col_name);
diff --git a/cpp/src/common/global.h b/cpp/src/common/global.h
index 5bee0fa60..ae04c6afa 100644
--- a/cpp/src/common/global.h
+++ b/cpp/src/common/global.h
@@ -29,6 +29,15 @@ namespace common {
extern TSFILE_API ConfigValue g_config_value_;
extern TSFILE_API ColumnSchema g_time_column_schema;
+#ifdef ENABLE_THREADS
+class ThreadPool;
+// The single process-wide worker pool shared by every parallel code path
+// (write column encoding, read column decoding). Created in init_common()
+// and torn down in libtsfile_destroy(); null until libtsfile_init() runs, so
+// every caller must fall back to the serial path when it is null.
+extern TSFILE_API ThreadPool* g_thread_pool_;
+#endif
+
FORCE_INLINE int set_global_time_data_type(uint8_t data_type) {
ASSERT(data_type >= BOOLEAN && data_type <= STRING);
if (data_type != INT64) {
@@ -163,29 +172,28 @@ FORCE_INLINE uint8_t get_global_compression() {
return static_cast(g_config_value_.default_compression_type_);
}
+// Stores the flag in g_config_value_.parallel_read_enabled_.
+FORCE_INLINE void set_parallel_read_enabled(bool enabled) {
+ g_config_value_.parallel_read_enabled_ = enabled;
+}
+
+// Returns the current value of g_config_value_.parallel_read_enabled_.
+FORCE_INLINE bool get_parallel_read_enabled() {
+ return g_config_value_.parallel_read_enabled_;
+}
+
FORCE_INLINE void set_parallel_write_enabled(bool enabled) {
g_config_value_.parallel_write_enabled_ = enabled;
}
FORCE_INLINE bool get_parallel_write_enabled() {
- return g_config_value_.parallel_write_enabled_ &&
- g_config_value_.write_thread_count_ > 1;
-}
-
-// Set the number of threads for parallel writes. Must be called before
-// init_common() / libtsfile_init() — the global thread pool is created
-// during initialization and is not resized at runtime.
-FORCE_INLINE int set_write_thread_count(int32_t count) {
- if (count < 1 || count > 64) return E_INVALID_ARG;
- g_config_value_.write_thread_count_ = count;
- return E_OK;
+ return g_config_value_.parallel_write_enabled_;
}
-#ifdef ENABLE_THREADS
-class ThreadPool;
-// Global write thread pool, created by init_common().
-extern ThreadPool* g_write_thread_pool_;
-#endif
+// Size of the single global worker pool. Rejects values outside [1, 64] with
+// E_INVALID_ARG, leaving the field untouched. If the pool already exists
+// (libtsfile_init has run) it is rebuilt at the new size immediately; the
+// caller must ensure no read/write is concurrently using the pool. Defined in
+// global.cc (needs the full ThreadPool type).
+extern int set_thread_count(int32_t count);
extern int init_common();
extern bool is_timestamp_column_name(const char* time_col_name);
diff --git a/cpp/src/common/mutex/CMakeLists.txt b/cpp/src/common/mutex/CMakeLists.txt
deleted file mode 100644
index e7ef66faa..000000000
--- a/cpp/src/common/mutex/CMakeLists.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-#[[
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements. See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership. The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License at
-
- https://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied. See the License for the
-specific language governing permissions and limitations
-under the License.
-]]
-
-
diff --git a/cpp/src/common/mutex/mutex.h b/cpp/src/common/mutex/mutex.h
deleted file mode 100644
index b35d328de..000000000
--- a/cpp/src/common/mutex/mutex.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * License); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#ifndef COMMON_MUTEX_MUTEX_H
-#define COMMON_MUTEX_MUTEX_H
-
-#include
-
-#include "utils/util_define.h"
-
-namespace common {
-
-// Thin wrapper over std::mutex. Implemented with the C++11 standard library
-// (instead of pthreads directly) so it builds on every platform, including
-// MSVC where pthreads is not available.
-class Mutex {
- public:
- Mutex() {}
- ~Mutex() {}
-
- void lock() { mutex_.lock(); }
-
- void unlock() { mutex_.unlock(); }
-
- bool try_lock() { return mutex_.try_lock(); }
-
- private:
- std::mutex mutex_;
-};
-
-class MutexGuard {
- public:
- MutexGuard(Mutex& m) : m_(m) { m_.lock(); }
- ~MutexGuard() { m_.unlock(); }
-
- private:
- Mutex& m_;
-};
-
-} // end namespace common
-#endif // COMMON_MUTEX_MUTEX_H
diff --git a/cpp/src/common/path.cc b/cpp/src/common/path.cc
deleted file mode 100644
index d70a9d6c6..000000000
--- a/cpp/src/common/path.cc
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * License); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#include "common/path.h"
-
-#include "common/constant/tsfile_constant.h"
-
-#ifdef ENABLE_ANTLR4
-#include "parser/path_nodes_generator.h"
-#endif
-
-namespace storage {
-
-Path::Path() = default;
-
-Path::Path(std::string& device, std::string& measurement)
- : measurement_(measurement),
- device_id_(std::make_shared(device)) {
- full_path_ = device + "." + measurement;
-}
-
-Path::Path(const std::string& path_sc, bool if_split) {
- if (!path_sc.empty()) {
- if (!if_split) {
- full_path_ = path_sc;
- device_id_ = std::make_shared(path_sc);
- } else {
-#ifdef ENABLE_ANTLR4
- std::vector nodes =
- PathNodesGenerator::invokeParser(path_sc);
-#else
- std::vector nodes =
- IDeviceID::split_string(path_sc, '.');
-#endif
- if (nodes.size() > 1) {
- // Join nodes, then parse like write path / Java Path (not
- // per-segment vector).
- std::string device_joined;
- for (size_t i = 0; i + 1 < nodes.size(); ++i) {
- if (i > 0) {
- device_joined += PATH_SEPARATOR_CHAR;
- }
- device_joined += nodes[i];
- }
- device_id_ =
- std::make_shared(device_joined);
- measurement_ = nodes[nodes.size() - 1];
- full_path_ = device_id_->get_device_name() + "." + measurement_;
- } else {
- full_path_ = path_sc;
- device_id_ = std::make_shared();
- measurement_ = path_sc;
- }
- }
- } else {
- full_path_ = "";
- device_id_ = std::make_shared();
- measurement_ = "";
- }
-}
-
-} // namespace storage
diff --git a/cpp/src/common/path.h b/cpp/src/common/path.h
index 3896b2715..c176d93db 100644
--- a/cpp/src/common/path.h
+++ b/cpp/src/common/path.h
@@ -21,7 +21,12 @@
#include
+#include "common/constant/tsfile_constant.h"
#include "common/device_id.h"
+#ifdef ENABLE_ANTLR4
+#include "parser/generated/PathParser.h"
+#include "parser/path_nodes_generator.h"
+#endif
#include "utils/errno_define.h"
namespace storage {
@@ -31,9 +36,57 @@ struct Path {
std::shared_ptr device_id_;
std::string full_path_;
- Path();
- Path(std::string& device, std::string& measurement);
- Path(const std::string& path_sc, bool if_split = true);
+ Path() {}
+
+ // Builds a path from an explicit device string and measurement name;
+ // full_path_ is "<device>.<measurement>".
+ Path(std::string& device, std::string& measurement)
+ : measurement_(measurement),
+ device_id_(std::make_shared<StringArrayDeviceID>(device)) {
+ full_path_ = device + "." + measurement;
+ }
+
+ // Builds a path from a single string.
+ // - Empty input: empty full_path_/measurement_ and a default device id.
+ // - if_split == false: the whole string becomes full_path_ and the device
+ // id; measurement_ is left empty.
+ // - if_split == true: the string is split into nodes (ANTLR4 parser when
+ // ENABLE_ANTLR4 is defined, otherwise a plain split on '.'); the last
+ // node is the measurement and the preceding nodes form the device.
+ Path(const std::string& path_sc, bool if_split = true) {
+ if (!path_sc.empty()) {
+ if (!if_split) {
+ full_path_ = path_sc;
+ device_id_ = std::make_shared<StringArrayDeviceID>(path_sc);
+ } else {
+#ifdef ENABLE_ANTLR4
+ std::vector<std::string> nodes =
+ PathNodesGenerator::invokeParser(path_sc);
+#else
+ std::vector<std::string> nodes =
+ IDeviceID::split_string(path_sc, '.');
+#endif
+ if (nodes.size() > 1) {
+ // Join nodes, then parse like write path / Java Path
+ // (route through the interpretive string ctor instead of
+ // the literal per-segment vector ctor, so a stored
+ // "root.sg.d1" device matches a query path
+ // "root.sg.d1.s1").
+ std::string device_joined;
+ for (size_t i = 0; i + 1 < nodes.size(); ++i) {
+ if (i > 0) {
+ device_joined += PATH_SEPARATOR_CHAR;
+ }
+ device_joined += nodes[i];
+ }
+ device_id_ =
+ std::make_shared<StringArrayDeviceID>(device_joined);
+ measurement_ = nodes[nodes.size() - 1];
+ full_path_ =
+ device_id_->get_device_name() + "." + measurement_;
+ } else {
+ // A single node carries no device part: it is the measurement.
+ full_path_ = path_sc;
+ device_id_ = std::make_shared<StringArrayDeviceID>();
+ measurement_ = path_sc;
+ }
+ }
+ } else {
+ full_path_ = "";
+ device_id_ = std::make_shared<StringArrayDeviceID>();
+ measurement_ = "";
+ }
+ }
bool operator==(const Path& path) {
if (measurement_.compare(path.measurement_) == 0 &&
diff --git a/cpp/src/common/seq_tvlist.h b/cpp/src/common/seq_tvlist.h
deleted file mode 100644
index 24805ac5d..000000000
--- a/cpp/src/common/seq_tvlist.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * License); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#ifndef COMMON_SEQ_TVLIST_H
-#define COMMON_SEQ_TVLIST_H
-
-#include "common/allocator/alloc_base.h"
-#include "common/allocator/page_arena.h"
-#include "common/mutex/mutex.h"
-#include "utils/db_utils.h"
-#include "utils/errno_define.h"
-#include "utils/storage_utils.h"
-#include "utils/util_define.h"
-
-namespace storage {
-
-class SeqTVListBase {
- public:
- SeqTVListBase()
- : data_type_(common::VECTOR),
- mutex_(),
- ref_count_(0),
- primary_array_size_(0),
- list_size_(0),
- write_count_(0),
- page_arena_(common::g_base_allocator),
- use_page_arena_(false),
- is_immutable_(false) {}
- virtual ~SeqTVListBase() {}
- virtual void destroy() {}
-
- FORCE_INLINE void ref() { ATOMIC_AAF(&ref_count_, 1); }
- FORCE_INLINE bool unref() { return 0 == ATOMIC_AAF(&ref_count_, -1); }
-
- FORCE_INLINE void lock() { mutex_.lock(); }
- FORCE_INLINE void unlock() { mutex_.unlock(); }
-
- int32_t get_total_count() const { return write_count_; }
- common::TSDataType get_data_type() const { return data_type_; }
- virtual TimeRange get_time_range() const = 0;
- void mark_immutable() { is_immutable_ = true; }
- bool is_immutable() const { return is_immutable_; }
-
- protected:
- common::TSDataType data_type_;
- mutable common::Mutex mutex_;
- int32_t ref_count_;
- int32_t primary_array_size_;
- int32_t list_size_;
- int32_t write_count_;
- common::PageArena page_arena_;
- bool use_page_arena_;
- bool is_immutable_;
-};
-
-template
-class SeqTVList : public SeqTVListBase {
- public:
- typedef struct TV {
- int64_t time_;
- Type value_;
- } TV;
-
- struct Iterator {
- SeqTVList* host_list_;
- int32_t read_idx_;
- int32_t end_idx_;
-
- Iterator() : host_list_(nullptr), read_idx_(UINT32_MAX), end_idx_(0) {}
-
- INLINE void init(SeqTVList* host, int32_t start_idx, int32_t end_idx) {
- host_list_ = host;
- read_idx_ = start_idx;
- end_idx_ = end_idx;
- }
-
- int next(TV& tv) {
- if (read_idx_ >= end_idx_) {
- return common::E_NO_MORE_DATA;
- }
- tv = host_list_->at(read_idx_);
- read_idx_++;
- return common::E_OK;
- }
- };
-
- public:
- SeqTVList() : tv_array_list_(nullptr), last_time_(-1) {
- data_type_ = common::GetDataTypeFromTemplateType();
- }
- virtual ~SeqTVList() {}
-
- int init(int32_t primary_array_size, int32_t max_count,
- bool use_page_arena);
- void destroy() OVERRIDE;
-
- int push(int64_t time, Type value);
- int push_without_lock(int64_t time, Type value);
- Iterator scan_without_lock(int64_t start_time, int64_t end_time);
- Iterator scan_without_lock();
-
- TimeRange get_time_range() const OVERRIDE {
- TimeRange time_range;
- common::MutexGuard mg(mutex_);
- if (write_count_ > 0) {
- time_range.start_time_ = time_at(0);
- time_range.end_time_ = time_at(write_count_ - 1);
- ASSERT(time_range.start_time_ <= time_range.end_time_);
- }
- return time_range;
- }
-
- FORCE_INLINE TV at(int32_t tv_idx) const {
- ASSERT(tv_idx < write_count_);
- int32_t list_idx = tv_idx / primary_array_size_;
- int32_t list_offset = tv_idx % primary_array_size_;
- return tv_array_list_[list_idx][list_offset];
- }
-
- FORCE_INLINE int64_t time_at(int32_t tv_idx) const {
- return at(tv_idx).time_;
- }
-
-#ifdef ENABLE_TEST
- int32_t TEST_binary_search_upper(int64_t time) {
- return binary_search_upper(time);
- }
- int32_t TEST_binary_search_lower(int64_t time) {
- return binary_search_lower(time);
- }
-#endif
-
- private:
- FORCE_INLINE void* alloc(uint32_t size) {
- if (use_page_arena_) {
- return page_arena_.alloc(size);
- } else {
- return common::mem_alloc(size, common::MOD_TVLIST_DATA);
- }
- }
-
- // return the first tv which is larger or equal to @time
- int32_t binary_search_upper(int64_t time);
- // return the last tv which is less or equal to @time
- int32_t binary_search_lower(int64_t time);
-
- private:
- TV** tv_array_list_;
- int64_t last_time_;
-};
-
-} // namespace storage
-
-#include "seq_tvlist.inc"
-
-#endif // COMMON_SEQ_TVLIST_H
diff --git a/cpp/src/common/seq_tvlist.inc b/cpp/src/common/seq_tvlist.inc
deleted file mode 100644
index c25e49f45..000000000
--- a/cpp/src/common/seq_tvlist.inc
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * License); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-// #include "seq_tvlist.h"
-#include
-#include
-#include
-#include "common/mutex/mutex.h"
-#include "common/logger/elog.h"
-
-
-namespace storage
-{
-
-template
-int SeqTVList::init(int32_t primary_array_size,
- int32_t max_count,
- bool use_page_arena)
-{
- if (primary_array_size > max_count) {
- //common:://log_err("TVList init error, primary_array_size=%u, max_count=%u", primary_array_size, max_count);
- return common::E_INVALID_ARG;
- }
- use_page_arena_ = use_page_arena;
-
- primary_array_size_ = primary_array_size;
- list_size_ = (max_count / primary_array_size_) +
- (max_count % primary_array_size_ == 0 ? 0 : 1);
-
- int32_t alloc_size = sizeof(TV) * list_size_;
- tv_array_list_ = (TV**)alloc(alloc_size);
- if (tv_array_list_ == nullptr) {
- return common::E_OOM;
- }
- memset(tv_array_list_, 0, alloc_size);
- write_count_ = 0;
- if (use_page_arena_) {
- // TODO make it configurable
- page_arena_.init(sizeof(TV) * primary_array_size_ * 4, common::MOD_TVLIST_OBJ);
- }
- return common::E_OK;
-}
-
-template
-int SeqTVList::push(int64_t time, Type value)
-{
- common::MutexGuard mg(mutex_);
- return push_without_lock(time, value);
-};
-
-template
-int SeqTVList::push_without_lock(int64_t time, Type value)
-{
- if (UNLIKELY(time <= last_time_)) {
- return common::E_OUT_OF_ORDER;
- }
- if (UNLIKELY(write_count_ >= list_size_ * primary_array_size_)) {
- return common::E_OVERFLOW;
- }
-
- int32_t list_idx = write_count_ / primary_array_size_;
- int32_t list_offset = write_count_ % primary_array_size_;
- if (UNLIKELY(list_offset == 0)) {
- ASSERT(tv_array_list_[list_idx] == nullptr);
- tv_array_list_[list_idx] = static_cast(alloc(sizeof(TV) * primary_array_size_));
- if (UNLIKELY(tv_array_list_[list_idx] == nullptr)) {
- return common::E_OOM;
- }
- }
-
- TV insert_tv;
- insert_tv.time_ = time;
- insert_tv.value_ = value;
-#if STORAGE_ENGINE_DEBUG
- std::cout << "tvlist[" << list_idx << "][" << list_offset << "] = (" << time << ", " << value << ")" << std::endl;
-#endif
- tv_array_list_[list_idx][list_offset] = insert_tv;
- write_count_++;
- last_time_ = time;
- return common::E_OK;
-};
-
-template
-void SeqTVList::destroy()
-{
- if (use_page_arena_) {
- page_arena_.destroy();
- } else {
- int32_t list_size = write_count_ / primary_array_size_
- + (write_count_ % primary_array_size_ == 0 ? 0 : 1);
- for (int i = 0; i < list_size; i++) {
- common::mem_free(tv_array_list_[i]);
- }
- common::mem_free(tv_array_list_);
- }
-}
-
-template
-typename SeqTVList::Iterator SeqTVList::scan_without_lock(int64_t start_time, int64_t end_time)
-{
- ASSERT(start_time < end_time);
- int32_t start_idx = binary_search_lower(start_time);
- int32_t end_idx = binary_search_upper(end_time);
- ASSERT(start_idx <= end_time + 1);
- SeqTVList::Iterator iter;
- iter.init(this, start_idx, end_idx);
- return iter;
-}
-
-template
-typename SeqTVList::Iterator SeqTVList::scan_without_lock()
-{
- SeqTVList::Iterator iter;
- iter.init(this, 0, write_count_);
- return iter;
-}
-
-// return the first tv which is larger or equal to @time
-template
-int32_t SeqTVList::binary_search_lower(int64_t time)
-{
- int32_t start = -1;
- int32_t end = write_count_;
-
- // arr[start] < time <= arr[end]
- while (start + 1 != end) {
- int mid = (start + end) / 2;
- int64_t mid_time = time_at(mid);
- if (mid_time < time) {
- start = mid;
- } else {
- end = mid;
- }
- }
- return end;
-}
-
-// return the last tv which is less or equal to @time
-template
-int32_t SeqTVList::binary_search_upper(int64_t time)
-{
- int32_t start = 0;
- int32_t end = write_count_;
-
- // arr[start] <= time < arr[end]
- while (start + 1 != end) {
- int mid = (start + end) / 2;
- int64_t mid_time = time_at(mid);
- if (mid_time <= time) {
- start = mid;
- } else {
- end = mid;
- }
- }
- return start;
-}
-
-} // namespace storage
-
diff --git a/cpp/src/common/statistic.h b/cpp/src/common/statistic.h
index bced66173..3d45b4f43 100644
--- a/cpp/src/common/statistic.h
+++ b/cpp/src/common/statistic.h
@@ -22,12 +22,18 @@
#include
+#include
#include
#include "common/allocator/alloc_base.h"
#include "common/allocator/byte_stream.h"
#include "common/db_common.h"
+// The NEON path in this header uses 64-bit-lane compare and double-precision
+// intrinsics that exist only on AArch64, so 32-bit ARM builds with NEON
+// enabled must take the scalar path.
+#if (defined(__ARM_NEON) || defined(__ARM_NEON__)) && defined(__aarch64__)
+#include <arm_neon.h>
+#define TSFILE_HAS_NEON 1
+#endif
+
namespace storage {
/*
@@ -176,6 +182,48 @@ class Statistic {
}
virtual FORCE_INLINE void update(int64_t time) { ASSERT(false); }
+ virtual void update_time_batch(const int64_t* timestamps, uint32_t count) {
+ // Element-wise fallback: hand each timestamp to update(int64_t) in order.
+ const int64_t* const batch_end = timestamps + count;
+ for (const int64_t* ts = timestamps; ts != batch_end; ++ts) {
+ update(*ts);
+ }
+ }
+ // Element-wise update_batch fallbacks, one overload per value type: each
+ // forwards (timestamps[i], values[i]) to the matching update() overload in
+ // index order. Subclasses may override an overload with a batch version.
+ virtual void update_batch(const int64_t* timestamps, const bool* values,
+ uint32_t count) {
+ for (uint32_t i = 0; i < count; i++) {
+ update(timestamps[i], values[i]);
+ }
+ }
+ virtual void update_batch(const int64_t* timestamps, const int32_t* values,
+ uint32_t count) {
+ for (uint32_t i = 0; i < count; i++) {
+ update(timestamps[i], values[i]);
+ }
+ }
+ virtual void update_batch(const int64_t* timestamps, const int64_t* values,
+ uint32_t count) {
+ for (uint32_t i = 0; i < count; i++) {
+ update(timestamps[i], values[i]);
+ }
+ }
+ virtual void update_batch(const int64_t* timestamps, const float* values,
+ uint32_t count) {
+ for (uint32_t i = 0; i < count; i++) {
+ update(timestamps[i], values[i]);
+ }
+ }
+ virtual void update_batch(const int64_t* timestamps, const double* values,
+ uint32_t count) {
+ for (uint32_t i = 0; i < count; i++) {
+ update(timestamps[i], values[i]);
+ }
+ }
+ virtual void update_batch(const int64_t* timestamps,
+ const common::String* values, uint32_t count) {
+ for (uint32_t i = 0; i < count; i++) {
+ update(timestamps[i], values[i]);
+ }
+ }
+
virtual int serialize_to(common::ByteStream& out) {
int ret = common::E_OK;
if (RET_FAIL(common::SerializationUtil::write_var_uint(count_, out))) {
@@ -554,17 +602,17 @@ class BooleanStatistic : public Statistic {
last_value_ = that.last_value_;
}
- FORCE_INLINE void reset() {
+ FORCE_INLINE void reset() override {
count_ = 0;
sum_value_ = 0;
first_value_ = false;
last_value_ = false;
}
- FORCE_INLINE void update(int64_t time, bool value) {
+ FORCE_INLINE void update(int64_t time, bool value) override {
BOOL_STAT_UPDATE(time, value);
}
- int serialize_typed_stat(common::ByteStream& out) {
+ int serialize_typed_stat(common::ByteStream& out) override {
int ret = common::E_OK;
if (RET_FAIL(common::SerializationUtil::write_ui8(first_value_ ? 1 : 0,
out))) {
@@ -575,7 +623,7 @@ class BooleanStatistic : public Statistic {
}
return ret;
}
- int deserialize_typed_stat(common::ByteStream& in) {
+ int deserialize_typed_stat(common::ByteStream& in) override {
int ret = common::E_OK;
if (RET_FAIL(common::SerializationUtil::read_ui8((uint8_t&)first_value_,
in))) {
@@ -587,13 +635,15 @@ class BooleanStatistic : public Statistic {
return ret;
}
- FORCE_INLINE common::TSDataType get_type() { return common::BOOLEAN; }
+ FORCE_INLINE common::TSDataType get_type() override {
+ return common::BOOLEAN;
+ }
- int merge_with(Statistic* stat) {
+ int merge_with(Statistic* stat) override {
MERGE_BOOL_STAT_FROM(BooleanStatistic, stat);
}
- int deep_copy_from(Statistic* stat) {
+ int deep_copy_from(Statistic* stat) override {
DEEP_COPY_BOOL_STAT_FROM(BooleanStatistic, stat);
}
};
@@ -625,7 +675,7 @@ class Int32Statistic : public Statistic {
last_value_ = that.last_value_;
}
- FORCE_INLINE void reset() {
+ FORCE_INLINE void reset() override {
count_ = 0;
sum_value_ = 0;
min_value_ = 0;
@@ -634,13 +684,41 @@ class Int32Statistic : public Statistic {
last_value_ = 0;
}
- FORCE_INLINE void update(int64_t time, int32_t value) {
+ FORCE_INLINE void update(int64_t time, int32_t value) override {
NUM_STAT_UPDATE(time, value);
}
- FORCE_INLINE common::TSDataType get_type() { return common::INT32; }
+ // Batch update for INT32 values: folds `count` (timestamp, value) pairs into
+ // this statistic in a single pass. A count of 0 is a no-op.
+ void update_batch(const int64_t* timestamps, const int32_t* values,
+ uint32_t count) override {
+ if (count == 0) return;
+ uint32_t start = 0;
+ if (count_ == 0) {
+ // Empty statistic: seed every field from element 0, then fold the
+ // remaining elements starting at index 1.
+ start_time_ = timestamps[0];
+ end_time_ = timestamps[0];
+ first_value_ = values[0];
+ last_value_ = values[0];
+ min_value_ = values[0];
+ max_value_ = values[0];
+ sum_value_ = (int64_t)values[0];
+ count_ = 1;
+ start = 1;
+ }
+ for (uint32_t i = start; i < count; i++) {
+ if (timestamps[i] < start_time_) start_time_ = timestamps[i];
+ if (timestamps[i] > end_time_) end_time_ = timestamps[i];
+ if (values[i] < min_value_) min_value_ = values[i];
+ if (values[i] > max_value_) max_value_ = values[i];
+ sum_value_ += (int64_t)values[i];
+ }
+ // The last element of the batch becomes last_value_, whatever its timestamp.
+ last_value_ = values[count - 1];
+ // Element 0 was already counted when it seeded an empty statistic.
+ count_ += (count - start);
+ }
+
+ FORCE_INLINE common::TSDataType get_type() override {
+ return common::INT32;
+ }
- int serialize_typed_stat(common::ByteStream& out) {
+ int serialize_typed_stat(common::ByteStream& out) override {
int ret = common::E_OK;
if (RET_FAIL(common::SerializationUtil::write_ui32(min_value_, out))) {
} else if (RET_FAIL(common::SerializationUtil::write_ui32(max_value_,
@@ -654,7 +732,7 @@ class Int32Statistic : public Statistic {
}
return ret;
}
- int deserialize_typed_stat(common::ByteStream& in) {
+ int deserialize_typed_stat(common::ByteStream& in) override {
int ret = common::E_OK;
if (RET_FAIL(common::SerializationUtil::read_ui32((uint32_t&)min_value_,
in))) {
@@ -676,15 +754,15 @@ class Int32Statistic : public Statistic {
// << std::endl;
return ret;
}
- int merge_with(Statistic* stat) {
+ int merge_with(Statistic* stat) override {
MERGE_NUM_STAT_FROM(Int32Statistic, stat);
}
- int deep_copy_from(Statistic* stat) {
+ int deep_copy_from(Statistic* stat) override {
DEEP_COPY_NUM_STAT_FROM(Int32Statistic, stat);
}
- std::string to_string() const {
+ std::string to_string() const override {
std::ostringstream oss;
oss << "{count=" << count_ << ", start_time=" << start_time_
<< ", end_time=" << end_time_ << ", first_val=" << first_value_
@@ -696,7 +774,7 @@ class Int32Statistic : public Statistic {
};
class DateStatistic : public Int32Statistic {
- FORCE_INLINE common::TSDataType get_type() { return common::DATE; }
+ FORCE_INLINE common::TSDataType get_type() override { return common::DATE; }
};
class Int64Statistic : public Statistic {
@@ -726,7 +804,7 @@ class Int64Statistic : public Statistic {
last_value_ = that.last_value_;
}
- FORCE_INLINE void reset() {
+ FORCE_INLINE void reset() override {
count_ = 0;
sum_value_ = 0;
min_value_ = 0;
@@ -734,13 +812,69 @@ class Int64Statistic : public Statistic {
first_value_ = 0;
last_value_ = 0;
}
- FORCE_INLINE void update(int64_t time, int64_t value) {
+ FORCE_INLINE void update(int64_t time, int64_t value) override {
NUM_STAT_UPDATE(time, value);
}
- FORCE_INLINE common::TSDataType get_type() { return common::INT64; }
+ void update_batch(const int64_t* timestamps, const int64_t* values,
+ uint32_t count) override {  // Folds `count` (timestamp, value) rows into the running statistics.
+ if (count == 0) return;  // empty batch: leave every field untouched
+ uint32_t start = 0;  // index of the first row still to be folded in
+ if (count_ == 0) {  // nothing recorded yet: seed all fields from row 0
+ start_time_ = timestamps[0];
+ end_time_ = timestamps[0];
+ first_value_ = values[0];
+ last_value_ = values[0];
+ min_value_ = values[0];
+ max_value_ = values[0];
+ sum_value_ = (double)values[0];
+ count_ = 1;
+ start = 1;  // row 0 is already accounted for
+ }
+ // Timestamps are monotonic (verified by TimePageWriter),
+ // so only first/last matter for start_time_/end_time_.
+ if (count > start) {  // at least one row is left after seeding
+ if (timestamps[start] < start_time_)
+ start_time_ = timestamps[start];
+ if (timestamps[count - 1] > end_time_)
+ end_time_ = timestamps[count - 1];
+ }
+ uint32_t i = start;  // shared cursor: the NEON loop advances it, the scalar loop finishes the rest
+#if TSFILE_HAS_NEON
+ {
+ int64x2_t vmin = vdupq_n_s64(min_value_);  // both lanes start at the current minimum
+ int64x2_t vmax = vdupq_n_s64(max_value_);  // both lanes start at the current maximum
+ float64x2_t vsum = vdupq_n_f64(0.0);  // per-lane partial sums for this batch only
+ for (; i + 2 <= count; i += 2) {  // two values per iteration
+ int64x2_t v = vld1q_s64(&values[i]);
+ // min/max via compare+select (no vminq_s64 in NEON)
+ uint64x2_t lt = vcltq_s64(v, vmin);
+ vmin = vbslq_s64(lt, v, vmin);
+ uint64x2_t gt = vcgtq_s64(v, vmax);
+ vmax = vbslq_s64(gt, v, vmax);
+ vsum = vaddq_f64(vsum, vcvtq_f64_s64(v));  // convert to double before accumulating
+ }
+ min_value_ =
+ std::min(vgetq_lane_s64(vmin, 0), vgetq_lane_s64(vmin, 1));  // reduce the two lanes to one value
+ max_value_ =
+ std::max(vgetq_lane_s64(vmax, 0), vgetq_lane_s64(vmax, 1));
+ sum_value_ += vgetq_lane_f64(vsum, 0) + vgetq_lane_f64(vsum, 1);
+ }
+#endif
+ for (; i < count; i++) {  // scalar path: the odd leftover row, or every row when NEON is off
+ if (values[i] < min_value_) min_value_ = values[i];
+ if (values[i] > max_value_) max_value_ = values[i];
+ sum_value_ += (double)values[i];
+ }
+ last_value_ = values[count - 1];
+ count_ += (count - start);  // the seeded row, if any, was already counted above
+ }
+
+ FORCE_INLINE common::TSDataType get_type() override {
+ return common::INT64;
+ }
- int serialize_typed_stat(common::ByteStream& out) {
+ int serialize_typed_stat(common::ByteStream& out) override {
int ret = common::E_OK;
if (RET_FAIL(common::SerializationUtil::write_ui64(min_value_, out))) {
} else if (RET_FAIL(common::SerializationUtil::write_ui64(max_value_,
@@ -754,7 +888,7 @@ class Int64Statistic : public Statistic {
}
return ret;
}
- int deserialize_typed_stat(common::ByteStream& in) {
+ int deserialize_typed_stat(common::ByteStream& in) override {
int ret = common::E_OK;
if (RET_FAIL(common::SerializationUtil::read_ui64((uint64_t&)min_value_,
in))) {
@@ -769,15 +903,15 @@ class Int64Statistic : public Statistic {
}
return ret;
}
- int merge_with(Statistic* stat) {
+ int merge_with(Statistic* stat) override {
MERGE_NUM_STAT_FROM(Int64Statistic, stat);
}
- int deep_copy_from(Statistic* stat) {
+ int deep_copy_from(Statistic* stat) override {
DEEP_COPY_NUM_STAT_FROM(Int64Statistic, stat);
}
- std::string to_string() const {
+ std::string to_string() const override {
std::ostringstream oss;
oss << "{count=" << count_ << ", start_time=" << start_time_
<< ", end_time=" << end_time_ << ", first_val=" << first_value_
@@ -815,7 +949,7 @@ class FloatStatistic : public Statistic {
last_value_ = that.last_value_;
}
- FORCE_INLINE void reset() {
+ FORCE_INLINE void reset() override {
count_ = 0;
sum_value_ = 0;
min_value_ = 0;
@@ -823,13 +957,15 @@ class FloatStatistic : public Statistic {
first_value_ = 0;
last_value_ = 0;
}
- FORCE_INLINE void update(int64_t time, float value) {
+ FORCE_INLINE void update(int64_t time, float value) override {
NUM_STAT_UPDATE(time, value);
}
- FORCE_INLINE common::TSDataType get_type() { return common::FLOAT; }
+ FORCE_INLINE common::TSDataType get_type() override {
+ return common::FLOAT;
+ }
- int serialize_typed_stat(common::ByteStream& out) {
+ int serialize_typed_stat(common::ByteStream& out) override {
int ret = common::E_OK;
if (RET_FAIL(common::SerializationUtil::write_float(min_value_, out))) {
} else if (RET_FAIL(common::SerializationUtil::write_float(max_value_,
@@ -843,7 +979,7 @@ class FloatStatistic : public Statistic {
}
return ret;
}
- int deserialize_typed_stat(common::ByteStream& in) {
+ int deserialize_typed_stat(common::ByteStream& in) override {
int ret = common::E_OK;
if (RET_FAIL(common::SerializationUtil::read_float(min_value_, in))) {
} else if (RET_FAIL(
@@ -857,10 +993,10 @@ class FloatStatistic : public Statistic {
}
return ret;
}
- int merge_with(Statistic* stat) {
+ int merge_with(Statistic* stat) override {
MERGE_NUM_STAT_FROM(FloatStatistic, stat);
}
- int deep_copy_from(Statistic* stat) {
+ int deep_copy_from(Statistic* stat) override {
DEEP_COPY_NUM_STAT_FROM(FloatStatistic, stat);
}
};
@@ -892,7 +1028,7 @@ class DoubleStatistic : public Statistic {
last_value_ = that.last_value_;
}
- FORCE_INLINE void reset() {
+ FORCE_INLINE void reset() override {
count_ = 0;
sum_value_ = 0;
min_value_ = 0;
@@ -900,13 +1036,64 @@ class DoubleStatistic : public Statistic {
first_value_ = 0;
last_value_ = 0;
}
- FORCE_INLINE void update(int64_t time, double value) {
+ FORCE_INLINE void update(int64_t time, double value) override {
NUM_STAT_UPDATE(time, value);
}
- FORCE_INLINE common::TSDataType get_type() { return common::DOUBLE; }
+    void update_batch(const int64_t* timestamps, const double* values,
+                      uint32_t count) override {  // batch form of update()
+        if (count == 0) return;
+        uint32_t start = 0;  // first row not yet folded in
+        if (count_ == 0) {   // nothing recorded yet: seed from row 0
+            start_time_ = timestamps[0];
+            end_time_ = timestamps[0];
+            first_value_ = values[0];
+            min_value_ = values[0];
+            max_value_ = values[0];
+            sum_value_ = values[0];
+            count_ = 1;
+            start = 1;
+        }
+        if (count > start) {  // only the batch ends are checked for the range
+            if (timestamps[start] < start_time_)
+                start_time_ = timestamps[start];
+            if (timestamps[count - 1] > end_time_)
+                end_time_ = timestamps[count - 1];
+        }
+        uint32_t i = start;
+#if TSFILE_HAS_NEON
+        {
+            float64x2_t vmin = vdupq_n_f64(min_value_);
+            float64x2_t vmax = vdupq_n_f64(max_value_);
+            float64x2_t vsum = vdupq_n_f64(0.0);
+            for (; i + 2 <= count; i += 2) {
+                float64x2_t v = vld1q_f64(&values[i]);
+                // Mirrors the scalar `<`/`>` tests; vminq/vmaxq propagate NaN.
+                vmin = vbslq_f64(vcltq_f64(v, vmin), v, vmin);
+                vmax = vbslq_f64(vcgtq_f64(v, vmax), v, vmax);
+                vsum = vaddq_f64(vsum, v);
+            }
+            min_value_ =
+                std::min(vgetq_lane_f64(vmin, 0), vgetq_lane_f64(vmin, 1));
+            max_value_ =
+                std::max(vgetq_lane_f64(vmax, 0), vgetq_lane_f64(vmax, 1));
+            sum_value_ += vgetq_lane_f64(vsum, 0) + vgetq_lane_f64(vsum, 1);
+        }
+#endif
+        for (; i < count; i++) {  // leftover row, or all rows without NEON
+            if (values[i] < min_value_) min_value_ = values[i];
+            if (values[i] > max_value_) max_value_ = values[i];
+            sum_value_ += values[i];
+        }
+        last_value_ = values[count - 1];  // covers the seeded row as well
+        count_ += (count - start);
+    }
+
+ FORCE_INLINE common::TSDataType get_type() override {
+ return common::DOUBLE;
+ }
- int serialize_typed_stat(common::ByteStream& out) {
+ int serialize_typed_stat(common::ByteStream& out) override {
int ret = common::E_OK;
if (RET_FAIL(
common::SerializationUtil::write_double(min_value_, out))) {
@@ -921,7 +1108,7 @@ class DoubleStatistic : public Statistic {
}
return ret;
}
- int deserialize_typed_stat(common::ByteStream& in) {
+ int deserialize_typed_stat(common::ByteStream& in) override {
int ret = common::E_OK;
if (RET_FAIL(common::SerializationUtil::read_double(min_value_, in))) {
} else if (RET_FAIL(common::SerializationUtil::read_double(max_value_,
@@ -935,10 +1122,10 @@ class DoubleStatistic : public Statistic {
}
return ret;
}
- int merge_with(Statistic* stat) {
+ int merge_with(Statistic* stat) override {
MERGE_NUM_STAT_FROM(DoubleStatistic, stat);
}
- int deep_copy_from(Statistic* stat) {
+ int deep_copy_from(Statistic* stat) override {
DEEP_COPY_NUM_STAT_FROM(DoubleStatistic, stat);
}
};
@@ -960,30 +1147,50 @@ class TimeStatistic : public Statistic {
end_time_ = that.end_time_;
}
- FORCE_INLINE void reset() {
+ FORCE_INLINE void reset() override {
count_ = 0;
start_time_ = 0;
end_time_ = 0;
}
- FORCE_INLINE void update(int64_t time) {
+ FORCE_INLINE void update(int64_t time) override {
TIME_STAT_UPDATE((time));
count_++;
}
- FORCE_INLINE common::TSDataType get_type() { return common::VECTOR; }
+    void update_time_batch(const int64_t* timestamps, uint32_t count) override {
+        // Widens [start_time_, end_time_] with a batch of `count` timestamps.
+        // Only the two ends of the batch are inspected: per the original note
+        // TimePageWriter has already verified the timestamps are monotonic.
+        if (count == 0) return;
+        const int64_t head = timestamps[0];
+        const int64_t tail = timestamps[count - 1];
+        const bool was_empty = (count_ == 0);
+        if (was_empty || head < start_time_) start_time_ = head;
+        if (was_empty) end_time_ = head;
+        if (tail > end_time_) end_time_ = tail;
+        count_ += count;
+    }
- int serialize_typed_stat(common::ByteStream& out) { return common::E_OK; }
- int deserialize_typed_stat(common::ByteStream& in) { return common::E_OK; }
- int merge_with(Statistic* stat) {
+ FORCE_INLINE common::TSDataType get_type() override {
+ return common::VECTOR;
+ }
+
+ int serialize_typed_stat(common::ByteStream& out) override {
+ return common::E_OK;
+ }
+ int deserialize_typed_stat(common::ByteStream& in) override {
+ return common::E_OK;
+ }
+ int merge_with(Statistic* stat) override {
MERGE_TIME_STAT_FROM(TimeStatistic, stat);
}
- int deep_copy_from(Statistic* stat) {
+ int deep_copy_from(Statistic* stat) override {
DEEP_COPY_TIME_STAT_FROM(TimeStatistic, stat);
}
- std::string to_string() const {
+ std::string to_string() const override {
std::ostringstream oss;
oss << "{count=" << count_ << ", start_time=" << start_time_
<< ", end_time=" << end_time_ << "}";
@@ -992,7 +1199,9 @@ class TimeStatistic : public Statistic {
};
class TimestampStatistics : public Int64Statistic {
- FORCE_INLINE common::TSDataType get_type() { return common::TIMESTAMP; }
+ FORCE_INLINE common::TSDataType get_type() override {
+ return common::TIMESTAMP;
+ }
};
class StringStatistic : public Statistic {
@@ -1002,35 +1211,24 @@ class StringStatistic : public Statistic {
common::String first_value_;
common::String last_value_;
StringStatistic()
- : min_value_(),
- max_value_(),
- first_value_(),
- last_value_(),
- pa_(nullptr),
- owns_pa_(true) {
+ : min_value_(), max_value_(), first_value_(), last_value_() {
pa_ = new common::PageArena();
pa_->init(512, common::MOD_STATISTIC_OBJ);
}
StringStatistic(common::PageArena* pa)
- : min_value_(),
- max_value_(),
- first_value_(),
- last_value_(),
- pa_(pa),
- owns_pa_(false) {}
+ : min_value_(), max_value_(), first_value_(), last_value_(), pa_(pa) {}
~StringStatistic() { destroy(); }
- void destroy() {
- if (owns_pa_ && pa_) {
+ void destroy() override {  // Frees the arena and nulls pa_, so a repeated call is a no-op.
+ if (pa_) {  // NOTE(review): this also deletes an arena passed in via StringStatistic(PageArena*) - confirm that constructor is meant to take ownership.
delete pa_;
pa_ = nullptr;
}
- owns_pa_ = false;
}
- FORCE_INLINE void reset() {
+ FORCE_INLINE void reset() override {
count_ = 0;
start_time_ = 0;
end_time_ = 0;
@@ -1050,13 +1248,15 @@ class StringStatistic : public Statistic {
last_value_.dup_from(that.last_value_, *pa_);
}
- FORCE_INLINE void update(int64_t time, common::String value) {
+ FORCE_INLINE void update(int64_t time, common::String value) override {
STRING_STAT_UPDATE(time, value);
}
- FORCE_INLINE common::TSDataType get_type() { return common::STRING; }
+ FORCE_INLINE common::TSDataType get_type() override {
+ return common::STRING;
+ }
- int serialize_typed_stat(common::ByteStream& out) {
+ int serialize_typed_stat(common::ByteStream& out) override {
int ret = common::E_OK;
if (RET_FAIL(common::SerializationUtil::write_str(first_value_, out))) {
} else if (RET_FAIL(common::SerializationUtil::write_str(last_value_,
@@ -1068,7 +1268,7 @@ class StringStatistic : public Statistic {
}
return ret;
}
- int deserialize_typed_stat(common::ByteStream& in) {
+ int deserialize_typed_stat(common::ByteStream& in) override {
int ret = common::E_OK;
if (RET_FAIL(
common::SerializationUtil::read_str(first_value_, pa_, in))) {
@@ -1081,42 +1281,39 @@ class StringStatistic : public Statistic {
}
return ret;
}
- int merge_with(Statistic* stat) {
+ int merge_with(Statistic* stat) override {
MERGE_STRING_STAT_FROM(StringStatistic, stat);
}
- int deep_copy_from(Statistic* stat) {
+ int deep_copy_from(Statistic* stat) override {
DEEP_COPY_STRING_STAT_FROM(StringStatistic, stat);
}
private:
common::PageArena* pa_;
- bool owns_pa_;
};
class TextStatistic : public Statistic {
public:
common::String first_value_;
common::String last_value_;
- TextStatistic()
- : first_value_(), last_value_(), pa_(nullptr), owns_pa_(true) {
+ TextStatistic() : first_value_(), last_value_() {
pa_ = new common::PageArena();
pa_->init(512, common::MOD_STATISTIC_OBJ);
}
TextStatistic(common::PageArena* pa)
- : first_value_(), last_value_(), pa_(pa), owns_pa_(false) {}
+ : first_value_(), last_value_(), pa_(pa) {}
~TextStatistic() { destroy(); }
- void destroy() {
- if (owns_pa_ && pa_) {
+ void destroy() override {  // Frees the arena and nulls pa_, so a repeated call is a no-op.
+ if (pa_) {  // NOTE(review): this also deletes an arena passed in via TextStatistic(PageArena*) - confirm that constructor is meant to take ownership.
delete pa_;
pa_ = nullptr;
}
- owns_pa_ = false;
}
- FORCE_INLINE void reset() {
+ FORCE_INLINE void reset() override {
count_ = 0;
start_time_ = 0;
end_time_ = 0;
@@ -1132,13 +1329,13 @@ class TextStatistic : public Statistic {
last_value_.dup_from(that.last_value_, *pa_);
}
- FORCE_INLINE void update(int64_t time, common::String value) {
+ FORCE_INLINE void update(int64_t time, common::String value) override {
TEXT_STAT_UPDATE(time, value);
}
- FORCE_INLINE common::TSDataType get_type() { return common::TEXT; }
+ FORCE_INLINE common::TSDataType get_type() override { return common::TEXT; }
- int serialize_typed_stat(common::ByteStream& out) {
+ int serialize_typed_stat(common::ByteStream& out) override {
int ret = common::E_OK;
if (RET_FAIL(common::SerializationUtil::write_str(first_value_, out))) {
} else if (RET_FAIL(common::SerializationUtil::write_str(last_value_,
@@ -1146,7 +1343,7 @@ class TextStatistic : public Statistic {
}
return ret;
}
- int deserialize_typed_stat(common::ByteStream& in) {
+ int deserialize_typed_stat(common::ByteStream& in) override {
int ret = common::E_OK;
if (RET_FAIL(
common::SerializationUtil::read_str(first_value_, pa_, in))) {
@@ -1155,35 +1352,33 @@ class TextStatistic : public Statistic {
}
return ret;
}
- int merge_with(Statistic* stat) {
+ int merge_with(Statistic* stat) override {
MERGE_TEXT_STAT_FROM(TextStatistic, stat);
}
- int deep_copy_from(Statistic* stat) {
+ int deep_copy_from(Statistic* stat) override {
DEEP_COPY_TEXT_STAT_FROM(TextStatistic, stat);
}
private:
common::PageArena* pa_;
- bool owns_pa_;
};
class BlobStatistic : public Statistic {
public:
- BlobStatistic() : pa_(nullptr), owns_pa_(true) {
+ BlobStatistic() {
pa_ = new common::PageArena();
pa_->init(512, common::MOD_STATISTIC_OBJ);
}
- BlobStatistic(common::PageArena* pa) : pa_(pa), owns_pa_(false) {}
+    BlobStatistic(common::PageArena* pa) : pa_(pa) {}  // destroy() reads pa_
~BlobStatistic() { destroy(); }
void destroy() {
- if (owns_pa_ && pa_) {
+ if (pa_) {
delete pa_;
pa_ = nullptr;
}
- owns_pa_ = false;
}
FORCE_INLINE void reset() {
@@ -1214,7 +1409,6 @@ class BlobStatistic : public Statistic {
private:
common::PageArena* pa_;
- bool owns_pa_;
};
FORCE_INLINE uint32_t get_typed_statistic_sizeof(common::TSDataType type) {
diff --git a/cpp/src/common/tablet.cc b/cpp/src/common/tablet.cc
index b9ae5301a..ba37a3245 100644
--- a/cpp/src/common/tablet.cc
+++ b/cpp/src/common/tablet.cc
@@ -20,8 +20,10 @@
#include "tablet.h"
#include
+#include
#include "allocator/alloc_base.h"
+#include "container/bit_map.h"
#include "datatype/date_converter.h"
#include "utils/errno_define.h"
@@ -98,14 +100,13 @@ int Tablet::init() {
case BLOB:
case TEXT:
case STRING: {
- auto* sc = static_cast(common::mem_alloc(
- sizeof(StringColumn), common::MOD_TABLET));
- if (sc == nullptr) return E_OOM;
- new (sc) StringColumn();
- // 8 bytes/row is a conservative initial estimate for short
- // string columns (e.g. device IDs, tags). The buffer grows
- // automatically on demand via mem_realloc.
- sc->init(max_row_num_, max_row_num_ * 8);
+ void* mem =
+ common::mem_alloc(sizeof(StringColumn), common::MOD_TABLET);
+ if (mem == nullptr) {
+ return E_OOM;
+ }
+ auto* sc = new (mem) StringColumn();
+ sc->init(max_row_num_, max_row_num_ * 32);
value_matrix_[c].string_col = sc;
break;
}
@@ -120,8 +121,9 @@ int Tablet::init() {
if (bitmaps_ == nullptr) return E_OOM;
for (size_t c = 0; c < schema_count; c++) {
new (&bitmaps_[c]) BitMap();
- bitmaps_[c].init(max_row_num_, false);
+ bitmaps_[c].init(max_row_num_, false, common::MOD_TABLET);
}
+
return E_OK;
}
@@ -156,6 +158,7 @@ void Tablet::destroy() {
case TEXT:
case STRING:
value_matrix_[c].string_col->destroy();
+ value_matrix_[c].string_col->~StringColumn();
common::mem_free(value_matrix_[c].string_col);
break;
default:
@@ -192,9 +195,7 @@ int Tablet::add_timestamp(uint32_t row_index, int64_t timestamp) {
}
int Tablet::set_timestamps(const int64_t* timestamps, uint32_t count) {
- if (err_code_ != E_OK) {
- return err_code_;
- }
+ if (err_code_ != E_OK) return err_code_;
ASSERT(timestamps_ != NULL);
if (UNLIKELY(count > static_cast(max_row_num_))) {
return E_OUT_OF_RANGE;
@@ -206,15 +207,10 @@ int Tablet::set_timestamps(const int64_t* timestamps, uint32_t count) {
int Tablet::set_column_values(uint32_t schema_index, const void* data,
const uint8_t* bitmap, uint32_t count) {
- if (err_code_ != E_OK) {
- return err_code_;
- }
- if (UNLIKELY(schema_index >= schema_vec_->size())) {
- return E_OUT_OF_RANGE;
- }
- if (UNLIKELY(count > static_cast(max_row_num_))) {
+ if (err_code_ != E_OK) return err_code_;
+ if (UNLIKELY(schema_index >= schema_vec_->size())) return E_OUT_OF_RANGE;
+ if (UNLIKELY(count > static_cast(max_row_num_)))
return E_OUT_OF_RANGE;
- }
const MeasurementSchema& schema = schema_vec_->at(schema_index);
size_t elem_size = 0;
@@ -250,9 +246,13 @@ int Tablet::set_column_values(uint32_t schema_index, const void* data,
if (bitmap == nullptr) {
bitmaps_[schema_index].clear_all();
} else {
- char* tsfile_bm = bitmaps_[schema_index].get_bitmap();
+ // copy_from also refreshes has_set_bits_; a plain memcpy into
+ // get_bitmap() would leave the flag stale (e.g. cleared by a prior
+ // clear_all()) and downstream may_have_set_bits() checks would skip
+ // null-mask handling for the column.
uint32_t bm_bytes = (count + 7) / 8;
- std::memcpy(tsfile_bm, bitmap, bm_bytes);
+ bitmaps_[schema_index].copy_from(reinterpret_cast(bitmap),
+ bm_bytes);
}
cur_row_size_ = std::max(count, cur_row_size_);
return E_OK;
@@ -271,15 +271,36 @@ int Tablet::set_column_string_values(uint32_t schema_index,
return E_OUT_OF_RANGE;
}
+ // Reject non-string types: the union member is StringColumn*, but for
+ // numeric columns the same slot holds the numeric buffer pointer.
+ // Interpreting it as StringColumn* and writing into ->buffer/->offsets
+ // would corrupt the numeric buffer.
+ const TSDataType dt = schema_vec_->at(schema_index).data_type_;
+ if (dt != STRING && dt != TEXT && dt != BLOB) {
+ return E_TYPE_NOT_MATCH;
+ }
StringColumn* sc = value_matrix_[schema_index].string_col;
if (sc == nullptr) {
return E_INVALID_ARG;
}
+ // offsets is the Arrow-style "offsets" array (count + 1 entries). All
+ // downstream code assumes offsets[0] == 0, offsets are non-negative,
+ // and offsets[i] <= offsets[i+1]. Skipping these checks would let a
+ // caller pass e.g. {0, 10, 5} and trigger an unsigned underflow on
+ // (offsets[i+1] - offsets[i]) at serialize time, plus a wild memcpy.
+ if (UNLIKELY(offsets == nullptr)) return E_INVALID_ARG;
+ if (UNLIKELY(offsets[0] != 0)) return E_INVALID_ARG;
+ for (uint32_t i = 0; i < count; i++) {
+ if (UNLIKELY(offsets[i + 1] < offsets[i])) return E_INVALID_ARG;
+ }
+ if (UNLIKELY(offsets[count] < 0)) return E_INVALID_ARG;
uint32_t total_bytes = static_cast(offsets[count]);
if (total_bytes > sc->buf_capacity) {
+ char* new_buf = (char*)mem_realloc(sc->buffer, total_bytes);
+ if (UNLIKELY(new_buf == nullptr)) return E_OOM;
+ sc->buffer = new_buf;
sc->buf_capacity = total_bytes;
- sc->buffer = (char*)mem_realloc(sc->buffer, sc->buf_capacity);
}
if (total_bytes > 0) {
@@ -291,14 +312,74 @@ int Tablet::set_column_string_values(uint32_t schema_index,
if (bitmap == nullptr) {
bitmaps_[schema_index].clear_all();
} else {
- char* tsfile_bm = bitmaps_[schema_index].get_bitmap();
uint32_t bm_bytes = (count + 7) / 8;
- std::memcpy(tsfile_bm, bitmap, bm_bytes);
+ bitmaps_[schema_index].copy_from(reinterpret_cast(bitmap),
+ bm_bytes);
+ }
+ cur_row_size_ = std::max(count, cur_row_size_);
+ return E_OK;
+}
+
+int Tablet::set_column_string_repeated(uint32_t schema_index, const char* str,
+                                       uint32_t str_len, uint32_t count) {
+    // Fills rows [0, count) of a variable-width column with `count` copies of
+    // the `str_len` bytes at `str` and marks every one of those rows non-null.
+    if (err_code_ != E_OK) return err_code_;
+    if (UNLIKELY(schema_index >= schema_vec_->size())) return E_OUT_OF_RANGE;
+    if (UNLIKELY(count > static_cast<uint32_t>(max_row_num_)))
+        return E_OUT_OF_RANGE;
+    // The union slot is only a StringColumn* for variable-width types; for
+    // numeric columns the same slot holds the numeric buffer pointer.
+    const TSDataType dt = schema_vec_->at(schema_index).data_type_;
+    if (dt != STRING && dt != TEXT && dt != BLOB) return E_TYPE_NOT_MATCH;
+    StringColumn* sc = value_matrix_[schema_index].string_col;
+    if (sc == nullptr) return E_INVALID_ARG;
+    // A null source is only acceptable when there is nothing to copy.
+    if (UNLIKELY(str == nullptr && str_len > 0)) return E_INVALID_ARG;
+
+    // Offsets are stored as int32_t, so the total must fit in int32_t (not
+    // just uint32_t); multiply in 64 bits so the product cannot wrap first.
+    const uint64_t total_bytes_64 =
+        static_cast<uint64_t>(str_len) * static_cast<uint64_t>(count);
+    if (total_bytes_64 > static_cast<uint64_t>(INT32_MAX)) {
+        return E_OVERFLOW;
     }
+    const uint32_t total_bytes = static_cast<uint32_t>(total_bytes_64);
+    if (total_bytes > sc->buf_capacity) {
+        char* new_buf = (char*)mem_realloc(sc->buffer, total_bytes);
+        if (UNLIKELY(new_buf == nullptr)) return E_OOM;
+        sc->buffer = new_buf;
+        sc->buf_capacity = total_bytes;
+    }
+    // Row i occupies [i * str_len, (i + 1) * str_len); offsets[count] closes
+    // the last row. Empty strings need no copy, only the (all-zero) offsets.
+    for (uint32_t i = 0; i < count; i++) {
+        sc->offsets[i] = static_cast<int32_t>(i * str_len);
+        if (str_len > 0) memcpy(sc->buffer + i * str_len, str, str_len);
+    }
+    sc->offsets[count] = static_cast<int32_t>(total_bytes);
+    sc->buf_used = total_bytes;
+    bitmaps_[schema_index].clear_all();  // every filled row is non-null
     cur_row_size_ = std::max(count, cur_row_size_);
     return E_OK;
 }
+void Tablet::reset(uint32_t row_count) {
+    // Readies the tablet for another batch without reallocating: records the
+    // populated-row count, then resets the string columns and every column's
+    // null bitmap. Per the original note, un-reset bitmaps would carry the
+    // previous batch's cleared (non-null) bits over, so stale values would
+    // be treated as non-null and written out again.
+    ASSERT(row_count <= max_row_num_);
+    cur_row_size_ = row_count;
+    reset_string_columns();
+    if (bitmaps_ == nullptr) return;
+    common::BitMap* const bitmaps_end = bitmaps_ + schema_vec_->size();
+    for (common::BitMap* bm = bitmaps_; bm != bitmaps_end; ++bm) {
+        bm->reset();
+    }
+}
+
void* Tablet::get_value(int row_index, uint32_t schema_index,
common::TSDataType& data_type) const {
if (UNLIKELY(schema_index >= schema_vec_->size())) {
@@ -505,31 +586,21 @@ void Tablet::reset_string_columns() {
}
}
-// Find all row indices where the device ID changes. A device ID is the
-// composite key formed by all id columns (e.g. region + sensor_id). Row i
-// is a boundary when at least one id column differs between row i-1 and row i.
-//
-// Example (2 id columns: region, sensor_id):
-// row 0: "A", "s1"
-// row 1: "A", "s2" <- boundary: sensor_id changed
-// row 2: "B", "s1" <- boundary: region changed
-// row 3: "B", "s1"
-// row 4: "B", "s2" <- boundary: sensor_id changed
-// result: [1, 2, 4]
-//
-// Boundaries are computed in one shot at flush time rather than maintained
-// incrementally during add_value / set_column_*. The total work is similar
-// either way, but batch computation here is far more CPU-friendly: the inner
-// loop is a tight memcmp scan over contiguous buffers with good cache
-// locality, and the CPU can pipeline comparisons without the branch overhead
-// and cache thrashing of per-row bookkeeping spread across the write path.
std::vector Tablet::find_all_device_boundaries() const {
const uint32_t row_count = get_cur_row_size();
if (row_count <= 1) return {};
+ // Use uint64_t bitmap instead of vector for faster set/test/scan.
const uint32_t nwords = (row_count + 63) / 64;
std::vector boundary(nwords, 0);
+ // Walk id columns RIGHT to LEFT. In time-series tag systems the rightmost
+ // tags (sensor_id, metric_name, etc.) typically have the highest
+ // cardinality and change most often. By processing them first we mark most
+ // of the boundary bitmap early; subsequent (lower-cardinality) columns then
+ // short-circuit on `boundary[i] already set` for the bulk of their rows.
+ // Reverse order also lets us bail out of the entire scan as soon as every
+ // possible boundary is marked.
uint32_t boundary_count = 0;
const uint32_t max_boundaries = row_count - 1;
for (auto it = id_column_indexes_.rbegin(); it != id_column_indexes_.rend();
@@ -537,43 +608,55 @@ std::vector Tablet::find_all_device_boundaries() const {
const StringColumn& sc = *value_matrix_[*it].string_col;
const int32_t* off = sc.offsets;
const char* buf = sc.buffer;
+ common::BitMap& bitmap = const_cast(bitmaps_[*it]);
for (uint32_t i = 1; i < row_count; i++) {
- if (boundary[i >> 6] & (1ULL << (i & 63))) continue;
+ if (boundary[i >> 6] & (1ULL << (i & 63))) {
+ continue;
+ }
+ const bool prev_null = bitmap.test(i - 1);
+ const bool curr_null = bitmap.test(i);
+ if (prev_null != curr_null) {
+ boundary[i >> 6] |= (1ULL << (i & 63));
+ if (++boundary_count >= max_boundaries) {
+ break;
+ }
+ continue;
+ }
+ if (prev_null) {
+ continue;
+ }
+ // Signed int32 widths so an offset-array corruption that would
+ // otherwise underflow to a huge unsigned value surfaces as
+ // len < 0 instead. memcmp's size_t param needs an explicit cast,
+ // guarded by `len_a > 0`.
int32_t len_a = off[i] - off[i - 1];
int32_t len_b = off[i + 1] - off[i];
if (len_a != len_b ||
(len_a > 0 && memcmp(buf + off[i - 1], buf + off[i],
- static_cast(len_a)) != 0)) {
+ static_cast(len_a)) != 0)) {
boundary[i >> 6] |= (1ULL << (i & 63));
- if (++boundary_count >= max_boundaries) break;
+ if (++boundary_count >= max_boundaries) {
+ break;
+ }
}
}
- if (boundary_count >= max_boundaries) break;
- }
-
- // Sweep the bitmap word by word, extracting set bit positions in order.
- // Each word covers 64 consecutive rows: word w covers rows [w*64, w*64+63].
- //
- // For each word we use two standard bit tricks:
- // __builtin_ctzll(bits) — count trailing zeros = index of lowest set bit
- // bits &= bits - 1 — clear the lowest set bit
- //
- // Example: w=1, bits=0b...00010100 (bits 2 and 4 set)
- // iter 1: ctzll=2 → idx=1*64+2=66, bits becomes 0b...00010000
- // iter 2: ctzll=4 → idx=1*64+4=68, bits becomes 0b...00000000 → exit
- //
- // Guards: idx>0 because row 0 can never be a boundary (no predecessor);
- // idx= max_boundaries) {
+ break;
+ }
+ }
+
+ // Collect boundary positions using bitscan
std::vector result;
for (uint32_t w = 0; w < nwords; w++) {
uint64_t bits = boundary[w];
while (bits) {
- uint32_t bit = bitops::ctz64_nonzero(bits);
+ uint32_t bit =
+ static_cast(common::bitops::ctz_nonzero(bits));
uint32_t idx = w * 64 + bit;
if (idx > 0 && idx < row_count) {
result.push_back(idx);
}
- bits &= bits - 1;
+ bits &= bits - 1; // clear lowest set bit
}
}
return result;
@@ -612,4 +695,4 @@ std::shared_ptr Tablet::get_device_id(int i) const {
return res;
}
-} // end namespace storage
\ No newline at end of file
+} // end namespace storage
diff --git a/cpp/src/common/tablet.h b/cpp/src/common/tablet.h
index 799d6b7cc..76af3ac0e 100644
--- a/cpp/src/common/tablet.h
+++ b/cpp/src/common/tablet.h
@@ -22,7 +22,6 @@
#include
#include
-#include
#include
#include "common/config/config.h"
@@ -47,7 +46,6 @@ class TabletColIterator;
* with their associated metadata such as column names and types.
*/
class Tablet {
- public:
// Arrow-style string column: offsets + contiguous buffer.
// string[i] = buffer + offsets[i], len = offsets[i+1] - offsets[i]
struct StringColumn {
@@ -61,11 +59,10 @@ class Tablet {
void init(uint32_t max_rows, uint32_t init_buf_capacity) {
offsets = (int32_t*)common::mem_alloc(
- sizeof(int32_t) * (max_rows + 1), common::MOD_DEFAULT);
+ sizeof(int32_t) * (max_rows + 1), common::MOD_TABLET);
offsets[0] = 0;
buf_capacity = init_buf_capacity;
- buffer =
- (char*)common::mem_alloc(buf_capacity, common::MOD_DEFAULT);
+ buffer = (char*)common::mem_alloc(buf_capacity, common::MOD_TABLET);
buf_used = 0;
}
@@ -98,14 +95,13 @@ class Tablet {
return buffer + offsets[row];
}
uint32_t get_len(uint32_t row) const {
- return static_cast(offsets[row + 1] - offsets[row]);
+ return offsets[row + 1] - offsets[row];
}
// Return a String view for a given row. The returned reference is
// valid until the next call to get_string_view on this column.
common::String& get_string_view(uint32_t row) {
view_cache_.buf_ = buffer + offsets[row];
- view_cache_.len_ =
- static_cast(offsets[row + 1] - offsets[row]);
+ view_cache_.len_ = offsets[row + 1] - offsets[row];
return view_cache_;
}
@@ -231,11 +227,14 @@ class Tablet {
~Tablet() { destroy(); }
- // Tablet owns raw heap buffers (timestamps_, value_matrix_, bitmaps_) that
- // destroy() frees. The implicitly generated copy operations would shallow-
- // copy those pointers, causing double-free / use-after-free, so copying is
- // disabled. Move transfers ownership and leaves the source empty (its
- // pointers nulled) so the moved-from object destructs harmlessly.
+ // Tablet owns several heap buffers (timestamps_, value_matrix_ with its
+ // StringColumn::buffer/offsets, bitmaps_) that ~Tablet frees. The default
+ // copy ctor / copy-assign shallow-copies the raw pointers, so any copy
+ // path (e.g. `return tablet;` without NRVO under MSVC Debug) leaves the
+ // source Tablet's destructor freeing buffers the copy still points at,
+ // triggering heap-use-after-free in code like
+ // Tablet::find_all_device_boundaries. Make Tablet move-only with a
+ // pointer-stealing move ctor / move-assign so return-by-value is safe.
Tablet(const Tablet&) = delete;
Tablet& operator=(const Tablet&) = delete;
@@ -250,10 +249,14 @@ class Tablet {
value_matrix_(other.value_matrix_),
bitmaps_(other.bitmaps_),
column_categories_(std::move(other.column_categories_)),
- id_column_indexes_(std::move(other.id_column_indexes_)) {
+ id_column_indexes_(std::move(other.id_column_indexes_)),
+ single_device_(other.single_device_) {
other.timestamps_ = nullptr;
other.value_matrix_ = nullptr;
other.bitmaps_ = nullptr;
+ other.cur_row_size_ = 0;
+ // Leaving other.schema_vec_ moved-from is fine; destroy() only
+ // touches the heap buffers above, which we've now nulled out.
}
Tablet& operator=(Tablet&& other) noexcept {
@@ -270,9 +273,11 @@ class Tablet {
bitmaps_ = other.bitmaps_;
column_categories_ = std::move(other.column_categories_);
id_column_indexes_ = std::move(other.id_column_indexes_);
+ single_device_ = other.single_device_;
other.timestamps_ = nullptr;
other.value_matrix_ = nullptr;
other.bitmaps_ = nullptr;
+ other.cur_row_size_ = 0;
}
return *this;
}
@@ -283,12 +288,6 @@ class Tablet {
}
size_t get_column_count() const { return schema_vec_->size(); }
uint32_t get_cur_row_size() const { return cur_row_size_; }
- int64_t get_timestamp(uint32_t row_index) const {
- return timestamps_[row_index];
- }
- bool is_null(uint32_t row_index, uint32_t col_index) const {
- return bitmaps_[col_index].test(row_index);
- }
/**
* @brief Adds a timestamp to the specified row.
@@ -300,25 +299,27 @@ class Tablet {
*/
int add_timestamp(uint32_t row_index, int64_t timestamp);
- /**
- * @brief Bulk copy timestamps into the tablet.
- *
- * @param timestamps Pointer to an array of timestamp values.
- * @param count Number of timestamps to copy. Must be <= max_row_num.
- * If count > cur_row_size_, cur_row_size_ is updated to count,
- * so that subsequent operations know how many rows are populated.
- * @return Returns 0 on success, or a non-zero error code on failure
- * (E_OUT_OF_RANGE if count > max_row_num).
- */
int set_timestamps(const int64_t* timestamps, uint32_t count);
- // Bulk copy fixed-length column data. If bitmap is nullptr, all rows are
- // non-null. Otherwise bit=1 means null, bit=0 means valid (same as TsFile
- // BitMap convention). Callers using other conventions (e.g. Arrow, where
- // 1=valid) must invert before calling.
+ // Bulk copy fixed-length column data. bitmap=nullptr means all non-null.
+ // bitmap uses TsFile convention: bit=1 is null, bit=0 is valid.
int set_column_values(uint32_t schema_index, const void* data,
const uint8_t* bitmap, uint32_t count);
+ // Bulk copy a STRING column from Arrow-style offsets + flat data buffer.
+ // bitmap=nullptr means all non-null; same convention as set_column_values.
+ int set_column_string_values(uint32_t schema_index, const int32_t* offsets,
+ const char* data, const uint8_t* bitmap,
+ uint32_t count);
+
+ // Bulk fill a STRING column with the same value for all rows.
+ int set_column_string_repeated(uint32_t schema_index, const char* str,
+ uint32_t str_len, uint32_t count);
+
+ // Reset per-batch state so the tablet can be reused without reallocating
+ // its backing buffers. row_count is typically 0 before refilling.
+ void reset(uint32_t row_count = 0);
+
void* get_value(int row_index, uint32_t schema_index,
common::TSDataType& data_type) const;
/**
@@ -341,14 +342,10 @@ class Tablet {
std::shared_ptr get_device_id(int i) const;
std::vector find_all_device_boundaries() const;
- // Bulk copy string column data (offsets + data buffer).
- // offsets has count+1 entries and must start from 0 (offsets[0] == 0).
- // bitmap follows TsFile convention (bit=1 means null, nullptr means all
- // valid). Callers using Arrow convention (bit=1 means valid) must invert
- // before calling.
- int set_column_string_values(uint32_t schema_index, const int32_t* offsets,
- const char* data, const uint8_t* bitmap,
- uint32_t count);
+ // When the caller guarantees that all rows belong to a single device,
+ // set this flag to skip the O(n*m) boundary detection in the write path.
+ void set_single_device(bool v) { single_device_ = v; }
+ bool is_single_device() const { return single_device_; }
/**
* @brief Template function to add a value of type T to the specified row
* and column by name.
@@ -406,6 +403,7 @@ class Tablet {
common::BitMap* bitmaps_;
std::vector column_categories_;
std::vector id_column_indexes_;
+ bool single_device_ = false;
};
} // end namespace storage
diff --git a/cpp/src/common/thread_pool.h b/cpp/src/common/thread_pool.h
index f82aea038..191001bd9 100644
--- a/cpp/src/common/thread_pool.h
+++ b/cpp/src/common/thread_pool.h
@@ -27,7 +27,6 @@
#include
#include
#include
-#include
#include
namespace common {
@@ -38,12 +37,27 @@ namespace common {
// (column-parallel decoding).
class ThreadPool {
public:
- explicit ThreadPool(size_t num_threads) : stop_(false), active_(0) {
- for (size_t i = 0; i < num_threads; i++) {
- workers_.emplace_back([this] { worker_loop(); });
+ explicit ThreadPool(size_t num_threads)
+ // A zero-thread pool would silently accept submit() but wait_all()
+ // would block forever because active_ never reaches 0. init_common()
+ // already clamps the configured size to >= 1 before building the
+ // global pool; this normalization is a defensive backstop so any
+ // direct ThreadPool(0) still makes progress.
+ : num_threads_(num_threads == 0 ? 1 : num_threads),
+ stop_(false),
+ active_(0) {
+ for (size_t i = 0; i < num_threads_; i++) {
+ workers_.emplace_back([this, i] { worker_loop(i); });
}
}
+ // Returns this worker's index in [0, num_threads). Returns SIZE_MAX when
+ // called from a non-pool thread. Used by callers that want per-worker
+ // state (e.g., per-worker decoders/compressors).
+ static size_t current_worker_id() { return tl_worker_id_(); }
+
+ size_t num_threads() const { return num_threads_; }
+
~ThreadPool() {
{
std::lock_guard lk(mu_);
@@ -88,7 +102,8 @@ class ThreadPool {
}
private:
- void worker_loop() {
+ void worker_loop(size_t id) {
+ tl_worker_id_() = id;
while (true) {
std::function task;
{
@@ -98,7 +113,23 @@ class ThreadPool {
task = std::move(tasks_.front());
tasks_.pop();
}
- task();
+ // Without the try/catch, a task that throws would:
+ // (1) skip the active_-- below → wait_all() blocks forever
+ // because active_ never drops to zero, and
+ // (2) propagate the exception out of the std::thread function
+ // → std::terminate() takes down the whole process.
+ // Swallowing the exception is unfortunate but it matches the
+ // contract of the public submit(std::function) overload
+ // which has no way to surface the failure back to the caller.
+ // submit() callers receive their error via the std::future
+ // wrapper installed by std::packaged_task — that path never
+ // reaches here, so this catch only fires for fire-and-forget
+ // tasks where the alternative is termination.
+ try {
+ task();
+ } catch (...) {
+ // Intentionally suppressed; see comment above.
+ }
{
std::lock_guard lk(mu_);
active_--;
@@ -107,6 +138,14 @@ class ThreadPool {
}
}
+    // Per-thread worker id. Wrapped in a function so initialization order is
+    // well-defined; each thread's copy starts at the SIZE_MAX sentinel.
+    static size_t& tl_worker_id_() {
+        static thread_local size_t id = static_cast<size_t>(-1);
+        return id;
+    }
+
+ size_t num_threads_;
std::vector workers_;
std::queue> tasks_;
std::mutex mu_;
diff --git a/cpp/src/common/tsblock/tsblock.h b/cpp/src/common/tsblock/tsblock.h
index 859ad393d..b68af1611 100644
--- a/cpp/src/common/tsblock/tsblock.h
+++ b/cpp/src/common/tsblock/tsblock.h
@@ -144,6 +144,12 @@ class RowAppender {
ASSERT(tsblock_->row_count_ > 0);
tsblock_->row_count_--;
}
+ FORCE_INLINE uint32_t remaining() const {
+ return tsblock_->max_row_count_ - tsblock_->row_count_;
+ }
+ FORCE_INLINE void add_rows(uint32_t count) {
+ tsblock_->row_count_ += count;
+ }
FORCE_INLINE void append(uint32_t slot_index, const char* value,
uint32_t len) {
@@ -222,6 +228,19 @@ class ColAppender {
}
FORCE_INLINE void reset() { column_row_count_ = 0; }
+ FORCE_INLINE void bulk_append_fixed(const char* data, uint32_t count,
+ uint32_t elem_size) {
+ vec_->get_value_data().append_fixed_value(data, count * elem_size);
+ vec_->add_row_nums(count);
+ column_row_count_ += count;
+ }
+
+ FORCE_INLINE uint32_t get_column_row_count() const {
+ return column_row_count_;
+ }
+
+ FORCE_INLINE Vector* get_vector() { return vec_; }
+
private:
uint32_t column_index_;
uint32_t column_row_count_;
@@ -242,6 +261,8 @@ class RowIterator {
FORCE_INLINE bool has_next() { return row_id_ < tsblock_->row_count_; }
+ FORCE_INLINE uint32_t get_row_id() const { return row_id_; }
+
FORCE_INLINE uint32_t get_column_count() { return column_count_; }
FORCE_INLINE TSDataType get_data_type(uint32_t column_index) {
@@ -251,17 +272,14 @@ class RowIterator {
FORCE_INLINE void next() {
ASSERT(row_id_ < tsblock_->row_count_);
- ++row_id_;
+ const uint32_t current_row_id = row_id_++;
for (uint32_t i = 0; i < column_count_; ++i) {
- tsblock_->vectors_[i]->update_offset();
+ if (!tsblock_->vectors_[i]->is_null(current_row_id)) {
+ tsblock_->vectors_[i]->update_offset();
+ }
}
}
- FORCE_INLINE void next(size_t ind) const {
- ASSERT(row_id_ < tsblock_->row_count_);
- tsblock_->vectors_[ind]->update_offset();
- }
-
FORCE_INLINE void update_row_id() { row_id_++; }
FORCE_INLINE char* read(uint32_t column_index, uint32_t* __restrict len,
@@ -271,6 +289,22 @@ class RowIterator {
return vec->read(len, null, row_id_);
}
+ // Cheap null check at the current row that avoids the value-read path.
+ FORCE_INLINE bool is_null_at(uint32_t column_index) {
+ ASSERT(column_index < column_count_);
+ return tsblock_->vectors_[column_index]->is_null(row_id_);
+ }
+
+ // Direct access to the underlying Vector for the column. Caller is
+ // responsible for type-correct interpretation of the buffer; intended
+ // for the fast typed-read path that wants to bypass Vector::read's
+ // virtual dispatch (read into the raw buffer at the vector's current
+ // offset_).
+ FORCE_INLINE Vector* get_vector(uint32_t column_index) {
+ ASSERT(column_index < column_count_);
+ return tsblock_->vectors_[column_index];
+ }
+
std::string debug_string(); // for debug
private:
@@ -311,6 +345,23 @@ class ColIterator {
FORCE_INLINE uint32_t get_column_index() { return column_index_; }
+ FORCE_INLINE uint32_t remaining() const {
+ return tsblock_->row_count_ - row_id_;
+ }
+ FORCE_INLINE char* data_ptr() {
+ return vec_->get_value_data().get_data() + vec_->get_offset();
+ }
+ FORCE_INLINE void advance(uint32_t n, uint32_t elem_size) {
+ row_id_ += n;
+ vec_->advance_offset(n * elem_size);
+ }
+
+ FORCE_INLINE void advance_row_only(uint32_t n) { row_id_ += n; }
+
+ FORCE_INLINE uint32_t get_row_id() const { return row_id_; }
+
+ FORCE_INLINE Vector* get_vector() { return vec_; }
+
private:
uint32_t column_index_;
uint32_t row_id_;
diff --git a/cpp/src/common/tsblock/vector/variable_length_vector.h b/cpp/src/common/tsblock/vector/variable_length_vector.h
index b98a9c739..84e541e5c 100644
--- a/cpp/src/common/tsblock/vector/variable_length_vector.h
+++ b/cpp/src/common/tsblock/vector/variable_length_vector.h
@@ -45,8 +45,15 @@ class VariableLengthVector : public Vector {
// cppcheck-suppress missingOverride
FORCE_INLINE void update_offset() OVERRIDE {
- offset_ += variable_type_len_;
- offset_ += last_value_len_;
+ // Self-contained advance: read the length prefix at the current
+ // offset from the buffer rather than relying on a side effect from
+ // a prior read(). This makes update_offset safe when callers skip
+ // reading variable-length columns for some rows (e.g. a row
+ // iterator that only consumes fixed-width columns).
+ uint32_t value_len = 0;
+ std::memcpy(&value_len, values_.get_data() + offset_,
+ sizeof(value_len));
+ offset_ += variable_type_len_ + value_len;
}
// cppcheck-suppress missingOverride
diff --git a/cpp/src/common/tsblock/vector/vector.h b/cpp/src/common/tsblock/vector/vector.h
index 37a96c543..dde3e76cc 100644
--- a/cpp/src/common/tsblock/vector/vector.h
+++ b/cpp/src/common/tsblock/vector/vector.h
@@ -73,6 +73,9 @@ class Vector {
FORCE_INLINE uint32_t get_row_num() { return row_num_; }
FORCE_INLINE void add_row_num() { row_num_++; }
+ FORCE_INLINE void add_row_nums(uint32_t n) { row_num_ += n; }
+ FORCE_INLINE uint32_t get_offset() const { return offset_; }
+ FORCE_INLINE void advance_offset(uint32_t bytes) { offset_ += bytes; }
FORCE_INLINE common::TsBlock* get_tsblock() { return tsblock_; }
diff --git a/cpp/src/common/tsfile_common.h b/cpp/src/common/tsfile_common.h
index b516b608f..fd3690200 100644
--- a/cpp/src/common/tsfile_common.h
+++ b/cpp/src/common/tsfile_common.h
@@ -314,6 +314,11 @@ class ITimeseriesIndex {
virtual common::SimpleList* get_value_chunk_meta_list() const {
return nullptr;
}
+ virtual uint32_t get_value_column_count() const { return 1; }
+ virtual common::SimpleList* get_value_chunk_meta_list(
+ uint32_t col_index) const {
+ return col_index == 0 ? get_value_chunk_meta_list() : nullptr;
+ }
virtual common::String get_measurement_name() const {
return common::String();
@@ -457,7 +462,7 @@ class TimeseriesIndex : public ITimeseriesIndex {
(timeseries_meta_type_ & 0x3F); // TODO
chunk_meta_list_ =
new (chunk_meta_list_buf) common::SimpleList(pa);
- uint32_t start_pos = in.read_pos();
+ uint64_t start_pos = in.read_pos();
while (IS_SUCC(ret) &&
in.read_pos() < start_pos + chunk_meta_list_data_size_) {
void* cm_buf = pa->alloc(sizeof(ChunkMeta));
@@ -589,11 +594,17 @@ class AlignedTimeseriesIndex : public ITimeseriesIndex {
virtual common::String get_measurement_name() const {
return value_ts_idx_->get_measurement_name();
}
+ // Return the VALUE column's data type — that's what consumers like
+ // TsFileReader::get_timeseries_schema and metadata APIs expect for an
+ // aligned measurement. Returning time_ts_idx_->get_data_type() would
+ // surface the time chunk's on-wire VECTOR marker (or INT64 depending
+ // on how the marker is interpreted) for every aligned timeseries,
+ // breaking schema introspection.
virtual common::TSDataType get_data_type() const {
return value_ts_idx_ == nullptr ? common::INVALID_DATATYPE
: value_ts_idx_->get_data_type();
}
- virtual bool is_aligned() const { return true; }
+ bool is_aligned() const override { return true; }
virtual Statistic* get_statistic() const {
return value_ts_idx_->get_statistic();
}
@@ -608,6 +619,66 @@ class AlignedTimeseriesIndex : public ITimeseriesIndex {
#endif
};
+// Index for an aligned series group with one shared time column and any
+// number of value columns. Single-column accessors report value column 0.
+// An out-of-range or null slot in value_ts_idxs_ is treated uniformly as
+// "no such column": every accessor that dereferences a slot checks
+// has_value_index() first, so none of them touches a null entry.
+class MultiAlignedTimeseriesIndex : public ITimeseriesIndex {
+   public:
+    TimeseriesIndex* time_ts_idx_ = nullptr;
+    std::vector value_ts_idxs_;
+
+    MultiAlignedTimeseriesIndex() {}
+    ~MultiAlignedTimeseriesIndex() {}
+
+    // Chunk metas of the time column, or nullptr when it is not set.
+    common::SimpleList* get_time_chunk_meta_list() const override {
+        return time_ts_idx_ ? time_ts_idx_->get_chunk_meta_list() : nullptr;
+    }
+    // Chunk metas of value column 0, or nullptr when it is absent.
+    common::SimpleList* get_value_chunk_meta_list() const override {
+        return get_value_chunk_meta_list(0);
+    }
+    // Number of value-column slots, including any that are null.
+    uint32_t get_value_column_count() const override {
+        return static_cast<uint32_t>(value_ts_idxs_.size());
+    }
+    // Chunk metas of value column col_index, or nullptr when that column is
+    // out of range or its slot is null.
+    common::SimpleList* get_value_chunk_meta_list(
+        uint32_t col_index) const override {
+        return has_value_index(col_index)
+                   ? value_ts_idxs_[col_index]->get_chunk_meta_list()
+                   : nullptr;
+    }
+    // Measurement name of value column 0; empty String when it is absent.
+    common::String get_measurement_name() const override {
+        return has_value_index(0) ? value_ts_idxs_[0]->get_measurement_name()
+                                  : common::String();
+    }
+    // Same fix as AlignedTimeseriesIndex: report the first value column's
+    // type rather than the time chunk's VECTOR marker. Consumers walking
+    // a multi-aligned device for schema info expect the measurement type.
+    common::TSDataType get_data_type() const override {
+        return has_value_index(0) ? value_ts_idxs_[0]->get_data_type()
+                                  : common::INVALID_DATATYPE;
+    }
+    bool is_aligned() const override { return true; }
+    // No statistic is reported for the group as a whole.
+    Statistic* get_statistic() const override { return nullptr; }
+
+    const std::vector& get_value_indices() const {
+        return value_ts_idxs_;
+    }
+
+   private:
+    // True when slot i exists and holds a non-null index.
+    bool has_value_index(size_t i) const {
+        return i < value_ts_idxs_.size() && value_ts_idxs_[i] != nullptr;
+    }
+};
+
class TSMIterator {
public:
explicit TSMIterator(
@@ -629,7 +686,6 @@ class TSMIterator {
common::SimpleList::Iterator chunk_meta_iter_;
// timeseries measurenemnt chunk meta info
- // map >>
std::map,
std::map>,
IDeviceIDComparator>
diff --git a/cpp/src/compress/lz4_compressor.cc b/cpp/src/compress/lz4_compressor.cc
index 88c64466f..0f19ce179 100644
--- a/cpp/src/compress/lz4_compressor.cc
+++ b/cpp/src/compress/lz4_compressor.cc
@@ -76,9 +76,13 @@ int LZ4Compressor::compress(char* uncompressed_buf,
}
void LZ4Compressor::after_compress(char* compressed_buf) {
+ // See SnappyCompressor::after_compress for the same reasoning: the member
+ // pointer can lag behind the caller-known buffer across page reuse.
if (compressed_buf != nullptr) {
- mem_free(compressed_buf_);
- compressed_buf_ = nullptr;
+ mem_free(compressed_buf);
+ if (compressed_buf_ == compressed_buf) {
+ compressed_buf_ = nullptr;
+ }
}
}
@@ -132,9 +136,11 @@ int LZ4Compressor::uncompress(char* compressed_buf, uint32_t compressed_buf_len,
void LZ4Compressor::after_uncompress(char* uncompressed_buf) {
if (uncompressed_buf != nullptr) {
- mem_free(uncompressed_buf_);
- uncompressed_buf_ = nullptr;
+ mem_free(uncompressed_buf);
+ if (uncompressed_buf_ == uncompressed_buf) {
+ uncompressed_buf_ = nullptr;
+ }
}
}
-} // end namespace storage
\ No newline at end of file
+} // end namespace storage
diff --git a/cpp/src/compress/snappy_compressor.cc b/cpp/src/compress/snappy_compressor.cc
index 6a2735e7b..e78a67ac3 100644
--- a/cpp/src/compress/snappy_compressor.cc
+++ b/cpp/src/compress/snappy_compressor.cc
@@ -73,9 +73,16 @@ int SnappyCompressor::compress(char* uncompressed_buf,
}
void SnappyCompressor::after_compress(char* compressed_buf) {
+ // Free the buffer the caller is releasing, not whatever we last cached in
+ // compressed_buf_. The member is only kept so destroy() can clean up if
+ // after_compress is never called. When the same compressor is reused
+ // across pages, compressed_buf_ may point to a different (live) allocation
+ // or be null by the time the caller releases an earlier page's buffer.
if (compressed_buf != nullptr) {
- mem_free(compressed_buf_);
- compressed_buf_ = nullptr;
+ mem_free(compressed_buf);
+ if (compressed_buf_ == compressed_buf) {
+ compressed_buf_ = nullptr;
+ }
}
}
@@ -109,9 +116,11 @@ int SnappyCompressor::uncompress(char* compressed_buf,
void SnappyCompressor::after_uncompress(char* uncompressed_buf) {
if (uncompressed_buf != nullptr) {
- mem_free(uncompressed_buf_);
- uncompressed_buf_ = nullptr;
+ mem_free(uncompressed_buf);
+ if (uncompressed_buf_ == uncompressed_buf) {
+ uncompressed_buf_ = nullptr;
+ }
}
}
-} // end namespace storage
\ No newline at end of file
+} // end namespace storage
diff --git a/cpp/src/compress/uncompressed_compressor.h b/cpp/src/compress/uncompressed_compressor.h
index c262837a8..c342b5001 100644
--- a/cpp/src/compress/uncompressed_compressor.h
+++ b/cpp/src/compress/uncompressed_compressor.h
@@ -20,19 +20,38 @@
#ifndef COMPRESS_UNCOMPRESSED_COMPRESSOR_H
#define COMPRESS_UNCOMPRESSED_COMPRESSOR_H
+#include
+
+#include "common/allocator/alloc_base.h"
#include "compressor.h"
+#include "utils/errno_define.h"
+#include "utils/util_define.h"
namespace storage {
class UncompressedCompressor : public Compressor {
public:
- UncompressedCompressor() {}
- virtual ~UncompressedCompressor() {}
+ UncompressedCompressor() : uncompressed_buf_(nullptr) {}
+ virtual ~UncompressedCompressor() {
+ if (uncompressed_buf_ != nullptr) {
+ common::mem_free(uncompressed_buf_);
+ uncompressed_buf_ = nullptr;
+ }
+ }
int reset(bool for_compress) {
UNUSED(for_compress);
+ if (uncompressed_buf_ != nullptr) {
+ common::mem_free(uncompressed_buf_);
+ uncompressed_buf_ = nullptr;
+ }
return common::E_OK;
}
- void destroy() {}
+ void destroy() {
+ if (uncompressed_buf_ != nullptr) {
+ common::mem_free(uncompressed_buf_);
+ uncompressed_buf_ = nullptr;
+ }
+ }
int compress(char* uncompressed_buf, uint32_t uncompressed_buf_len,
char*& compressed_buf, uint32_t& compressed_buf_len) {
compressed_buf = uncompressed_buf;
@@ -43,11 +62,33 @@ class UncompressedCompressor : public Compressor {
int uncompress(char* compressed_buf, uint32_t compressed_buf_len,
char*& uncompressed_buf, uint32_t& uncompressed_buf_len) {
- uncompressed_buf = compressed_buf;
+ char* buf = static_cast(
+ common::mem_alloc(compressed_buf_len, common::MOD_COMPRESSOR_OBJ));
+ if (buf == nullptr) {
+ return common::E_OOM;
+ }
+ memcpy(buf, compressed_buf, compressed_buf_len);
+ uncompressed_buf = buf;
+ uncompressed_buf_ = buf;
uncompressed_buf_len = compressed_buf_len;
return common::E_OK;
}
- void after_uncompress(char* uncompressed_buf) { UNUSED(uncompressed_buf); }
+ void after_uncompress(char* uncompressed_buf) {
+ // Free the buffer the caller is releasing, not the most-recently
+ // allocated one cached in uncompressed_buf_. Two successive
+ // uncompress() calls would overwrite uncompressed_buf_ with the
+ // second allocation; after_uncompress(first) used to free that
+ // second buffer (use-after-free for the still-live one) and leak
+ // the first.
+ if (uncompressed_buf == nullptr) return;
+ common::mem_free(uncompressed_buf);
+ if (uncompressed_buf_ == uncompressed_buf) {
+ uncompressed_buf_ = nullptr;
+ }
+ }
+
+ private:
+ char* uncompressed_buf_;
};
} // end namespace storage
diff --git a/cpp/src/cwrapper/arrow_c.cc b/cpp/src/cwrapper/arrow_c.cc
index 931c17de7..3f02a7692 100644
--- a/cpp/src/cwrapper/arrow_c.cc
+++ b/cpp/src/cwrapper/arrow_c.cc
@@ -843,7 +843,12 @@ int ArrowStructToTablet(const char* table_name, const ArrowArray* in_array,
const ArrowArray* ts_arr = in_array->children[time_col_index];
const int64_t* ts_buf =
static_cast(ts_arr->buffers[1]) + ts_arr->offset;
- tablet->set_timestamps(ts_buf, static_cast(n_rows));
+ int sret =
+ tablet->set_timestamps(ts_buf, static_cast(n_rows));
+ if (sret != common::E_OK) {
+ delete tablet;
+ return sret;
+ }
}
// Fill data columns from Arrow children (use read_modes to decode buffers)
@@ -892,11 +897,15 @@ int ArrowStructToTablet(const char* table_name, const ArrowArray* in_array,
delete tablet;
return common::E_OOM;
}
- tablet->set_column_values(tcol, data, null_bm,
- static_cast(n_rows));
+ int sret = tablet->set_column_values(
+ tcol, data, null_bm, static_cast(n_rows));
if (null_bm != nullptr) {
common::mem_free(null_bm);
}
+ if (sret != common::E_OK) {
+ delete tablet;
+ return sret;
+ }
break;
}
case common::DATE: {
@@ -948,14 +957,18 @@ int ArrowStructToTablet(const char* table_name, const ArrowArray* in_array,
delete tablet;
return common::E_OOM;
}
- tablet->set_column_string_values(tcol, offsets, data, null_bm,
- nrows);
+ int sret = tablet->set_column_string_values(tcol, offsets, data,
+ null_bm, nrows);
if (null_bm != nullptr) {
common::mem_free(null_bm);
}
if (norm_offsets != nullptr) {
common::mem_free(norm_offsets);
}
+ if (sret != common::E_OK) {
+ delete tablet;
+ return sret;
+ }
break;
}
default:
diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc
index 0934981f9..0fc915974 100644
--- a/cpp/src/cwrapper/tsfile_cwrapper.cc
+++ b/cpp/src/cwrapper/tsfile_cwrapper.cc
@@ -21,7 +21,9 @@
#include
#include
+#include
#include
+
#ifdef _WIN32
#include
#else
@@ -92,8 +94,14 @@ WriteFile write_file_new(const char* pathname, ERRNO* err_code) {
int ret;
init_tsfile_config();
- if (access(pathname, F_OK) == 0) {
- *err_code = common::E_ALREADY_EXIST;
+ struct stat path_stat {};
+ if (stat(pathname, &path_stat) == 0) {
+#ifdef _WIN32
+ const bool is_dir = (path_stat.st_mode & _S_IFDIR) != 0;
+#else
+ const bool is_dir = S_ISDIR(path_stat.st_mode);
+#endif
+ *err_code = is_dir ? common::E_FILE_OPEN_ERR : common::E_ALREADY_EXIST;
return nullptr;
}
@@ -110,6 +118,17 @@ WriteFile write_file_new(const char* pathname, ERRNO* err_code) {
TsFileWriter tsfile_writer_new(WriteFile file, TableSchema* schema,
ERRNO* err_code) {
+ // C API: every public entry must defend against null callers — a null
+ // schema or err_code would crash the host process the moment it's
+ // dereferenced. The tag-filter helpers already follow this pattern.
+ if (err_code == nullptr) {
+ return nullptr;
+ }
+ if (file == nullptr || schema == nullptr ||
+ schema->column_schemas == nullptr || schema->table_name == nullptr) {
+ *err_code = common::E_INVALID_ARG;
+ return nullptr;
+ }
if (schema->column_num == 0) {
*err_code = common::E_INVALID_SCHEMA;
return nullptr;
@@ -149,6 +168,15 @@ TsFileWriter tsfile_writer_new_with_memory_threshold(WriteFile file,
TableSchema* schema,
uint64_t memory_threshold,
ERRNO* err_code) {
+ // See tsfile_writer_new() above for the null-guard rationale.
+ if (err_code == nullptr) {
+ return nullptr;
+ }
+ if (file == nullptr || schema == nullptr ||
+ schema->column_schemas == nullptr || schema->table_name == nullptr) {
+ *err_code = common::E_INVALID_ARG;
+ return nullptr;
+ }
if (schema->column_num == 0) {
*err_code = common::E_INVALID_SCHEMA;
return nullptr;
@@ -158,11 +186,21 @@ TsFileWriter tsfile_writer_new_with_memory_threshold(WriteFile file,
std::set column_names;
for (int i = 0; i < schema->column_num; i++) {
ColumnSchema cur_schema = schema->column_schemas[i];
- if (column_names.find(cur_schema.column_name) == column_names.end()) {
+ // Reject only when the name has already been seen. The previous
+ // condition was inverted, so the first column (always a fresh name)
+ // was rejected as a duplicate and this constructor was effectively
+ // unusable — tsfile_writer_new()'s loop above has the correct check
+ // for comparison.
+ if (column_names.find(cur_schema.column_name) != column_names.end()) {
*err_code = common::E_INVALID_SCHEMA;
return nullptr;
}
column_names.insert(cur_schema.column_name);
+ if (cur_schema.column_category == TAG &&
+ cur_schema.data_type != TS_DATATYPE_STRING) {
+ *err_code = common::E_INVALID_SCHEMA;
+ return nullptr;
+ }
column_schemas.emplace_back(
cur_schema.column_name,
static_cast(cur_schema.data_type),
@@ -1205,6 +1243,8 @@ ERRNO populate_c_metadata_map_from_cpp(
if (m.measurement_name == nullptr) {
for (uint32_t u = 0; u < slot; u++) {
free_timeseries_statistic_heap(&e.timeseries[u].statistic);
+ free_timeseries_statistic_heap(
+ &e.timeseries[u].timeline_statistic);
free(e.timeseries[u].measurement_name);
}
free(e.timeseries);
@@ -1465,6 +1505,13 @@ Tablet _tablet_new_with_target_name(const char* device_id,
}
ERRNO _tsfile_writer_register_table(TsFileWriter writer, TableSchema* schema) {
+ if (writer == nullptr || schema == nullptr ||
+ schema->column_schemas == nullptr || schema->table_name == nullptr) {
+ return common::E_INVALID_ARG;
+ }
+ if (schema->column_num <= 0) {
+ return common::E_INVALID_SCHEMA;
+ }
std::vector measurement_schemas;
std::vector column_categories;
measurement_schemas.resize(schema->column_num);
@@ -1587,13 +1634,50 @@ ResultSet _tsfile_reader_query_device(TsFileReader reader,
return qds;
}
-// ---------- Tag Filter API ----------
+// ============== Tag Filter API Implementation ==============
+
+// Helper macro to avoid repetition in tag filter factory functions.
+// The shared_ptr must stay alive while TagFilterBuilder accesses the schema.
+// Every C-API entry must validate its pointers: a null reader would be
+// dereferenced by r->get_table_schema(), and null table/column/value would
+// feed std::string a null pointer (UB / crash).
+#define DEFINE_TAG_FILTER_FACTORY(name, method) \
+ TagFilterHandle tsfile_tag_filter_##name( \
+ TsFileReader reader, const char* table_name, const char* column_name, \
+ const char* value) { \
+ if (reader == nullptr || table_name == nullptr || \
+ column_name == nullptr || value == nullptr) { \
+ return nullptr; \
+ } \
+ auto* r = static_cast(reader); \
+ auto schema = r->get_table_schema(table_name); \
+ if (!schema) return nullptr; \
+ storage::TagFilterBuilder builder(schema.get()); \
+ return builder.method(column_name, value); \
+ }
+
+DEFINE_TAG_FILTER_FACTORY(eq, eq)
+DEFINE_TAG_FILTER_FACTORY(neq, neq)
+DEFINE_TAG_FILTER_FACTORY(lt, lt)
+DEFINE_TAG_FILTER_FACTORY(lteq, lteq)
+DEFINE_TAG_FILTER_FACTORY(gt, gt)
+DEFINE_TAG_FILTER_FACTORY(gteq, gteq)
+
+#undef DEFINE_TAG_FILTER_FACTORY
TagFilterHandle tsfile_tag_filter_create(TsFileReader reader,
const char* table_name,
const char* column_name,
const char* value, TagFilterOp op,
ERRNO* err_code) {
+ if (err_code == nullptr) {
+ return nullptr;
+ }
+ if (reader == nullptr || table_name == nullptr || column_name == nullptr ||
+ value == nullptr) {
+ *err_code = common::E_INVALID_ARG;
+ return nullptr;
+ }
auto* r = static_cast(reader);
auto schema = r->get_table_schema(table_name);
if (!schema) {
@@ -1656,25 +1740,30 @@ TagFilterHandle tsfile_tag_filter_between(TsFileReader reader,
TagFilterHandle tsfile_tag_filter_and(TagFilterHandle left,
TagFilterHandle right) {
- return static_cast(storage::TagFilterBuilder::and_filter(
+ if (!left || !right) return nullptr;
+ return storage::TagFilterBuilder::and_filter(
static_cast(left),
- static_cast(right)));
+ static_cast(right));
}
TagFilterHandle tsfile_tag_filter_or(TagFilterHandle left,
TagFilterHandle right) {
- return static_cast(storage::TagFilterBuilder::or_filter(
+ if (!left || !right) return nullptr;
+ return storage::TagFilterBuilder::or_filter(
static_cast(left),
- static_cast(right)));
+ static_cast(right));
}
TagFilterHandle tsfile_tag_filter_not(TagFilterHandle filter) {
- return static_cast(storage::TagFilterBuilder::not_filter(
- static_cast(filter)));
+ if (!filter) return nullptr;
+ return storage::TagFilterBuilder::not_filter(
+ static_cast(filter));
}
void tsfile_tag_filter_free(TagFilterHandle filter) {
- delete static_cast(filter);
+ if (filter) {
+ delete static_cast(filter);
+ }
}
ResultSet tsfile_query_table_with_tag_filter(
diff --git a/cpp/src/cwrapper/tsfile_cwrapper.h b/cpp/src/cwrapper/tsfile_cwrapper.h
index ae3e28eed..3b3b13c36 100644
--- a/cpp/src/cwrapper/tsfile_cwrapper.h
+++ b/cpp/src/cwrapper/tsfile_cwrapper.h
@@ -905,32 +905,68 @@ TagFilterHandle tsfile_tag_filter_between(TsFileReader reader,
bool is_not, ERRNO* err_code);
/**
- * @brief Combine two tag filters with AND.
+ * @brief Create a tag equality filter: column == value.
+ *
+ * @param reader [in] Valid TsFileReader handle (used to resolve column index).
+ * @param table_name [in] Target table name.
+ * @param column_name [in] Tag column name.
+ * @param value [in] Value to compare against.
+ * @return TagFilterHandle on success, NULL on failure.
+ */
+TagFilterHandle tsfile_tag_filter_eq(TsFileReader reader,
+ const char* table_name,
+ const char* column_name,
+ const char* value);
+
+TagFilterHandle tsfile_tag_filter_neq(TsFileReader reader,
+ const char* table_name,
+ const char* column_name,
+ const char* value);
+
+TagFilterHandle tsfile_tag_filter_lt(TsFileReader reader,
+ const char* table_name,
+ const char* column_name,
+ const char* value);
+
+TagFilterHandle tsfile_tag_filter_lteq(TsFileReader reader,
+ const char* table_name,
+ const char* column_name,
+ const char* value);
+
+TagFilterHandle tsfile_tag_filter_gt(TsFileReader reader,
+ const char* table_name,
+ const char* column_name,
+ const char* value);
+
+TagFilterHandle tsfile_tag_filter_gteq(TsFileReader reader,
+ const char* table_name,
+ const char* column_name,
+ const char* value);
+
+/**
+ * @brief Logical AND of two tag filters. Takes ownership of left and right.
*/
TagFilterHandle tsfile_tag_filter_and(TagFilterHandle left,
TagFilterHandle right);
/**
- * @brief Combine two tag filters with OR.
+ * @brief Logical OR of two tag filters. Takes ownership of left and right.
*/
TagFilterHandle tsfile_tag_filter_or(TagFilterHandle left,
TagFilterHandle right);
/**
- * @brief Negate a tag filter.
+ * @brief Logical NOT of a tag filter. Takes ownership of filter.
*/
TagFilterHandle tsfile_tag_filter_not(TagFilterHandle filter);
/**
- * @brief Free a tag filter and all its children.
+ * @brief Free a tag filter handle.
*/
void tsfile_tag_filter_free(TagFilterHandle filter);
/**
- * @brief Query table with tag filter.
- *
- * @param batch_size <= 0 means row-by-row return mode,
- * > 0 means return TsBlock with the specified block size.
+ * @brief Batch query with tag filter support.
*/
ResultSet tsfile_query_table_with_tag_filter(
TsFileReader reader, const char* table_name, char** columns,
diff --git a/cpp/src/encoding/decoder.h b/cpp/src/encoding/decoder.h
index c290b5791..24455ca01 100644
--- a/cpp/src/encoding/decoder.h
+++ b/cpp/src/encoding/decoder.h
@@ -21,6 +21,7 @@
#define ENCODING_DECODER_H
#include "common/allocator/byte_stream.h"
+#include "common/db_common.h"
namespace storage {
@@ -37,6 +38,140 @@ class Decoder {
virtual int read_double(double& ret_value, common::ByteStream& in) = 0;
virtual int read_String(common::String& ret_value, common::PageArena& pa,
common::ByteStream& in) = 0;
+
+    // Default batch read for INT32: calls read_int32() once per value until
+    // `capacity` values are stored in `out` or has_remaining(in) is false.
+    // `actual` receives the number of values written. The first non-E_OK
+    // status from read_int32() is returned unchanged (values stored before
+    // it stay counted in `actual`); otherwise returns common::E_OK.
+    virtual int read_batch_int32(int32_t* out, int capacity, int& actual,
+                                 common::ByteStream& in) {
+        for (actual = 0; actual < capacity && has_remaining(in); ++actual) {
+            int32_t decoded;
+            const int status = read_int32(decoded, in);
+            if (status != common::E_OK) {
+                return status;
+            }
+            out[actual] = decoded;
+        }
+        return common::E_OK;
+    }
+
+    // Default batch read for INT64: calls read_int64() once per value until
+    // `capacity` values are stored in `out` or has_remaining(in) is false.
+    // `actual` receives the number of values written. The first non-E_OK
+    // status from read_int64() is returned unchanged; otherwise E_OK.
+    virtual int read_batch_int64(int64_t* out, int capacity, int& actual,
+                                 common::ByteStream& in) {
+        for (actual = 0; actual < capacity && has_remaining(in); ++actual) {
+            int64_t decoded;
+            const int status = read_int64(decoded, in);
+            if (status != common::E_OK) {
+                return status;
+            }
+            out[actual] = decoded;
+        }
+        return common::E_OK;
+    }
+
+    // Default batch read for FLOAT: calls read_float() once per value until
+    // `capacity` values are stored in `out` or has_remaining(in) is false.
+    // `actual` receives the number of values written. The first non-E_OK
+    // status from read_float() is returned unchanged; otherwise E_OK.
+    virtual int read_batch_float(float* out, int capacity, int& actual,
+                                 common::ByteStream& in) {
+        for (actual = 0; actual < capacity && has_remaining(in); ++actual) {
+            float decoded;
+            const int status = read_float(decoded, in);
+            if (status != common::E_OK) {
+                return status;
+            }
+            out[actual] = decoded;
+        }
+        return common::E_OK;
+    }
+
+    // Default batch read for DOUBLE: calls read_double() once per value
+    // until `capacity` values are stored in `out` or has_remaining(in) is
+    // false. `actual` receives the number of values written. The first
+    // non-E_OK status from read_double() is returned unchanged; otherwise
+    // E_OK.
+    virtual int read_batch_double(double* out, int capacity, int& actual,
+                                  common::ByteStream& in) {
+        for (actual = 0; actual < capacity && has_remaining(in); ++actual) {
+            double decoded;
+            const int status = read_double(decoded, in);
+            if (status != common::E_OK) {
+                return status;
+            }
+            out[actual] = decoded;
+        }
+        return common::E_OK;
+    }
+
+    // Default skip for INT32: decodes and discards values via read_int32()
+    // until `count` values are consumed or has_remaining(in) is false.
+    // `skipped` receives the number of values consumed. The first non-E_OK
+    // status from read_int32() is returned unchanged; otherwise E_OK.
+    virtual int skip_int32(int count, int& skipped, common::ByteStream& in) {
+        for (skipped = 0; skipped < count && has_remaining(in); ++skipped) {
+            int32_t discarded;
+            const int status = read_int32(discarded, in);
+            if (status != common::E_OK) {
+                return status;
+            }
+        }
+        return common::E_OK;
+    }
+
+    // Default skip for INT64: decodes and discards values via read_int64()
+    // until `count` values are consumed or has_remaining(in) is false.
+    // `skipped` receives the number of values consumed. The first non-E_OK
+    // status from read_int64() is returned unchanged; otherwise E_OK.
+    virtual int skip_int64(int count, int& skipped, common::ByteStream& in) {
+        for (skipped = 0; skipped < count && has_remaining(in); ++skipped) {
+            int64_t discarded;
+            const int status = read_int64(discarded, in);
+            if (status != common::E_OK) {
+                return status;
+            }
+        }
+        return common::E_OK;
+    }
+
+    // Default skip for FLOAT: decodes and discards values via read_float()
+    // until `count` values are consumed or has_remaining(in) is false.
+    // `skipped` receives the number of values consumed. The first non-E_OK
+    // status from read_float() is returned unchanged; otherwise E_OK.
+    virtual int skip_float(int count, int& skipped, common::ByteStream& in) {
+        for (skipped = 0; skipped < count && has_remaining(in); ++skipped) {
+            float discarded;
+            const int status = read_float(discarded, in);
+            if (status != common::E_OK) {
+                return status;
+            }
+        }
+        return common::E_OK;
+    }
+
+    // Default skip for DOUBLE: decodes and discards values via read_double()
+    // until `count` values are consumed or has_remaining(in) is false.
+    // `skipped` receives the number of values consumed. The first non-E_OK
+    // status from read_double() is returned unchanged; otherwise E_OK.
+    virtual int skip_double(int count, int& skipped, common::ByteStream& in) {
+        for (skipped = 0; skipped < count && has_remaining(in); ++skipped) {
+            double discarded;
+            const int status = read_double(discarded, in);
+            if (status != common::E_OK) {
+                return status;
+            }
+        }
+        return common::E_OK;
+    }
+
+    // Block-level filter check: peek the next block header and compute
+    // the value range [block_min, block_max] without decoding.
+    // Returns true if a block was peeked; false if not supported or no data.
+    // After peeking, caller must either:
+    //   - Call skip_peeked_block_int64() to skip the block
+    //   - Call read_batch_int64() which will use the peeked header
+    //
+    // This base implementation is the "not supported" case: it always
+    // returns false and never assigns block_min, block_max or block_count,
+    // so those outputs must not be read after a false return.
+    virtual bool peek_next_block_range_int64(common::ByteStream& in,
+                                             int64_t& block_min,
+                                             int64_t& block_max,
+                                             int& block_count) {
+        return false;
+    }
+
+    // Skip the block whose header was already consumed by peek.
+    // This base implementation does no work: it returns
+    // common::E_NOT_SUPPORT and does not assign `skipped`.
+    virtual int skip_peeked_block_int64(common::ByteStream& in, int& skipped) {
+        return common::E_NOT_SUPPORT;
+    }
};
} // end namespace storage
diff --git a/cpp/src/encoding/dictionary_encoder.h b/cpp/src/encoding/dictionary_encoder.h
index be5f78a09..8f7c495c4 100644
--- a/cpp/src/encoding/dictionary_encoder.h
+++ b/cpp/src/encoding/dictionary_encoder.h
@@ -83,7 +83,12 @@ class DictionaryEncoder : public Encoder {
if (entry_index_.count(value) == 0) {
index_entry_.push_back(value);
map_size_ = map_size_ + value.length();
- entry_index_[value] = static_cast(index_entry_.size()) - 1;
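+            // Compute the new index into a named local before inserting.
+            // The pre-C++17 `m[k] = m.size()` evaluation-order hazard does
+            // not apply directly here: the index is taken from index_entry_
+            // (the vector, already grown by push_back above) while the
+            // insert goes into entry_index_ (the map). The local simply
+            // keeps "compute index" and "insert" as two separate steps.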
+ const int new_idx = static_cast(index_entry_.size()) - 1;
+ entry_index_[value] = new_idx;
}
values_encoder_.encode(entry_index_[value], out);
return common::E_OK;
diff --git a/cpp/src/encoding/encoder.h b/cpp/src/encoding/encoder.h
index 921686446..386129f6e 100644
--- a/cpp/src/encoding/encoder.h
+++ b/cpp/src/encoding/encoder.h
@@ -48,6 +48,81 @@ class Encoder {
* @return the maximal size of possible memory occupied by current encoder
*/
virtual int get_max_byte_size() = 0;
+
+ /*
+ * Batch encoding interfaces.
+ * Default implementations fall back to per-value encode().
+ * Subclasses may override for better performance.
+ */
+    // Per-value fallback for bool batches: passes values[0..count) to
+    // encode() in order and stops at the first call that RET_FAIL reports
+    // as failed. Returns `ret` as left by RET_FAIL (E_OK when count == 0).
+    virtual int encode_batch(const bool* values, uint32_t count,
+                             common::ByteStream& out_stream) {
+        int ret = common::E_OK;
+        const bool* const end = values + count;
+        for (const bool* cur = values; cur != end; ++cur) {
+            if (RET_FAIL(encode(*cur, out_stream))) {
+                break;
+            }
+        }
+        return ret;
+    }
+    // Per-value fallback for int32 batches: passes values[0..count) to
+    // encode() in order and stops at the first call that RET_FAIL reports
+    // as failed. Returns `ret` as left by RET_FAIL (E_OK when count == 0).
+    virtual int encode_batch(const int32_t* values, uint32_t count,
+                             common::ByteStream& out_stream) {
+        int ret = common::E_OK;
+        const int32_t* const end = values + count;
+        for (const int32_t* cur = values; cur != end; ++cur) {
+            if (RET_FAIL(encode(*cur, out_stream))) {
+                break;
+            }
+        }
+        return ret;
+    }
+    // Per-value fallback for int64 batches: passes values[0..count) to
+    // encode() in order and stops at the first call that RET_FAIL reports
+    // as failed. Returns `ret` as left by RET_FAIL (E_OK when count == 0).
+    virtual int encode_batch(const int64_t* values, uint32_t count,
+                             common::ByteStream& out_stream) {
+        int ret = common::E_OK;
+        const int64_t* const end = values + count;
+        for (const int64_t* cur = values; cur != end; ++cur) {
+            if (RET_FAIL(encode(*cur, out_stream))) {
+                break;
+            }
+        }
+        return ret;
+    }
+    // Per-value fallback for float batches: passes values[0..count) to
+    // encode() in order and stops at the first call that RET_FAIL reports
+    // as failed. Returns `ret` as left by RET_FAIL (E_OK when count == 0).
+    virtual int encode_batch(const float* values, uint32_t count,
+                             common::ByteStream& out_stream) {
+        int ret = common::E_OK;
+        const float* const end = values + count;
+        for (const float* cur = values; cur != end; ++cur) {
+            if (RET_FAIL(encode(*cur, out_stream))) {
+                break;
+            }
+        }
+        return ret;
+    }
+    // Per-value fallback for double batches: passes values[0..count) to
+    // encode() in order and stops at the first call that RET_FAIL reports
+    // as failed. Returns `ret` as left by RET_FAIL (E_OK when count == 0).
+    virtual int encode_batch(const double* values, uint32_t count,
+                             common::ByteStream& out_stream) {
+        int ret = common::E_OK;
+        const double* const end = values + count;
+        for (const double* cur = values; cur != end; ++cur) {
+            if (RET_FAIL(encode(*cur, out_stream))) {
+                break;
+            }
+        }
+        return ret;
+    }
+
+    // Encodes `count` strings stored back-to-back in `buffer`, delimited by
+    // an offset array (the comment in the original cites the Arrow-style
+    // layout of Tablet::StringColumn).
+    // String i starts at buffer + offsets[start_idx + i] and its length is
+    // offsets[start_idx + i + 1] - offsets[start_idx + i], so `offsets` is
+    // read up to index start_idx + count.
+    // Each string is wrapped in a common::String and passed to encode();
+    // the loop stops at the first call RET_FAIL reports as failed and
+    // returns `ret` as left by RET_FAIL.
+    virtual int encode_string_batch(const char* buffer, const uint32_t* offsets,
+                                    uint32_t start_idx, uint32_t count,
+                                    common::ByteStream& out_stream) {
+        int ret = common::E_OK;
+        for (uint32_t i = 0; i < count; ++i) {
+            const uint32_t idx = start_idx + i;
+            const uint32_t begin = offsets[idx];
+            uint32_t len = offsets[idx + 1] - begin;
+            common::String val(buffer + begin, len);
+            if (RET_FAIL(encode(val, out_stream))) {
+                break;
+            }
+        }
+        return ret;
+    }
};
} // end namespace storage
diff --git a/cpp/src/encoding/gorilla_decoder.h b/cpp/src/encoding/gorilla_decoder.h
index 5684561aa..e1e490105 100644
--- a/cpp/src/encoding/gorilla_decoder.h
+++ b/cpp/src/encoding/gorilla_decoder.h
@@ -30,6 +30,163 @@
namespace storage {
+// ── Raw-pointer bit reader ────────────────────────────────────────────────
+// Operates directly on a contiguous byte array, bypassing ByteStream's
+// per-byte read_buf() overhead (atomic loads, page boundary checks, memcpy).
+
+// Bit-level cursor over a contiguous byte array. Each byte is consumed from
+// its most-significant bit downwards. Callers fill in data, pos, data_len,
+// bits and cur_byte before the first read; `exhausted` starts out false.
+struct GorillaBitReader {
+    const uint8_t* data;
+    uint32_t pos;       // next byte index to load
+    uint32_t data_len;  // total bytes
+    int bits;           // remaining bits in cur_byte (0..8)
+    uint8_t cur_byte;
+    // Set once a load was attempted on an empty input, or once read_bit /
+    // read_long ran out of bits mid-value. Without this, a truncated page
+    // would spin read_long() forever (bits stays 0, n -= 0 makes no
+    // progress) and read_bit() would execute a negative shift via
+    // (cur_byte >> (bits - 1)).
+    bool exhausted = false;
+
+    // Refills cur_byte from data[pos] when the current byte is used up.
+    // If no byte is left, cur_byte/bits stay as they are and `exhausted`
+    // is raised instead.
+    FORCE_INLINE void load_byte_if_empty() {
+        if (bits == 0) {
+            if (pos < data_len) {
+                cur_byte = data[pos++];
+                bits = 8;
+            } else {
+                exhausted = true;
+            }
+        }
+    }
+
+    // Returns the next bit. When no bit is available, raises `exhausted`
+    // and returns false.
+    FORCE_INLINE bool read_bit() {
+        if (UNLIKELY(bits == 0)) {
+            exhausted = true;
+            return false;
+        }
+        bool bit = ((cur_byte >> (bits - 1)) & 1) == 1;
+        bits--;
+        load_byte_if_empty();
+        return bit;
+    }
+
+    // Reads the next `n` bits, most-significant first, and returns them in
+    // the low bits of the result (n <= 0 yields 0). If the input runs out
+    // part-way, raises `exhausted` and returns the bits gathered so far.
+    //
+    // The value is accumulated in an unsigned 64-bit integer: left-shifting
+    // a signed int64_t into or past its sign bit is undefined behaviour
+    // before C++20, and that happens for every 64-bit value whose top bit
+    // is set. The final cast back to int64_t keeps the same bit pattern on
+    // two's-complement targets.
+    FORCE_INLINE int64_t read_long(int n) {
+        uint64_t value = 0;
+        while (n > 0) {
+            if (UNLIKELY(bits == 0)) {
+                // Input drained mid-value; bail so the outer loop in
+                // read_control_bits / batch_decode_raw doesn't spin.
+                exhausted = true;
+                return static_cast<int64_t>(value);
+            }
+            if (n > bits || n == 8) {
+                // Take everything that is left in the current byte.
+                value = (value << bits) + (cur_byte & ((1 << bits) - 1));
+                n -= bits;
+                bits = 0;
+            } else {
+                // Take only the top n of the remaining bits.
+                value =
+                    (value << n) + ((cur_byte >> (bits - n)) & ((1 << n) - 1));
+                bits -= n;
+                n = 0;
+            }
+            load_byte_if_empty();
+        }
+        return static_cast<int64_t>(value);
+    }
+
+    // Reads a unary-style control prefix of at most `max_bits` bits: keeps
+    // consuming bits while they are 1 and stops after the first 0 bit (or
+    // once `exhausted` is set). The accumulator is shifted before each
+    // read, so with max_bits == 2 the possible results are 0, 2 and 3.
+    FORCE_INLINE uint8_t read_control_bits(int max_bits) {
+        uint8_t value = 0x00;
+        for (int i = 0; i < max_bits; i++) {
+            value <<= 1;
+            if (exhausted) break;
+            if (read_bit()) {
+                value |= 0x01;
+            } else {
+                break;
+            }
+        }
+        return value;
+    }
+};
+
+// ── Templated raw-pointer decode helpers ──────────────────────────────────
+
+// Per-value-width decode step used by the raw-pointer batch paths.
+// This primary template only declares read_next(); the definitions come
+// from the explicit specializations that follow it in this header.
+template
+struct GorillaRawOps {
+    static FORCE_INLINE T read_next(GorillaBitReader& r, T& stored_value,
+                                    int& stored_leading_zeros,
+                                    int& stored_trailing_zeros);
+};
+
+// 32-bit specialization of the decode step.
+template <>
+struct GorillaRawOps {
+    static constexpr int VALUE_BITS = VALUE_BITS_LENGTH_32BIT;
+
+    // Decodes one XOR delta from `r` and applies it to stored_value, which
+    // is also the return value. Behaviour by control prefix (as produced by
+    // read_control_bits(2)):
+    //   3 - read a new leading-zero count and a significant-bit count
+    //       (stored as count - 1, hence the sig++), recompute the trailing
+    //       zero count, then continue as for 2;
+    //   2 - read VALUE_BITS - leading - trailing bits, shift them left by
+    //       the trailing-zero count and XOR them into stored_value;
+    //   anything else - stored_value is returned unchanged.
+    // Callers are expected to check r.exhausted afterwards; this function
+    // does not look at it.
+    // NOTE(review): nothing here range-checks sig + stored_leading_zeros
+    // against VALUE_BITS, so confirm that corrupt input cannot make
+    // stored_trailing_zeros negative (it is used as a shift count below).
+    static FORCE_INLINE int32_t read_next(GorillaBitReader& r,
+                                          int32_t& stored_value,
+                                          int& stored_leading_zeros,
+                                          int& stored_trailing_zeros) {
+        uint8_t ctrl = r.read_control_bits(2);
+        switch (ctrl) {
+            case 3: {
+                stored_leading_zeros =
+                    (int)r.read_long(LEADING_ZERO_BITS_LENGTH_32BIT);
+                uint8_t sig =
+                    (uint8_t)r.read_long(MEANINGFUL_XOR_BITS_LENGTH_32BIT);
+                sig++;
+                stored_trailing_zeros = VALUE_BITS - sig - stored_leading_zeros;
+            }
+            // fallthrough
+            case 2: {
+                int32_t xor_value = (int32_t)r.read_long(
+                    VALUE_BITS - stored_leading_zeros - stored_trailing_zeros);
+                xor_value = static_cast(xor_value)
+                            << stored_trailing_zeros;
+                stored_value ^= xor_value;
+            }
+            // fallthrough
+            default:
+                return stored_value;
+        }
+        // Not reached: every path through the switch returns above.
+        return stored_value;
+    }
+};
+
+// 64-bit specialization of the decode step.
+template <>
+struct GorillaRawOps {
+    static constexpr int VALUE_BITS = VALUE_BITS_LENGTH_64BIT;
+
+    // Decodes one XOR delta from `r` and applies it to stored_value, which
+    // is also the return value. Behaviour by control prefix (as produced by
+    // read_control_bits(2)):
+    //   3 - read a new leading-zero count and a significant-bit count
+    //       (stored as count - 1, hence the sig++), recompute the trailing
+    //       zero count, then continue as for 2;
+    //   2 - read VALUE_BITS - leading - trailing bits, shift them left by
+    //       the trailing-zero count and XOR them into stored_value;
+    //   anything else - stored_value is returned unchanged.
+    // Callers are expected to check r.exhausted afterwards; this function
+    // does not look at it.
+    // NOTE(review): nothing here range-checks sig + stored_leading_zeros
+    // against VALUE_BITS, so confirm that corrupt input cannot make
+    // stored_trailing_zeros negative (it is used as a shift count below).
+    static FORCE_INLINE int64_t read_next(GorillaBitReader& r,
+                                          int64_t& stored_value,
+                                          int& stored_leading_zeros,
+                                          int& stored_trailing_zeros) {
+        uint8_t ctrl = r.read_control_bits(2);
+        switch (ctrl) {
+            case 3: {
+                stored_leading_zeros =
+                    (int)r.read_long(LEADING_ZERO_BITS_LENGTH_64BIT);
+                uint8_t sig =
+                    (uint8_t)r.read_long(MEANINGFUL_XOR_BITS_LENGTH_64BIT);
+                sig++;
+                stored_trailing_zeros = VALUE_BITS - sig - stored_leading_zeros;
+            }
+            // fallthrough
+            case 2: {
+                int64_t xor_value = r.read_long(
+                    VALUE_BITS - stored_leading_zeros - stored_trailing_zeros);
+                xor_value = static_cast(xor_value)
+                            << stored_trailing_zeros;
+                stored_value ^= xor_value;
+            }
+            // fallthrough
+            default:
+                return stored_value;
+        }
+        // Not reached: every path through the switch returns above.
+        return stored_value;
+    }
+};
+
+// ──────────────────────────────────────────────────────────────────────────
+
template
class GorillaDecoder : public Decoder {
public:
@@ -127,6 +284,197 @@ class GorillaDecoder : public Decoder {
int read_String(common::String& ret_value, common::PageArena& pa,
common::ByteStream& in) override;
+ // Batch overrides — declared here, defined after template specializations
+ int read_batch_int32(int32_t* out, int capacity, int& actual,
+ common::ByteStream& in) override;
+ int read_batch_int64(int64_t* out, int capacity, int& actual,
+ common::ByteStream& in) override;
+ int skip_int32(int count, int& skipped, common::ByteStream& in) override;
+ int skip_int64(int count, int& skipped, common::ByteStream& in) override;
+
+ protected:
+    // ── Batch decode using raw pointer (bypasses ByteStream) ─────────────
+    // The decode() contract:
+    //   stored_value_ holds the "next" value to be returned.
+    //   decode() returns stored_value_, then advances via cache_next().
+    //   has_next_==false means the ending sentinel was hit.
+    //
+    // batch_decode_raw replicates this logic using GorillaBitReader on the
+    // wrapped contiguous buffer, then syncs state back to ByteStream.
+    //
+    // out      - destination for up to `capacity` decoded values.
+    // capacity - maximum number of values to write; <= 0 returns E_OK
+    //            without touching the stream.
+    // actual   - set to the number of values written to `out`.
+    // ending   - sentinel value; decoding it clears has_next_.
+    // in       - source stream; a non-wrapped stream is delegated to
+    //            batch_decode_fallback().
+    // Returns E_OK, or E_BUF_NOT_ENOUGH when the bit reader reports
+    // `exhausted`. On every path that reaches `done`, buffer_, bits_left_
+    // and the stream read position are updated from the bit reader, so the
+    // bytes consumed before an error stay consumed.
+    int batch_decode_raw(T* out, int capacity, int& actual, T ending,
+                         common::ByteStream& in) {
+        int ret = common::E_OK;
+        actual = 0;
+        // Bootstrap below would unconditionally write out[0]; guard the
+        // zero-capacity edge case so callers can probe without writing.
+        if (capacity <= 0) {
+            return common::E_OK;
+        }
+        if (!in.is_wrapped()) {
+            return batch_decode_fallback(out, capacity, actual, ending, in);
+        }
+
+        const uint8_t* base =
+            (const uint8_t*)in.get_wrapped_buf() + in.read_pos();
+        // Gorilla pages are bounded by the page-writer cap (well below 4 GiB),
+        // so saturating to uint32_t is safe and matches GorillaBitReader's
+        // 32-bit cursor.
+        uint32_t remain = static_cast(
+            std::min(in.remaining_size(), UINT32_MAX));
+
+        // Resume from the decoder's partially consumed byte, if any.
+        GorillaBitReader r;
+        r.data = base;
+        r.pos = 0;
+        r.data_len = remain;
+        r.bits = bits_left_;
+        r.cur_byte = buffer_;
+
+        // Bootstrap first value if needed (mirrors decode()'s first-call path)
+        if (UNLIKELY(!first_value_was_read_)) {
+            // Nothing buffered and nothing left to load: report zero values.
+            if (r.bits == 0 && r.pos >= r.data_len) goto done;
+            r.load_byte_if_empty();
+            stored_value_ = (T)r.read_long(GorillaRawOps::VALUE_BITS);
+            if (UNLIKELY(r.exhausted)) {
+                // Page truncated before the first value finished; refuse to
+                // emit a partially-decoded sentinel.
+                first_value_was_read_ = false;
+                ret = common::E_BUF_NOT_ENOUGH;
+                goto done;
+            }
+            first_value_was_read_ = true;
+            // Save the first value before cache_next mutates stored_value_
+            T first_value = stored_value_;
+            // cache_next: read_next then check ending
+            GorillaRawOps::read_next(r, stored_value_, stored_leading_zeros_,
+                                     stored_trailing_zeros_);
+            if (UNLIKELY(r.exhausted)) {
+                ret = common::E_BUF_NOT_ENOUGH;
+                goto done;
+            }
+            if (stored_value_ == ending) {
+                has_next_ = false;
+            } else {
+                has_next_ = true;
+            }
+            // Output the first value
+            out[actual++] = first_value;
+            if (!has_next_ || actual >= capacity) goto done;
+        }
+
+        // Main batch loop
+        while (actual < capacity && has_next_) {
+            // Emit the cached value first, then decode its successor.
+            out[actual++] = stored_value_;
+            GorillaRawOps::read_next(r, stored_value_, stored_leading_zeros_,
+                                     stored_trailing_zeros_);
+            if (UNLIKELY(r.exhausted)) {
+                ret = common::E_BUF_NOT_ENOUGH;
+                goto done;
+            }
+            if (stored_value_ == ending) {
+                has_next_ = false;
+            }
+        }
+
+    done:
+        // Sync bit-reader state back
+        buffer_ = r.cur_byte;
+        bits_left_ = r.bits;
+        in.wrapped_buf_advance_read_pos(r.pos);
+        return ret;
+    }
+
+    // Skip counterpart of batch_decode_raw(): walks the same decode steps on
+    // the wrapped buffer but only counts values instead of storing them.
+    //
+    // count   - maximum number of values to skip; <= 0 returns E_OK without
+    //           touching the stream.
+    // skipped - set to the number of values counted as skipped.
+    // ending  - sentinel value; decoding it clears has_next_.
+    // in      - source stream; a non-wrapped stream is delegated to
+    //           batch_skip_fallback().
+    // Returns E_OK, or E_BUF_NOT_ENOUGH when the bit reader reports
+    // `exhausted`. On every path that reaches `done`, buffer_, bits_left_
+    // and the stream read position are updated from the bit reader.
+    int batch_skip_raw(int count, int& skipped, T ending,
+                       common::ByteStream& in) {
+        int ret = common::E_OK;
+        skipped = 0;
+        // Bootstrap below would consume first_value_ even when count == 0,
+        // advancing the stream past data the caller didn't ask to skip.
+        if (count <= 0) {
+            return common::E_OK;
+        }
+        if (!in.is_wrapped()) {
+            return batch_skip_fallback(count, skipped, ending, in);
+        }
+
+        const uint8_t* base =
+            (const uint8_t*)in.get_wrapped_buf() + in.read_pos();
+        // Same saturation as batch_decode_raw: GorillaBitReader is 32-bit
+        // internally; pages are well under 4 GiB.
+        uint32_t remain = static_cast(
+            std::min(in.remaining_size(), UINT32_MAX));
+
+        // Resume from the decoder's partially consumed byte, if any.
+        GorillaBitReader r;
+        r.data = base;
+        r.pos = 0;
+        r.data_len = remain;
+        r.bits = bits_left_;
+        r.cur_byte = buffer_;
+
+        // First-call path: read the raw first value, then its successor.
+        if (UNLIKELY(!first_value_was_read_)) {
+            // Nothing buffered and nothing left to load: skip zero values.
+            if (r.bits == 0 && r.pos >= r.data_len) goto done;
+            r.load_byte_if_empty();
+            stored_value_ = (T)r.read_long(GorillaRawOps::VALUE_BITS);
+            if (UNLIKELY(r.exhausted)) {
+                first_value_was_read_ = false;
+                ret = common::E_BUF_NOT_ENOUGH;
+                goto done;
+            }
+            first_value_was_read_ = true;
+            GorillaRawOps::read_next(r, stored_value_, stored_leading_zeros_,
+                                     stored_trailing_zeros_);
+            if (UNLIKELY(r.exhausted)) {
+                ret = common::E_BUF_NOT_ENOUGH;
+                goto done;
+            }
+            if (stored_value_ == ending) {
+                has_next_ = false;
+            } else {
+                has_next_ = true;
+            }
+            // The first value counts as one skip
+            skipped++;
+            if (!has_next_ || skipped >= count) goto done;
+        }
+
+        while (skipped < count && has_next_) {
+            // Count the cached value as skipped, then decode its successor.
+            skipped++;
+            GorillaRawOps::read_next(r, stored_value_, stored_leading_zeros_,
+                                     stored_trailing_zeros_);
+            if (UNLIKELY(r.exhausted)) {
+                ret = common::E_BUF_NOT_ENOUGH;
+                goto done;
+            }
+            if (stored_value_ == ending) {
+                has_next_ = false;
+            }
+        }
+
+    done:
+        // Sync bit-reader state back to the decoder and the stream.
+        buffer_ = r.cur_byte;
+        bits_left_ = r.bits;
+        in.wrapped_buf_advance_read_pos(r.pos);
+        return ret;
+    }
+
+    // Per-value path for streams that are not wrapped: stores decode(in)
+    // results into `out` until `capacity` values are written or
+    // has_remaining(in) is false. `actual` receives the count written.
+    // `ending` is accepted for signature parity with batch_decode_raw()
+    // and is not used here. Always returns E_OK.
+    int batch_decode_fallback(T* out, int capacity, int& actual, T ending,
+                              common::ByteStream& in) {
+        for (actual = 0; actual < capacity && has_remaining(in); ++actual) {
+            out[actual] = decode(in);
+        }
+        return common::E_OK;
+    }
+
+    // Per-value path for streams that are not wrapped: calls decode(in) and
+    // drops the result until `count` values are consumed or
+    // has_remaining(in) is false. `skipped` receives the count consumed.
+    // `ending` is accepted for signature parity with batch_skip_raw() and
+    // is not used here. Always returns E_OK.
+    int batch_skip_fallback(int count, int& skipped, T ending,
+                            common::ByteStream& in) {
+        for (skipped = 0; skipped < count && has_remaining(in); ++skipped) {
+            decode(in);
+        }
+        return common::E_OK;
+    }
+
public:
common::TSEncoding type_;
T stored_value_;
@@ -254,18 +602,18 @@ FORCE_INLINE int64_t GorillaDecoder::decode(common::ByteStream& in) {
class FloatGorillaDecoder : public GorillaDecoder {
public:
- int read_boolean(bool& ret_value, common::ByteStream& in);
- int read_int32(int32_t& ret_value, common::ByteStream& in);
- int read_int64(int64_t& ret_value, common::ByteStream& in);
- int read_float(float& ret_value, common::ByteStream& in);
- int read_double(double& ret_value, common::ByteStream& in);
+ int read_boolean(bool& ret_value, common::ByteStream& in) override;
+ int read_int32(int32_t& ret_value, common::ByteStream& in) override;
+ int read_int64(int64_t& ret_value, common::ByteStream& in) override;
+ int read_float(float& ret_value, common::ByteStream& in) override;
+ int read_double(double& ret_value, common::ByteStream& in) override;
float decode(common::ByteStream& in) {
int32_t value_int = GorillaDecoder::decode(in);
return common::int_to_float(value_int);
}
- int32_t cache_next(common::ByteStream& in) {
+ int32_t cache_next(common::ByteStream& in) override {
read_next(in);
if (stored_value_ ==
common::float_to_int(GORILLA_ENCODING_ENDING_FLOAT)) {
@@ -273,22 +621,46 @@ class FloatGorillaDecoder : public GorillaDecoder {
}
return stored_value_;
}
+
+    // Batch read for FLOAT: decodes raw 32-bit patterns in chunks of at
+    // most 129 values through batch_decode_raw(), converting each with
+    // common::int_to_float() into `out`. Stops when `capacity` values are
+    // written, has_remaining(in) is false, or a chunk comes back empty.
+    // `actual` receives the number of floats written. A non-E_OK status
+    // from batch_decode_raw() is returned as-is and that chunk is not
+    // copied to `out`.
+    int read_batch_float(float* out, int capacity, int& actual,
+                         common::ByteStream& in) override {
+        constexpr int kChunk = 129;
+        const int32_t ending =
+            common::float_to_int(GORILLA_ENCODING_ENDING_FLOAT);
+        int32_t chunk[kChunk];
+        actual = 0;
+        while (actual < capacity && has_remaining(in)) {
+            const int room = capacity - actual;
+            const int want = room < kChunk ? room : kChunk;
+            int got = 0;
+            const int status = batch_decode_raw(chunk, want, got, ending, in);
+            if (status != common::E_OK) {
+                return status;
+            }
+            if (got == 0) {
+                break;
+            }
+            for (int i = 0; i < got; ++i, ++actual) {
+                out[actual] = common::int_to_float(chunk[i]);
+            }
+        }
+        return common::E_OK;
+    }
+
+    // Skips up to `count` floats by forwarding to batch_skip_raw() with the
+    // float ending sentinel converted via common::float_to_int().
+    int skip_float(int count, int& skipped, common::ByteStream& in) override {
+        return batch_skip_raw(
+            count, skipped,
+            common::float_to_int(GORILLA_ENCODING_ENDING_FLOAT), in);
+    }
};
class DoubleGorillaDecoder : public GorillaDecoder {
public:
- int read_boolean(bool& ret_value, common::ByteStream& in);
- int read_int32(int32_t& ret_value, common::ByteStream& in);
- int read_int64(int64_t& ret_value, common::ByteStream& in);
- int read_float(float& ret_value, common::ByteStream& in);
- int read_double(double& ret_value, common::ByteStream& in);
+ int read_boolean(bool& ret_value, common::ByteStream& in) override;
+ int read_int32(int32_t& ret_value, common::ByteStream& in) override;
+ int read_int64(int64_t& ret_value, common::ByteStream& in) override;
+ int read_float(float& ret_value, common::ByteStream& in) override;
+ int read_double(double& ret_value, common::ByteStream& in) override;
double decode(common::ByteStream& in) {
int64_t value_long = GorillaDecoder::decode(in);
return common::long_to_double(value_long);
}
- int64_t cache_next(common::ByteStream& in) {
+ int64_t cache_next(common::ByteStream& in) override {
read_next(in);
if (stored_value_ ==
common::double_to_long(GORILLA_ENCODING_ENDING_DOUBLE)) {
@@ -296,12 +668,88 @@ class DoubleGorillaDecoder : public GorillaDecoder {
}
return stored_value_;
}
+
+    // Batch read for DOUBLE: decodes raw 64-bit patterns in chunks of at
+    // most 129 values through batch_decode_raw(), converting each with
+    // common::long_to_double() into `out`. Stops when `capacity` values
+    // are written, has_remaining(in) is false, or a chunk comes back
+    // empty. `actual` receives the number of doubles written. A non-E_OK
+    // status from batch_decode_raw() is returned as-is and that chunk is
+    // not copied to `out`.
+    int read_batch_double(double* out, int capacity, int& actual,
+                          common::ByteStream& in) override {
+        constexpr int kChunk = 129;
+        const int64_t ending =
+            common::double_to_long(GORILLA_ENCODING_ENDING_DOUBLE);
+        int64_t chunk[kChunk];
+        actual = 0;
+        while (actual < capacity && has_remaining(in)) {
+            const int room = capacity - actual;
+            const int want = room < kChunk ? room : kChunk;
+            int got = 0;
+            const int status = batch_decode_raw(chunk, want, got, ending, in);
+            if (status != common::E_OK) {
+                return status;
+            }
+            if (got == 0) {
+                break;
+            }
+            for (int i = 0; i < got; ++i, ++actual) {
+                out[actual] = common::long_to_double(chunk[i]);
+            }
+        }
+        return common::E_OK;
+    }
+
+    // Skips up to `count` doubles by forwarding to batch_skip_raw() with the
+    // double ending sentinel converted via common::double_to_long().
+    int skip_double(int count, int& skipped, common::ByteStream& in) override {
+        return batch_skip_raw(
+            count, skipped,
+            common::double_to_long(GORILLA_ENCODING_ENDING_DOUBLE), in);
+    }
};
typedef GorillaDecoder IntGorillaDecoder;
typedef GorillaDecoder LongGorillaDecoder;
-// wrap as Decoder interface
+// ── IntGorillaDecoder batch/skip overrides ─────────────────────────────────
+// INT32 batch read: forwards to batch_decode_raw() with
+// GORILLA_ENCODING_ENDING_INTEGER as the ending sentinel.
+template <>
+inline int GorillaDecoder::read_batch_int32(int32_t* out, int capacity,
+                                            int& actual,
+                                            common::ByteStream& in) {
+    return batch_decode_raw(out, capacity, actual,
+                            GORILLA_ENCODING_ENDING_INTEGER, in);
+}
+// INT64 batch read is rejected in this group: reports zero values and
+// returns E_NOT_SUPPORT without touching the stream.
+template <>
+inline int GorillaDecoder::read_batch_int64(int64_t*, int, int& actual,
+                                            common::ByteStream&) {
+    actual = 0;
+    return common::E_NOT_SUPPORT;
+}
+// INT32 skip: forwards to batch_skip_raw() with
+// GORILLA_ENCODING_ENDING_INTEGER as the ending sentinel.
+template <>
+inline int GorillaDecoder::skip_int32(int count, int& skipped,
+                                      common::ByteStream& in) {
+    return batch_skip_raw(count, skipped, GORILLA_ENCODING_ENDING_INTEGER, in);
+}
+// INT64 skip is rejected in this group: reports zero skipped and returns
+// E_NOT_SUPPORT without touching the stream.
+template <>
+inline int GorillaDecoder::skip_int64(int, int& skipped,
+                                      common::ByteStream&) {
+    skipped = 0;
+    return common::E_NOT_SUPPORT;
+}
+
+// ── LongGorillaDecoder batch/skip overrides ───────────────────────────────
+// INT32 batch read is rejected in this group: reports zero values and
+// returns E_NOT_SUPPORT without touching the stream.
+template <>
+inline int GorillaDecoder::read_batch_int32(int32_t*, int, int& actual,
+                                            common::ByteStream&) {
+    actual = 0;
+    return common::E_NOT_SUPPORT;
+}
+// INT64 batch read: forwards to batch_decode_raw() with
+// GORILLA_ENCODING_ENDING_LONG as the ending sentinel.
+template <>
+inline int GorillaDecoder::read_batch_int64(int64_t* out, int capacity,
+                                            int& actual,
+                                            common::ByteStream& in) {
+    return batch_decode_raw(out, capacity, actual, GORILLA_ENCODING_ENDING_LONG,
+                            in);
+}
+// INT32 skip is rejected in this group: reports zero skipped and returns
+// E_NOT_SUPPORT without touching the stream.
+template <>
+inline int GorillaDecoder::skip_int32(int, int& skipped,
+                                      common::ByteStream&) {
+    skipped = 0;
+    return common::E_NOT_SUPPORT;
+}
+// INT64 skip: forwards to batch_skip_raw() with
+// GORILLA_ENCODING_ENDING_LONG as the ending sentinel.
+template <>
+inline int GorillaDecoder::skip_int64(int count, int& skipped,
+                                      common::ByteStream& in) {
+    return batch_skip_raw(count, skipped, GORILLA_ENCODING_ENDING_LONG, in);
+}
+
+// ── Scalar Decoder interface wrappers (unchanged) ─────────────────────────
template <>
FORCE_INLINE int IntGorillaDecoder::read_boolean(bool& ret_value,
common::ByteStream& in) {
diff --git a/cpp/src/encoding/plain_decoder.h b/cpp/src/encoding/plain_decoder.h
index c2627f71d..3e83cfc76 100644
--- a/cpp/src/encoding/plain_decoder.h
+++ b/cpp/src/encoding/plain_decoder.h
@@ -20,10 +20,47 @@
#ifndef ENCODING_PLAIN_DECODER_H
#define ENCODING_PLAIN_DECODER_H
+#include
+#include
+#include
+
+#if defined(_MSC_VER)
+#include
+#include
+#endif
+
#include "encoding/decoder.h"
namespace storage {
+// Reverses the byte order of a 32-bit value. Uses the compiler intrinsic on
+// GCC/Clang and MSVC; other compilers get a portable two-step swap (swap
+// the bytes inside each 16-bit half, then swap the halves).
+FORCE_INLINE uint32_t plain_bswap32(uint32_t v) {
+#if defined(__GNUC__) || defined(__clang__)
+    return __builtin_bswap32(v);
+#elif defined(_MSC_VER)
+    return _byteswap_ulong(v);
+#else
+    v = ((v & 0x00FF00FFu) << 8) | ((v >> 8) & 0x00FF00FFu);
+    return (v << 16) | (v >> 16);
+#endif
+}
+
+// Reverses the byte order of a 64-bit value. Uses the compiler intrinsic on
+// GCC/Clang and MSVC; other compilers get a portable three-step swap
+// (bytes within 16-bit units, 16-bit units within 32-bit units, then the
+// two 32-bit halves).
+FORCE_INLINE uint64_t plain_bswap64(uint64_t v) {
+#if defined(__GNUC__) || defined(__clang__)
+    return __builtin_bswap64(v);
+#elif defined(_MSC_VER)
+    return _byteswap_uint64(v);
+#else
+    v = ((v & 0x00FF00FF00FF00FFull) << 8) |
+        ((v >> 8) & 0x00FF00FF00FF00FFull);
+    v = ((v & 0x0000FFFF0000FFFFull) << 16) |
+        ((v >> 16) & 0x0000FFFF0000FFFFull);
+    return (v << 32) | (v >> 32);
+#endif
+}
+
class PlainDecoder : public Decoder {
public:
~PlainDecoder() override = default;
@@ -62,6 +99,113 @@ class PlainDecoder : public Decoder {
common::ByteStream& in) override {
return common::SerializationUtil::read_mystring(ret_String, &pa, in);
}
+
+    // ── Batch overrides ──────────────────────────────────────────────────────
+    //
+    // INT32: PLAIN encoding uses varint (variable stride), so values are
+    // still decoded one at a time; the override only removes the virtual
+    // read_int32() call per element.
+    // Reads until `capacity` values are stored or in.has_remaining() is
+    // false. `actual` receives the count written; the first non-E_OK status
+    // from read_var_int() is returned unchanged.
+    int read_batch_int32(int32_t* out, int capacity, int& actual,
+                         common::ByteStream& in) override {
+        for (actual = 0; actual < capacity && in.has_remaining(); ++actual) {
+            const int status =
+                common::SerializationUtil::read_var_int(out[actual], in);
+            if (status != common::E_OK) {
+                return status;
+            }
+        }
+        return common::E_OK;
+    }
+
+    // Skips up to `count` varint-encoded INT32 values by decoding each into
+    // a throwaway variable. Stops early when in.has_remaining() is false.
+    // `skipped` receives the count consumed; the first non-E_OK status from
+    // read_var_int() is returned unchanged.
+    int skip_int32(int count, int& skipped, common::ByteStream& in) override {
+        for (skipped = 0; skipped < count && in.has_remaining(); ++skipped) {
+            int32_t discarded;
+            const int status =
+                common::SerializationUtil::read_var_int(discarded, in);
+            if (status != common::E_OK) {
+                return status;
+            }
+        }
+        return common::E_OK;
+    }
+
+    // Fixed-stride INT64 / FLOAT / DOUBLE share the same shape: when the
+    // ByteStream is wrapped (contiguous buf), advance the read pointer in one
+    // step and byte-swap in place; otherwise fall back to per-value reads.
+    // The macros below expand into one override per type.
+    //
+    // PLAIN_SKIP_FIXED(NAME, T, STRIDE, READ_ONE) defines
+    //   int NAME(int count, int& skipped, common::ByteStream& in) override
+    // - Unwrapped stream: calls READ_ONE(dummy, in) until `count` values
+    //   are consumed or in.has_remaining() is false; the first non-E_OK
+    //   status is returned unchanged.
+    // - Wrapped stream: skips min(in.remaining_size() / STRIDE, count)
+    //   whole values by advancing the read position skipped * STRIDE bytes
+    //   in one call; a trailing fragment shorter than STRIDE is left
+    //   unread.
+    // NOTE(review): the unwrapped loop is a no-op for count <= 0, but the
+    // wrapped branch casts `count` before taking the min; depending on the
+    // cast's target type a negative count may not be clamped to zero.
+    // Confirm callers never pass count < 0.
+#define PLAIN_SKIP_FIXED(NAME, T, STRIDE, READ_ONE)                          \
+    int NAME(int count, int& skipped, common::ByteStream& in) override {     \
+        skipped = 0;                                                         \
+        if (!in.is_wrapped()) {                                              \
+            T dummy;                                                         \
+            while (skipped < count && in.has_remaining()) {                  \
+                int ret = READ_ONE(dummy, in);                               \
+                if (ret != common::E_OK) {                                   \
+                    return ret;                                              \
+                }                                                            \
+                ++skipped;                                                   \
+            }                                                                \
+            return common::E_OK;                                             \
+        }                                                                    \
+        skipped = static_cast(std::min(                                      \
+            in.remaining_size() / (STRIDE), static_cast(count)));            \
+        if (skipped <= 0) {                                                  \
+            skipped = 0;                                                     \
+            return common::E_OK;                                             \
+        }                                                                    \
+        in.wrapped_buf_advance_read_pos(static_cast(skipped) *               \
+                                        (STRIDE));                           \
+        return common::E_OK;                                                 \
+    }
+
+    // PLAIN_READ_BATCH_FIXED(NAME, T, U, STRIDE, READ_ONE, BSWAP) defines
+    //   int NAME(T* out, int capacity, int& actual, common::ByteStream& in)
+    //       override
+    // T is the value type, U an unsigned integer type used as the byte-swap
+    // scratch (STRIDE bytes are copied into and out of it), BSWAP the
+    // matching byte-swap function.
+    // - Unwrapped stream: calls READ_ONE(out[actual], in) until `capacity`
+    //   values are stored or in.has_remaining() is false; the first
+    //   non-E_OK status is returned unchanged.
+    // - Wrapped stream: takes n = min(in.remaining_size() / STRIDE,
+    //   capacity) whole values, advances the read position by n * STRIDE
+    //   up front, then copies each value through memcpy + BSWAP so no
+    //   unaligned or type-punned load is performed.
+    // NOTE(review): as in the skip macro, `capacity` is cast before the
+    // min in the wrapped branch; confirm callers never pass capacity < 0.
+#define PLAIN_READ_BATCH_FIXED(NAME, T, U, STRIDE, READ_ONE, BSWAP)          \
+    int NAME(T* out, int capacity, int& actual, common::ByteStream& in)      \
+        override {                                                           \
+        actual = 0;                                                          \
+        if (!in.is_wrapped()) {                                              \
+            while (actual < capacity && in.has_remaining()) {                \
+                int ret = READ_ONE(out[actual], in);                         \
+                if (ret != common::E_OK) {                                   \
+                    return ret;                                              \
+                }                                                            \
+                ++actual;                                                    \
+            }                                                                \
+            return common::E_OK;                                             \
+        }                                                                    \
+        int n = static_cast(std::min(                                        \
+            in.remaining_size() / (STRIDE), static_cast(capacity)));         \
+        if (n <= 0) {                                                        \
+            return common::E_OK;                                             \
+        }                                                                    \
+        const uint8_t* src =                                                 \
+            (const uint8_t*)in.get_wrapped_buf() + in.read_pos();            \
+        in.wrapped_buf_advance_read_pos(static_cast(n) * (STRIDE));          \
+        actual = n;                                                          \
+        for (int i = 0; i < n; ++i) {                                        \
+            U v;                                                             \
+            memcpy(&v, src + i * (STRIDE), (STRIDE));                        \
+            v = BSWAP(v);                                                    \
+            memcpy(&out[i], &v, (STRIDE));                                   \
+        }                                                                    \
+        return common::E_OK;                                                 \
+    }
+
+ PLAIN_SKIP_FIXED(skip_int64, int64_t, 8,
+ common::SerializationUtil::read_i64)
+ PLAIN_SKIP_FIXED(skip_float, float, 4,
+ common::SerializationUtil::read_float)
+ PLAIN_SKIP_FIXED(skip_double, double, 8,
+ common::SerializationUtil::read_double)
+
+ PLAIN_READ_BATCH_FIXED(read_batch_int64, int64_t, uint64_t, 8,
+ common::SerializationUtil::read_i64, plain_bswap64)
+ PLAIN_READ_BATCH_FIXED(read_batch_float, float, uint32_t, 4,
+ common::SerializationUtil::read_float, plain_bswap32)
+ PLAIN_READ_BATCH_FIXED(read_batch_double, double, uint64_t, 8,
+ common::SerializationUtil::read_double,
+ plain_bswap64)
+
+#undef PLAIN_SKIP_FIXED
+#undef PLAIN_READ_BATCH_FIXED
};
} // end namespace storage
diff --git a/cpp/src/encoding/plain_encoder.h b/cpp/src/encoding/plain_encoder.h
index b768c9bf0..84ebee238 100644
--- a/cpp/src/encoding/plain_encoder.h
+++ b/cpp/src/encoding/plain_encoder.h
@@ -20,50 +20,221 @@
#ifndef ENCODING_PLAIN_ENCODER_H
#define ENCODING_PLAIN_ENCODER_H
+#include
+
#include "encoder.h"
+#if defined(__ARM_NEON) || defined(__ARM_NEON__)
+#include
+#define TSFILE_HAS_NEON 1
+#endif
+
namespace storage {
class PlainEncoder : public Encoder {
public:
PlainEncoder() {}
~PlainEncoder() { destroy(); }
- void destroy() { /* do nothing for PlainEncoder */
+ void destroy() override { /* do nothing for PlainEncoder */
}
- void reset() { /* do thing for PlainEncoder */
+    void reset() override { /* do nothing for PlainEncoder */
     }
- FORCE_INLINE int encode(bool value, common::ByteStream& out_stream) {
+ FORCE_INLINE int encode(bool value,
+ common::ByteStream& out_stream) override {
return common::SerializationUtil::write_i8(value ? 1 : 0, out_stream);
}
- FORCE_INLINE int encode(int32_t value, common::ByteStream& out_stream) {
+ FORCE_INLINE int encode(int32_t value,
+ common::ByteStream& out_stream) override {
return common::SerializationUtil::write_var_int(value, out_stream);
}
- FORCE_INLINE int encode(int64_t value, common::ByteStream& out_stream) {
+ FORCE_INLINE int encode(int64_t value,
+ common::ByteStream& out_stream) override {
return common::SerializationUtil::write_i64(value, out_stream);
}
- FORCE_INLINE int encode(float value, common::ByteStream& out_stream) {
+ FORCE_INLINE int encode(float value,
+ common::ByteStream& out_stream) override {
return common::SerializationUtil::write_float(value, out_stream);
}
- FORCE_INLINE int encode(double value, common::ByteStream& out_stream) {
+ FORCE_INLINE int encode(double value,
+ common::ByteStream& out_stream) override {
return common::SerializationUtil::write_double(value, out_stream);
}
FORCE_INLINE int encode(common::String value,
- common::ByteStream& out_stream) {
+ common::ByteStream& out_stream) override {
return common::SerializationUtil::write_mystring(value, out_stream);
}
- int flush(common::ByteStream& out_stream) {
+ int flush(common::ByteStream& out_stream) override {
// do nothing for PlainEncoder
return common::E_OK;
}
- int get_max_byte_size() { return 0; }
+ int get_max_byte_size() override { return 0; }
+
+ // Bulk path for int64: every value is stored as 8 bytes, most significant
+ // byte first, written straight into the buffer handed out by
+ // ByteStream::acquire_buf(). After each filled chunk the stream is told how
+ // many bytes were produced via buffer_used(). Returns E_OOM when no buffer
+ // can be acquired, otherwise E_OK (or the result of the generic fallback).
+ int encode_batch(const int64_t* values, uint32_t count,
+                  common::ByteStream& out_stream) override {
+     for (uint32_t done = 0; done < count;) {
+         common::ByteStream::Buffer chunk = out_stream.acquire_buf();
+         if (UNLIKELY(chunk.buf_ == nullptr)) {
+             return common::E_OOM;
+         }
+         // Number of whole 8-byte values the acquired buffer can take.
+         const uint32_t slots = chunk.len_ / 8;
+         if (slots == 0) {
+             // Fewer than 8 writable bytes: hand all remaining values to
+             // the generic Encoder implementation.
+             return Encoder::encode_batch(values + done, count - done,
+                                          out_stream);
+         }
+         const uint32_t todo = std::min(count - done, slots);
+         const int64_t* in = values + done;
+         uint8_t* out = (uint8_t*)chunk.buf_;
+         uint32_t k = 0;
+#if TSFILE_HAS_NEON
+         // NEON: reverse the bytes of two 64-bit lanes per step.
+         while (k + 2 <= todo) {
+             uint8x16_t lanes = vld1q_u8((const uint8_t*)(in + k));
+             vst1q_u8(out, vrev64q_u8(lanes));
+             out += 16;
+             k += 2;
+         }
+#endif
+         // Scalar remainder: emit the most significant byte first.
+         while (k < todo) {
+             const uint64_t bits = (uint64_t)in[k];
+             for (int shift = 56; shift >= 0; shift -= 8) {
+                 *out++ = (uint8_t)(bits >> shift);
+             }
+             ++k;
+         }
+         out_stream.buffer_used(todo * 8);
+         done += todo;
+     }
+     return common::E_OK;
+ }
+
+ // Bulk path for double: the 64 raw bits of every value are stored as 8
+ // bytes, most significant byte first, written straight into the buffer
+ // handed out by ByteStream::acquire_buf(). Returns E_OOM when no buffer can
+ // be acquired, otherwise E_OK (or the result of the generic fallback).
+ int encode_batch(const double* values, uint32_t count,
+                  common::ByteStream& out_stream) override {
+     for (uint32_t done = 0; done < count;) {
+         common::ByteStream::Buffer chunk = out_stream.acquire_buf();
+         if (UNLIKELY(chunk.buf_ == nullptr)) {
+             return common::E_OOM;
+         }
+         // Number of whole 8-byte values the acquired buffer can take.
+         const uint32_t slots = chunk.len_ / 8;
+         if (slots == 0) {
+             // Fewer than 8 writable bytes: hand all remaining values to
+             // the generic Encoder implementation.
+             return Encoder::encode_batch(values + done, count - done,
+                                          out_stream);
+         }
+         const uint32_t todo = std::min(count - done, slots);
+         const double* in = values + done;
+         uint8_t* out = (uint8_t*)chunk.buf_;
+         uint32_t k = 0;
+#if TSFILE_HAS_NEON
+         // NEON: a raw byte reversal per 64-bit lane works on double bits
+         // exactly as it does on integers.
+         while (k + 2 <= todo) {
+             uint8x16_t lanes = vld1q_u8((const uint8_t*)(in + k));
+             vst1q_u8(out, vrev64q_u8(lanes));
+             out += 16;
+             k += 2;
+         }
+#endif
+         // Scalar remainder: copy the bit pattern out with memcpy (no
+         // aliasing through an integer pointer), then emit it most
+         // significant byte first.
+         while (k < todo) {
+             uint64_t bits;
+             memcpy(&bits, in + k, sizeof(double));
+             for (int shift = 56; shift >= 0; shift -= 8) {
+                 *out++ = (uint8_t)(bits >> shift);
+             }
+             ++k;
+         }
+         out_stream.buffer_used(todo * 8);
+         done += todo;
+     }
+     return common::E_OK;
+ }
+
+ // Bulk path for float: the 32 raw bits of every value are stored as 4
+ // bytes, most significant byte first, written straight into the buffer
+ // handed out by ByteStream::acquire_buf(). Returns E_OOM when no buffer can
+ // be acquired, otherwise E_OK (or the result of the generic fallback).
+ int encode_batch(const float* values, uint32_t count,
+                  common::ByteStream& out_stream) override {
+     for (uint32_t done = 0; done < count;) {
+         common::ByteStream::Buffer chunk = out_stream.acquire_buf();
+         if (UNLIKELY(chunk.buf_ == nullptr)) {
+             return common::E_OOM;
+         }
+         // Number of whole 4-byte values the acquired buffer can take.
+         const uint32_t slots = chunk.len_ / 4;
+         if (slots == 0) {
+             // Fewer than 4 writable bytes: hand all remaining values to
+             // the generic Encoder implementation.
+             return Encoder::encode_batch(values + done, count - done,
+                                          out_stream);
+         }
+         const uint32_t todo = std::min(count - done, slots);
+         const float* in = values + done;
+         uint8_t* out = (uint8_t*)chunk.buf_;
+         uint32_t k = 0;
+#if TSFILE_HAS_NEON
+         // NEON: reverse the bytes of four 32-bit lanes per step.
+         while (k + 4 <= todo) {
+             uint8x16_t lanes = vld1q_u8((const uint8_t*)(in + k));
+             vst1q_u8(out, vrev32q_u8(lanes));
+             out += 16;
+             k += 4;
+         }
+#endif
+         // Scalar remainder: copy the bit pattern out with memcpy, then
+         // emit it most significant byte first.
+         while (k < todo) {
+             uint32_t bits;
+             memcpy(&bits, in + k, sizeof(float));
+             for (int shift = 24; shift >= 0; shift -= 8) {
+                 *out++ = (uint8_t)(bits >> shift);
+             }
+             ++k;
+         }
+         out_stream.buffer_used(todo * 4);
+         done += todo;
+     }
+     return common::E_OK;
+ }
+
+ // Serialises `count` strings stored in an Arrow-style offset+buffer layout:
+ // string k occupies buffer[offsets[k] .. offsets[k + 1]). Beginning at
+ // start_idx, each string is emitted as var_int(length) followed by its raw
+ // bytes (no bytes are written for an empty string). Stops at the first
+ // failing write and returns that error code; returns E_OK otherwise.
+ int encode_string_batch(const char* buffer, const uint32_t* offsets,
+                         uint32_t start_idx, uint32_t count,
+                         common::ByteStream& out_stream) override {
+     int ret = common::E_OK;
+     const uint32_t stop_idx = start_idx + count;
+     for (uint32_t idx = start_idx; idx != stop_idx; ++idx) {
+         const uint32_t begin = offsets[idx];
+         const uint32_t len = offsets[idx + 1] - begin;
+         // Length prefix first.
+         if (RET_FAIL(common::SerializationUtil::write_var_int(
+                 (int32_t)len, out_stream))) {
+             break;
+         }
+         if (len == 0) {
+             continue;
+         }
+         // Then the payload bytes.
+         if (RET_FAIL(out_stream.write_buf(buffer + begin, len))) {
+             break;
+         }
+     }
+     return ret;
+ }
};
} // end namespace storage
diff --git a/cpp/src/encoding/ts2diff_decoder.h b/cpp/src/encoding/ts2diff_decoder.h
index f37001003..bc6e89613 100644
--- a/cpp/src/encoding/ts2diff_decoder.h
+++ b/cpp/src/encoding/ts2diff_decoder.h
@@ -24,6 +24,7 @@
#include
#include
+#include
#include
#include "common/allocator/alloc_base.h"
@@ -31,8 +32,174 @@
#include "decoder.h"
#include "utils/util_define.h"
+#ifdef ENABLE_SIMD
+#include "simde/x86/avx2.h"
+#endif
+
namespace storage {
+// ============================================================================
+// SIMD batch decode helpers (INT32)
+// ============================================================================
+#ifdef ENABLE_SIMD
+
+// Decode 4 consecutive INT32 values from bit-packed data using SIMD gather + shift.
+// @in: pointer to the start of packed bit data for the block; every value is fetched with a 4-byte (bit_width <= 16) or 8-byte (wider) load starting at the byte holding its first bit, so that many bytes must be readable there
+// @bit_width: bits per delta value
+// @delta_min: minimum delta offset for this block (added to every unpacked delta)
+// @index: current position within the block (0-based, among write_index_
+// deltas); the 4 values start at bit offset index * bit_width
+// @base: the previous reconstructed value (for prefix-sum)
+// @out: output array (4 values written)
+// Returns: the last reconstructed value (new base for next group), i.e. out[3]
+static inline int32_t simd_decode_4_i32(const uint8_t* in, int32_t bit_width,
+ int32_t delta_min, int32_t index,
+ int32_t base, int32_t out[4]) {
+ static const simde__m128i SHUF_REV4 = simde_mm_setr_epi8(  // shuffle mask: reverse the bytes of each 32-bit lane
+ 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
+
+ const simde__m128i VMIN4 = simde_mm_set1_epi32(delta_min);
+
+ int32_t pos0 = index * bit_width;  // bit offset of the first of the 4 values
+ int32_t pos[4] = {pos0, pos0 + bit_width, pos0 + 2 * bit_width,
+ pos0 + 3 * bit_width};
+ int32_t bidx[4] = {pos[0] >> 3, pos[1] >> 3, pos[2] >> 3, pos[3] >> 3};  // byte holding each value's first bit
+ int32_t off[4] = {pos[0] & 7, pos[1] & 7, pos[2] & 7, pos[3] & 7};  // bit offset of the value inside that byte
+
+ simde__m128i IDX = simde_mm_setr_epi32(bidx[0], bidx[1], bidx[2], bidx[3]);
+ simde__m128i OFF = simde_mm_setr_epi32(off[0], off[1], off[2], off[3]);
+
+ simde__m128i V4;
+
+ if (bit_width <= 16) {
+ int rshift = 32 - bit_width;
+ simde__m128i w32_le = simde_mm_i32gather_epi32((const int*)in, IDX, 1);  // scale 1: IDX entries are byte offsets
+ simde__m128i w32_be = simde_mm_shuffle_epi8(w32_le, SHUF_REV4);  // first loaded byte becomes the most significant
+ simde__m128i U32 = simde_mm_sllv_epi32(w32_be, OFF);  // shift out the leading bits that precede the value
+ simde__m128i RS32 = simde_mm_set1_epi32(rshift);
+ V4 = simde_mm_srlv_epi32(U32, RS32);  // keep only the top bit_width bits
+ } else {
+ static const simde__m256i SHUF_REV8 = simde_mm256_setr_epi8(  // shuffle mask: reverse the bytes of each 64-bit lane
+ 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3,
+ 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
+ int rshift = 64 - bit_width;
+ simde__m256i w64_le =
+ simde_mm256_i32gather_epi64((const int64_t*)in, IDX, 1);  // 8-byte load per value, byte-addressed
+ simde__m256i w64_be = simde_mm256_shuffle_epi8(w64_le, SHUF_REV8);
+ simde__m256i OFF64 = simde_mm256_cvtepu32_epi64(OFF);
+ simde__m256i U64 = simde_mm256_sllv_epi64(w64_be, OFF64);  // shift out the leading bits that precede the value
+ simde__m256i V64 =
+ simde_mm256_srl_epi64(U64, simde_mm_cvtsi32_si128(rshift));  // keep only the top bit_width bits
+ simde__m256i perm = simde_mm256_setr_epi32(0, 2, 4, 6, 0, 0, 0, 0);  // selects the low 32 bits of each 64-bit lane
+ simde__m256i comp = simde_mm256_permutevar8x32_epi32(V64, perm);
+ V4 = simde_mm256_castsi256_si128(comp);
+ }
+
+ // Add delta_min
+ V4 = simde_mm_add_epi32(V4, VMIN4);
+
+ // Prefix sum to reconstruct absolute values
+ simde__m128i t;
+ t = simde_mm_slli_si128(V4, 4);  // shift by one 32-bit lane
+ V4 = simde_mm_add_epi32(V4, t);
+ t = simde_mm_slli_si128(V4, 8);  // shift by two 32-bit lanes
+ V4 = simde_mm_add_epi32(V4, t);
+
+ // Add base
+ simde__m128i C4 = simde_mm_set1_epi32(base);
+ V4 = simde_mm_add_epi32(V4, C4);
+
+ simde_mm_storeu_si128((simde__m128i*)out, V4);
+ return out[3];
+}
+
+// Decode 4 consecutive INT64 values from bit-packed data using SIMD; parameters mirror the INT32 variant (index is the 0-based delta position, base the previous reconstructed value) and the return value is out[3].
+static inline int64_t simd_decode_4_i64(const uint8_t* in, int32_t bit_width,
+ int64_t delta_min, int32_t index,
+ int64_t base, int64_t out[4]) {
+ static const simde__m256i SHUF_REV8 = simde_mm256_setr_epi8(  // shuffle mask: reverse the bytes of each 64-bit lane
+ 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2,
+ 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
+
+ const simde__m256i VMIN4 = simde_mm256_set1_epi64x(delta_min);
+
+ int32_t pos0 = index * bit_width;  // bit offset of the first of the 4 values
+ int32_t pos[4] = {pos0, pos0 + bit_width, pos0 + 2 * bit_width,
+ pos0 + 3 * bit_width};
+ int32_t bidx[4] = {pos[0] >> 3, pos[1] >> 3, pos[2] >> 3, pos[3] >> 3};  // byte holding each value's first bit
+ int32_t off[4] = {pos[0] & 7, pos[1] & 7, pos[2] & 7, pos[3] & 7};  // bit offset of the value inside that byte
+
+ simde__m128i IDX = simde_mm_setr_epi32(bidx[0], bidx[1], bidx[2], bidx[3]);
+
+ int rshift = 64 - bit_width;
+ simde__m256i w64_le =
+ simde_mm256_i32gather_epi64((const int64_t*)in, IDX, 1);  // one 8-byte load per value (scale 1: byte offsets); 8 bytes must be readable at each bidx
+ simde__m256i w64_be = simde_mm256_shuffle_epi8(w64_le, SHUF_REV8);  // first loaded byte becomes the most significant
+ simde__m256i OFF64 = simde_mm256_cvtepu32_epi64(
+ simde_mm_setr_epi32(off[0], off[1], off[2], off[3]));
+ simde__m256i U64 = simde_mm256_sllv_epi64(w64_be, OFF64);  // shift out the leading bits that precede the value
+ simde__m256i V64 =
+ simde_mm256_srl_epi64(U64, simde_mm_cvtsi32_si128(rshift));  // keep the top bit_width bits; NOTE(review): when off + bit_width > 64 the value's tail lies outside the single 64-bit load, so callers should confirm they avoid that case
+
+ // Add delta_min
+ V64 = simde_mm256_add_epi64(V64, VMIN4);
+
+ // Prefix sum (64-bit, 4 lanes)
+ simde__m256i t;
+ // shift by 8 bytes = 1 lane (applied separately inside each 128-bit half)
+ t = simde_mm256_slli_si256(V64, 8);
+ V64 = simde_mm256_add_epi64(V64, t);
+ // cross-lane: add lane[1] to lane[2] and lane[3]
+ // Done through a small stack buffer: store, patch the upper two elements, reload
+ int64_t tmp_buf[4];
+ simde_mm256_storeu_si256((simde__m256i*)tmp_buf, V64);
+ tmp_buf[2] += tmp_buf[1];
+ tmp_buf[3] += tmp_buf[1];
+ V64 = simde_mm256_loadu_si256((const simde__m256i*)tmp_buf);
+
+ // Add base
+ simde__m256i C4 = simde_mm256_set1_epi64x(base);
+ V64 = simde_mm256_add_epi64(V64, C4);
+
+ simde_mm256_storeu_si256((simde__m256i*)out, V64);
+ return out[3];
+}
+
+#endif // ENABLE_SIMD
+
+// ============================================================================
+// Scalar batch decode helpers
+// ============================================================================
+
+// Scalar: pull a single bit-packed value out of `data`.
+// Bits are numbered from the most significant bit of data[0]; the value's
+// first bit is its most significant one.
+// @data: pointer to packed bits (NOT advanced; caller handles position)
+// @bit_pos: bit offset from start of data
+// @bit_width: bits per value (0 yields 0)
+static inline int64_t scalar_read_bits(const uint8_t* data, int32_t bit_pos,
+                                       int32_t bit_width) {
+    int cursor = bit_pos >> 3;       // byte currently being consumed
+    int room = 8 - (bit_pos & 7);    // unread bits left in that byte
+    int64_t acc = 0;
+
+    for (int pending = bit_width; pending > 0;) {
+        if (pending < room) {
+            // The value ends inside this byte: take `pending` bits from
+            // just below the already-consumed ones and stop.
+            uint8_t piece =
+                (data[cursor] >> (room - pending)) & ((1 << pending) - 1);
+            acc = (acc << pending) | piece;
+            break;
+        }
+        // Take everything that is left in this byte and move on.
+        uint8_t piece = data[cursor] & ((1 << room) - 1);
+        acc = (acc << room) | piece;
+        pending -= room;
+        ++cursor;
+        room = 8;
+    }
+    return acc;
+}
+
namespace ts2diff_java_detail {
// Java float/double TS_2DIFF overflow page markers.
@@ -54,7 +221,7 @@ inline bool bitmap_marked(const std::vector& bm, int idx) {
inline bool looks_like_ts2diff_header(common::ByteStream& in) {
int ret = common::E_OK;
- uint32_t probe_mark = in.read_pos();
+ uint64_t probe_mark = in.read_pos();
int32_t write_index = 0;
int32_t bit_width = 0;
if (RET_FAIL(common::SerializationUtil::read_i32(write_index, in)) ||
@@ -82,7 +249,7 @@ inline int consume_float_double_ts2diff_prefix(
underflow_bm.clear();
overflow_bm.clear();
segment_size = 0;
- uint32_t mark = in.read_pos();
+ uint64_t mark = in.read_pos();
uint32_t tag = 0;
if (RET_FAIL(common::SerializationUtil::read_var_uint(tag, in))) {
return ret;
@@ -132,6 +299,9 @@ inline int consume_float_double_ts2diff_prefix(
} // namespace ts2diff_java_detail
+// ============================================================================
+// TS2DIFFDecoder template
+// ============================================================================
template
class TS2DIFFDecoder : public Decoder {
public:
@@ -148,12 +318,14 @@ class TS2DIFFDecoder : public Decoder {
previous_value_ = 0;
bit_width_ = 0;
current_index_ = 0;
+ header_peeked_ = false;
}
FORCE_INLINE bool has_remaining(const common::ByteStream& buffer) override {
if (buffer.has_remaining()) return true;
- return bits_left_ != 0 || (current_index_ <= write_index_ &&
- write_index_ != -1 && current_index_ != 0);
+ return header_peeked_ || bits_left_ != 0 ||
+ (current_index_ <= write_index_ && write_index_ != -1 &&
+ current_index_ != 0);
}
void read_header(common::ByteStream& in) {
@@ -208,6 +380,18 @@ class TS2DIFFDecoder : public Decoder {
int read_String(common::String& ret_value, common::PageArena& pa,
common::ByteStream& in) override;
+ int read_batch_int32(int32_t* out, int capacity, int& actual,
+ common::ByteStream& in) override;
+ int read_batch_int64(int64_t* out, int capacity, int& actual,
+ common::ByteStream& in) override;
+ int skip_int32(int count, int& skipped, common::ByteStream& in) override;
+ int skip_int64(int count, int& skipped, common::ByteStream& in) override;
+
+ bool peek_next_block_range_int64(common::ByteStream& in, int64_t& block_min,
+ int64_t& block_max,
+ int& block_count) override;
+ int skip_peeked_block_int64(common::ByteStream& in, int& skipped) override;
+
public:
T first_value_;
T previous_value_;
@@ -218,8 +402,13 @@ class TS2DIFFDecoder : public Decoder {
int bit_width_;
int write_index_;
int current_index_;
+ bool header_peeked_;
};
+// ============================================================================
+// Per-value decode (unchanged)
+// ============================================================================
+
template <>
inline int32_t TS2DIFFDecoder::decode(common::ByteStream& in) {
int32_t ret_value = stored_value_;
@@ -274,6 +463,436 @@ inline int64_t TS2DIFFDecoder::decode(common::ByteStream& in) {
return ret_value;
}
+// ============================================================================
+// Batch decode: INT32
+// Fills `out` block after block until `capacity` values were produced or the
+// stream has nothing left. Whole blocks that fit are unpacked directly from
+// the wrapped buffer (SIMD when enabled); everything else goes per-value.
+// ============================================================================
+
+// @out:      destination array with room for `capacity` values
+// @capacity: maximum number of values to produce
+// @actual:   set to the number of values written to `out`
+// @in:       input stream positioned at a block header or inside a block
+// Returns E_OK, the error of a failed header read, or E_TSFILE_CORRUPTED
+// when a block header describes more packed data than the stream holds.
+template <>
+inline int TS2DIFFDecoder<int32_t>::read_batch_int32(int32_t* out, int capacity,
+                                                     int& actual,
+                                                     common::ByteStream& in) {
+    int ret = common::E_OK;
+    actual = 0;
+
+    while (actual < capacity && has_remaining(in)) {
+        // If we are mid-block (current_index_ != 0), finish it per-value.
+        if (current_index_ != 0) {
+            while (actual < capacity && current_index_ != 0 &&
+                   has_remaining(in)) {
+                out[actual++] = decode(in);
+            }
+            continue;
+        }
+
+        // Start of a new block: header, then delta_min_ and first_value_.
+        // A failed read is reported instead of decoding stale fields.
+        read_header(in);
+        if (RET_FAIL(common::SerializationUtil::read_i32(delta_min_, in)) ||
+            RET_FAIL(common::SerializationUtil::read_i32(first_value_, in))) {
+            return ret;
+        }
+        bits_left_ = 0;
+        buffer_ = 0;
+
+        // The loop guard guarantees actual < capacity here, so there is
+        // always room for the block's first value.
+        out[actual++] = first_value_;
+
+        if (write_index_ == 0) {
+            // Block has only first_value, no deltas
+            current_index_ = 0;
+            continue;
+        }
+
+        const int32_t remaining = write_index_;
+        // Compare against the free space rather than forming
+        // actual + remaining, which could overflow for a bogus write_index_.
+        if (remaining > capacity - actual || !in.is_wrapped()) {
+            // Either the block won't fit in the output, or the stream is
+            // paged and has no contiguous buffer to decode from. Fall back
+            // to per-value decode: the stream is at the packed data start
+            // and bits_left_/buffer_ are reset.
+            current_index_ = 1;
+            continue;
+        }
+
+        // Full block decode. Validate against corrupt headers before
+        // advancing the read position. Bit positions are tracked in
+        // int32_t below, so the total bit count must fit as well.
+        if (UNLIKELY(write_index_ < 0 || bit_width_ < 0 || bit_width_ > 32)) {
+            return common::E_TSFILE_CORRUPTED;
+        }
+        const int64_t total_bits =
+            static_cast<int64_t>(write_index_) * bit_width_;
+        if (UNLIKELY(total_bits > INT32_MAX - 7)) {
+            return common::E_TSFILE_CORRUPTED;
+        }
+        const int64_t block_bytes_64 = (total_bits + 7) / 8;
+        if (UNLIKELY(block_bytes_64 > in.remaining_size())) {
+            return common::E_TSFILE_CORRUPTED;
+        }
+        const int32_t block_bytes = static_cast<int32_t>(block_bytes_64);
+        const uint8_t* blk_ptr =
+            (const uint8_t*)in.get_wrapped_buf() + in.read_pos();
+        in.wrapped_buf_advance_read_pos(static_cast<uint32_t>(block_bytes));
+
+        int32_t prev = first_value_;
+        int32_t i = 0;
+
+#ifdef ENABLE_SIMD
+        // SIMD path: decode 8 values at a time (2 groups of 4). The extra
+        // 4/8 bytes keep the helper's word-sized loads inside the block.
+        for (; i + 7 < remaining; i += 8) {
+            int32_t need_bytes = ((i + 7) * bit_width_ + bit_width_ + 7) / 8 +
+                                 (bit_width_ > 16 ? 8 : 4);
+            if (need_bytes > block_bytes) break;
+
+            int32_t grp_out[8];
+            prev = simd_decode_4_i32(blk_ptr, bit_width_, delta_min_, i, prev,
+                                     grp_out);
+            prev = simd_decode_4_i32(blk_ptr, bit_width_, delta_min_, i + 4,
+                                     prev, grp_out + 4);
+
+            memcpy(out + actual, grp_out, 8 * sizeof(int32_t));
+            actual += 8;
+        }
+#endif
+
+        // Scalar tail
+        int32_t bit_pos = i * bit_width_;
+        for (; i < remaining; ++i) {
+            int64_t delta = scalar_read_bits(blk_ptr, bit_pos, bit_width_);
+            bit_pos += bit_width_;
+            int32_t val = (int32_t)delta + prev + delta_min_;
+            prev = val;
+            out[actual++] = val;
+        }
+
+        // Block done, reset state
+        first_value_ = prev;
+        current_index_ = 0;
+    }
+
+    return ret;
+}
+
+// ============================================================================
+// Batch decode: INT64
+// Fills `out` block after block until `capacity` values were produced or the
+// stream has nothing left. A header already consumed by
+// peek_next_block_range_int64() (header_peeked_) is reused, not re-read.
+// ============================================================================
+
+// @out:      destination array with room for `capacity` values
+// @capacity: maximum number of values to produce
+// @actual:   set to the number of values written to `out`
+// @in:       input stream positioned at a block header or inside a block
+// Returns E_OK, the error of a failed header read, or E_TSFILE_CORRUPTED
+// when a block header describes more packed data than the stream holds.
+template <>
+inline int TS2DIFFDecoder<int64_t>::read_batch_int64(int64_t* out, int capacity,
+                                                     int& actual,
+                                                     common::ByteStream& in) {
+    int ret = common::E_OK;
+    actual = 0;
+
+    while (actual < capacity && has_remaining(in)) {
+        // If mid-block, finish per-value
+        if (current_index_ != 0) {
+            while (actual < capacity && current_index_ != 0 &&
+                   has_remaining(in)) {
+                out[actual++] = decode(in);
+            }
+            continue;
+        }
+
+        // Start of a new block. A failed read is reported instead of
+        // decoding stale fields.
+        if (!header_peeked_) {
+            read_header(in);
+            if (RET_FAIL(
+                    common::SerializationUtil::read_i64(delta_min_, in)) ||
+                RET_FAIL(
+                    common::SerializationUtil::read_i64(first_value_, in))) {
+                return ret;
+            }
+            bits_left_ = 0;
+            buffer_ = 0;
+        }
+        header_peeked_ = false;
+
+        out[actual++] = first_value_;
+
+        if (write_index_ == 0) {
+            current_index_ = 0;
+            continue;
+        }
+
+        const int32_t remaining = write_index_;
+        // Compare against the free space rather than forming
+        // actual + remaining, which could overflow for a bogus write_index_.
+        if (remaining > capacity - actual || !in.is_wrapped()) {
+            // Either the block won't fit in the output, or the stream is
+            // paged and has no contiguous buffer to decode from. Fall back
+            // to per-value decode: the stream is at the packed data start
+            // and bits_left_/buffer_ are reset.
+            current_index_ = 1;
+            continue;
+        }
+
+        // Validate against corrupt headers before advancing the read
+        // position. Bit positions are tracked in int32_t below, so the
+        // total bit count must fit as well.
+        if (UNLIKELY(write_index_ < 0 || bit_width_ < 0 || bit_width_ > 64)) {
+            return common::E_TSFILE_CORRUPTED;
+        }
+        const int64_t total_bits =
+            static_cast<int64_t>(write_index_) * bit_width_;
+        if (UNLIKELY(total_bits > INT32_MAX - 7)) {
+            return common::E_TSFILE_CORRUPTED;
+        }
+        const int64_t block_bytes_64 = (total_bits + 7) / 8;
+        if (UNLIKELY(block_bytes_64 > in.remaining_size())) {
+            return common::E_TSFILE_CORRUPTED;
+        }
+        const int32_t block_bytes = static_cast<int32_t>(block_bytes_64);
+        // Direct pointer into the wrapped ByteStream buffer.
+        const uint8_t* blk_ptr =
+            (const uint8_t*)in.get_wrapped_buf() + in.read_pos();
+        in.wrapped_buf_advance_read_pos(static_cast<uint32_t>(block_bytes));
+
+        int64_t prev = first_value_;
+        int32_t i = 0;
+
+#ifdef ENABLE_SIMD
+        // SIMD path: decode 4 INT64 values at a time.
+        // simd_decode_4_i64 cuts every value out of ONE 64-bit word loaded
+        // at the value's first byte. A value may start up to 7 bits into
+        // that byte, so only widths <= 57 are guaranteed to be fully covered
+        // by the load; wider deltas are left to the scalar loop below.
+        if (bit_width_ <= 57) {
+            for (; i + 3 < remaining; i += 4) {
+                // The extra 8 bytes keep the last 64-bit load in the block.
+                int32_t need_bytes =
+                    ((i + 3) * bit_width_ + bit_width_ + 7) / 8 + 8;
+                if (need_bytes > block_bytes) break;
+
+                int64_t grp_out[4];
+                prev = simd_decode_4_i64(blk_ptr, bit_width_, delta_min_, i,
+                                         prev, grp_out);
+                memcpy(out + actual, grp_out, 4 * sizeof(int64_t));
+                actual += 4;
+            }
+        }
+#endif
+
+        // Scalar tail
+        int32_t bit_pos = i * bit_width_;
+        for (; i < remaining; ++i) {
+            int64_t delta = scalar_read_bits(blk_ptr, bit_pos, bit_width_);
+            bit_pos += bit_width_;
+            int64_t val = delta + prev + delta_min_;
+            prev = val;
+            out[actual++] = val;
+        }
+
+        // Block done, reset state
+        first_value_ = prev;
+        current_index_ = 0;
+    }
+
+    return ret;
+}
+
+// ============================================================================
+// Skip: INT32 — read header only, jump over packed data when possible
+// ============================================================================
+
+// Skips up to `count` values.
+// @count:   number of values to skip
+// @skipped: set to the number of values actually skipped
+// @in:      input stream positioned at a block header or inside a block
+// Returns E_OK, the error of a failed header read, or E_TSFILE_CORRUPTED
+// when a block that would be jumped over has an implausible header.
+template <>
+inline int TS2DIFFDecoder<int32_t>::skip_int32(int count, int& skipped,
+                                               common::ByteStream& in) {
+    int ret = common::E_OK;
+    skipped = 0;
+
+    // If mid-block, finish current block per-value
+    while (skipped < count && current_index_ != 0 && has_remaining(in)) {
+        decode(in);
+        ++skipped;
+    }
+
+    while (skipped < count && has_remaining(in)) {
+        int32_t wi = 0, bw = 0, dm = 0, fv = 0;
+        if (RET_FAIL(common::SerializationUtil::read_i32(wi, in)) ||
+            RET_FAIL(common::SerializationUtil::read_i32(bw, in)) ||
+            RET_FAIL(common::SerializationUtil::read_i32(dm, in)) ||
+            RET_FAIL(common::SerializationUtil::read_i32(fv, in))) {
+            return ret;
+        }
+
+        bits_left_ = 0;
+        buffer_ = 0;
+
+        // 64-bit so that a bogus wi == INT32_MAX cannot overflow.
+        const int64_t block_vals = static_cast<int64_t>(wi) + 1;
+
+        if (count - skipped >= block_vals) {
+            // The whole block is to be skipped. Same header sanity rules
+            // as the batch decoder's whole-block path.
+            if (UNLIKELY(wi < 0 || bw < 0 || bw > 32)) {
+                return common::E_TSFILE_CORRUPTED;
+            }
+            if (in.is_wrapped()) {
+                // Fast path: jump over the packed body. Only a wrapped
+                // (contiguous) stream can move its read position this way.
+                const int64_t skip_bytes =
+                    (static_cast<int64_t>(wi) * bw + 7) / 8;
+                if (UNLIKELY(skip_bytes > in.remaining_size())) {
+                    return common::E_TSFILE_CORRUPTED;
+                }
+                in.wrapped_buf_advance_read_pos(
+                    static_cast<uint32_t>(skip_bytes));
+                skipped += static_cast<int>(block_vals);
+                current_index_ = 0;
+                write_index_ = -1;
+                continue;
+            }
+        }
+
+        // Partial block (or a paged stream): reinstate decoder state as if
+        // we'd just emitted first_value_ from decode(), bump skipped by 1,
+        // then per-value decode the remaining count, leaving the rest of
+        // the block intact for the next decode() call.
+        write_index_ = wi;
+        bit_width_ = bw;
+        delta_min_ = dm;
+        first_value_ = fv;
+        current_index_ = (wi == 0) ? 0 : 1;
+        ++skipped;
+        while (skipped < count && current_index_ != 0 && has_remaining(in)) {
+            decode(in);
+            ++skipped;
+        }
+    }
+
+    return ret;
+}
+
+// ============================================================================
+// Skip: INT64 — read header only, jump over packed data when possible
+// ============================================================================
+
+// Skips up to `count` values.
+// @count:   number of values to skip
+// @skipped: set to the number of values actually skipped
+// @in:      input stream positioned at a block header or inside a block
+// Returns E_OK, the error of a failed header read, or E_TSFILE_CORRUPTED
+// when a block that would be jumped over has an implausible header.
+template <>
+inline int TS2DIFFDecoder<int64_t>::skip_int64(int count, int& skipped,
+                                               common::ByteStream& in) {
+    int ret = common::E_OK;
+    skipped = 0;
+
+    // If mid-block, finish current block per-value
+    while (skipped < count && current_index_ != 0 && has_remaining(in)) {
+        decode(in);
+        ++skipped;
+    }
+
+    while (skipped < count && has_remaining(in)) {
+        int32_t wi = 0, bw = 0;
+        int64_t dm = 0, fv = 0;
+        if (RET_FAIL(common::SerializationUtil::read_i32(wi, in)) ||
+            RET_FAIL(common::SerializationUtil::read_i32(bw, in)) ||
+            RET_FAIL(common::SerializationUtil::read_i64(dm, in)) ||
+            RET_FAIL(common::SerializationUtil::read_i64(fv, in))) {
+            return ret;
+        }
+
+        bits_left_ = 0;
+        buffer_ = 0;
+
+        // 64-bit so that a bogus wi == INT32_MAX cannot overflow.
+        const int64_t block_vals = static_cast<int64_t>(wi) + 1;
+
+        if (count - skipped >= block_vals) {
+            // The whole block is to be skipped. Same header sanity rules
+            // as the batch decoder's whole-block path.
+            if (UNLIKELY(wi < 0 || bw < 0 || bw > 64)) {
+                return common::E_TSFILE_CORRUPTED;
+            }
+            if (in.is_wrapped()) {
+                // Fast path: jump over the packed body. Only a wrapped
+                // (contiguous) stream can move its read position this way.
+                const int64_t skip_bytes =
+                    (static_cast<int64_t>(wi) * bw + 7) / 8;
+                if (UNLIKELY(skip_bytes > in.remaining_size())) {
+                    return common::E_TSFILE_CORRUPTED;
+                }
+                in.wrapped_buf_advance_read_pos(
+                    static_cast<uint32_t>(skip_bytes));
+                skipped += static_cast<int>(block_vals);
+                current_index_ = 0;
+                write_index_ = -1;
+                continue;
+            }
+        }
+
+        // Partial block (or a paged stream): reinstate decoder state as if
+        // we'd just emitted first_value_ from decode(), bump skipped by 1,
+        // then per-value decode the remaining count, leaving the rest of
+        // the block intact for the next decode() call.
+        write_index_ = wi;
+        bit_width_ = bw;
+        delta_min_ = dm;
+        first_value_ = fv;
+        current_index_ = (wi == 0) ? 0 : 1;
+        ++skipped;
+        while (skipped < count && current_index_ != 0 && has_remaining(in)) {
+            decode(in);
+            ++skipped;
+        }
+    }
+
+    return ret;
+}
+
+// ============================================================================
+// Block-level filter check: peek header and compute value range
+// ============================================================================
+
+// Consumes the next block header and reports a value range for the block
+// without decoding its packed deltas.
+// @in:          input stream positioned at a block header
+// @block_min:   set to the block's first value
+// @block_max:   set to an upper bound for the block's values (see below)
+// @block_count: set to write_index_ + 1, the number of values in the block
+// Returns false (nothing consumed) when the decoder is mid-block or the
+// stream has nothing left; otherwise true, with header_peeked_ set so that
+// read_batch_int64() reuses the header instead of reading it again.
+// NOTE(review): block_min/block_max assume non-decreasing values
+// (timestamps) — confirm callers only use this for such columns.
+template <>
+inline bool TS2DIFFDecoder<int64_t>::peek_next_block_range_int64(
+    common::ByteStream& in, int64_t& block_min, int64_t& block_max,
+    int& block_count) {
+    if (current_index_ != 0 || !has_remaining(in)) return false;
+
+    read_header(in);
+    common::SerializationUtil::read_i64(delta_min_, in);
+    common::SerializationUtil::read_i64(first_value_, in);
+    bits_left_ = 0;
+    buffer_ = 0;
+
+    block_min = first_value_;
+    block_count = write_index_ + 1;
+
+    // first + steps * step, clamped to the int64_t range instead of
+    // overflowing (a wrapped result would under-report block_max).
+    const auto clamped_end = [](int64_t first, int64_t steps,
+                                int64_t step) -> int64_t {
+        if (steps <= 0 || step == 0) return first;
+        const uint64_t n = static_cast<uint64_t>(steps);
+        if (step > 0) {
+            const uint64_t room = static_cast<uint64_t>(INT64_MAX) -
+                                  static_cast<uint64_t>(first);
+            const uint64_t inc = static_cast<uint64_t>(step);
+            if (inc > room / n) return INT64_MAX;
+            return static_cast<int64_t>(static_cast<uint64_t>(first) +
+                                        n * inc);
+        }
+        const uint64_t room =
+            static_cast<uint64_t>(first) - static_cast<uint64_t>(INT64_MIN);
+        const uint64_t dec = 0 - static_cast<uint64_t>(step);
+        if (dec > room / n) return INT64_MIN;
+        return static_cast<int64_t>(static_cast<uint64_t>(first) - n * dec);
+    };
+
+    // An implausible header gives no usable bound; report the widest
+    // maximum and let the subsequent decode deal with the block.
+    const bool header_sane =
+        write_index_ >= 0 && bit_width_ >= 0 && bit_width_ <= 64;
+    // 64-bit so that write_index_ * bit_width_ cannot overflow.
+    const int64_t packed_bytes =
+        header_sane
+            ? (static_cast<int64_t>(write_index_) * bit_width_ + 7) / 8
+            : 0;
+
+    // Look-ahead: with non-decreasing values, the next block's first_value_
+    // is an upper bound for everything in this block. The next header
+    // starts at read_pos + packed_bytes, and first_value_ sits at offset 16
+    // inside it (write_index_(4) + bit_width_(4) + delta_min_(8)). It is
+    // read through a raw pointer so the stream position is not consumed,
+    // which is only possible on a wrapped (contiguous) stream.
+    if (header_sane && in.is_wrapped() &&
+        static_cast<int64_t>(in.remaining_size()) >= packed_bytes + 24) {
+        char* next_fv_ptr =
+            in.get_wrapped_buf() + in.read_pos() + packed_bytes + 16;
+        block_max = (int64_t)common::SerializationUtil::read_ui64(next_fv_ptr);
+    } else if (!header_sane) {
+        block_max = INT64_MAX;
+    } else if (write_index_ == 0 || bit_width_ == 0) {
+        // Last block in page, all deltas equal delta_min_ (or none at all).
+        block_max = clamped_end(first_value_, write_index_, delta_min_);
+    } else if (bit_width_ >= 63) {
+        block_max = INT64_MAX;
+    } else {
+        // Last block in page: conservative estimate from the largest delta
+        // the bit width can express.
+        const int64_t span = (1LL << bit_width_) - 1;
+        if (delta_min_ > INT64_MAX - span) {
+            block_max = INT64_MAX;
+        } else {
+            block_max =
+                clamped_end(first_value_, write_index_, delta_min_ + span);
+        }
+    }
+
+    header_peeked_ = true;
+    return true;
+}
+
+template <>
+inline int TS2DIFFDecoder