diff --git a/inst/include/TreeTools/SplitList.h b/inst/include/TreeTools/SplitList.h index e827d8d9..067f069b 100644 --- a/inst/include/TreeTools/SplitList.h +++ b/inst/include/TreeTools/SplitList.h @@ -13,15 +13,19 @@ using splitbit = uint_fast64_t; #define R_BIN_SIZE int16(8) #define SL_BIN_SIZE int16(64) -#define SL_MAX_BINS int16(32) +#define SL_MAX_BINS int16(512) -/* * Stack allocation limits (Legacy support for speed) - * Trees smaller than this will use stack arrays. - * Trees larger will trigger heap allocation. - */ -#define SL_MAX_TIPS (SL_BIN_SIZE * SL_MAX_BINS) // 2048 +#define SL_MAX_TIPS (SL_BIN_SIZE * SL_MAX_BINS) // 32768 #define SL_MAX_SPLITS (SL_MAX_TIPS - 3) +/* Stack allocation thresholds. + * Trees with n_splits <= SL_STACK_SPLITS AND n_bins <= SL_STACK_BINS + * use fast stack arrays; larger trees fall back to heap allocation. + * Kept at the pre-v1.16 values to avoid bloating SplitList objects. + */ +#define SL_STACK_BINS int16(32) +#define SL_STACK_SPLITS int16(SL_BIN_SIZE * SL_STACK_BINS - 3) // 2045 + #define INLASTBIN(n, size) int16((size) - int16((size) - int16((n) % (size))) % (size)) #define INSUBBIN(bin, offset) \ splitbit(x(split, ((bin) * input_bins_per_bin) + (offset))) @@ -75,10 +79,10 @@ namespace TreeTools { splitbit** state; private: - /* STACK STORAGE (Fast path for small trees) */ - int32 stack_in_split[SL_MAX_SPLITS]; - splitbit stack_state[SL_MAX_SPLITS][SL_MAX_BINS]; - splitbit* stack_rows[SL_MAX_SPLITS]; + /* STACK STORAGE (Fast path for small trees ≤ SL_STACK_SPLITS splits) */ + int32 stack_in_split[SL_STACK_SPLITS]; + splitbit stack_state[SL_STACK_SPLITS][SL_STACK_BINS]; + splitbit* stack_rows[SL_STACK_SPLITS]; /* HEAP STORAGE (Large trees) */ std::vector heap_in_split; @@ -102,7 +106,7 @@ namespace TreeTools { ASSERT(n_input_bins > 0); n_bins = int32(n_input_bins + R_BIN_SIZE - 1) / input_bins_per_bin; - bool use_heap = (n_splits > SL_MAX_SPLITS) || (n_bins > SL_MAX_BINS); + bool use_heap = (n_splits > SL_STACK_SPLITS) || (n_bins > SL_STACK_BINS); if (use_heap) { heap_in_split.resize(n_splits, 0); diff --git a/src/splits_to_tree.cpp b/src/splits_to_tree.cpp index 11ed9eab..b7b518cd 100644 --- a/src/splits_to_tree.cpp +++ b/src/splits_to_tree.cpp @@ -31,11 +31,13 @@ IntegerMatrix splits_to_edge(const RawMatrix splits, const IntegerVector nTip) { const SplitList x(splits); // Decide whether to use stack or heap allocation based on tree size - const bool use_heap = (n_tip > SL_MAX_TIPS) || (x.n_splits > SL_MAX_SPLITS); + // Use stack arrays for small trees, heap for large + constexpr int32 stack_tip_lim = SL_BIN_SIZE * SL_STACK_BINS; + const bool use_heap = (n_tip > stack_tip_lim) || (x.n_splits > SL_STACK_SPLITS); // Stack allocation for small trees (fast path) - alignas(64) std::array stack_parent{}; - alignas(64) std::array stack_patriarch{}; + alignas(64) std::array stack_parent{}; + alignas(64) std::array stack_patriarch{}; // Heap allocation for large trees std::vector heap_parent;