Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions .github/workflows/R-CMD-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@ on:
- "**.R[dD]ata"
- "**.Rpro*"
pull_request:
branches:
- main
- master
branches: ["*"]
paths-ignore:
- "Meta**"
- "memcheck**"
Expand Down
1 change: 1 addition & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,4 @@ binary 0/1 in an underlying `raw` object.
before moving on to the next task.
- Increment the `.900X` dev version suffix in `DESCRIPTION` with each
`NEWS.md` update.
- Check that existing tests cover all new code. (The GHA test suite uses codecov.)
5 changes: 3 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# TreeTools 2.1.0.9005 (2026-03-13) #

- `SplitFrequency(reference = NULL)` split normalization moved to C++,
eliminating an R-level per-split loop.
- `SplitFrequency(reference = NULL)` performance improvements:
split normalization moved to C++; internal split de-duplication uses
hash map instead of ordered map.

# TreeTools 2.1.0.9003 (2026-03-09) #

Expand Down
24 changes: 14 additions & 10 deletions src/consensus.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ using namespace Rcpp;

#include <algorithm> /* for fill */
#include <array> /* for array */
#include <map> /* for map */
#include <string> /* for string (hash key) */
#include <unordered_map> /* for unordered_map */

using TreeTools::ct_stack_threshold;
using TreeTools::ct_max_leaves_heap;
Expand Down Expand Up @@ -178,12 +179,15 @@ List calc_split_frequencies(

StackEntry *const S_start = S.data();

// Use a map to store unique splits and their counts
// Key: split bit pattern; Value: index in output
std::map<std::vector<Rbyte>, int32> split_map;
// Hash map for O(1) amortized split deduplication
std::unordered_map<std::string, int32> split_map;
split_map.reserve(ntip_3 * 2);
std::vector<std::vector<Rbyte>> split_patterns;
std::vector<int32> counts;

// Reusable key buffer — avoids per-split heap allocation
std::string key(nbin, '\0');

for (int32 i = 0; i < n_trees; ++i) {
if (tables[i].NOSWX(ntip_3)) {
continue;
Expand Down Expand Up @@ -247,21 +251,21 @@ List calc_split_frequencies(
const int32 end = tables[i].X_right(k + 1);
if (start == 0 && end == 0) continue; // No valid cluster at this position

// Build the bit pattern for this split
std::vector<Rbyte> pattern(nbin, 0);
// Build the bit pattern into the reusable key buffer
std::fill(key.begin(), key.end(), '\0');
for (int32 j = start; j <= end; ++j) {
const int32 leaf_idx = tables[i].DECODE(j) - 1; // 0-based
const int32 byte_idx = leaf_idx >> 3;
const int32 bit_idx = leaf_idx & 7;
pattern[byte_idx] |= (Rbyte(1) << bit_idx);
key[byte_idx] |= static_cast<char>(1 << bit_idx);
}

auto it = split_map.find(pattern);
auto it = split_map.find(key);
if (it == split_map.end()) {
// New split: record it with count from this reference tree
const int32 idx = split_patterns.size();
split_map[pattern] = idx;
split_patterns.push_back(std::move(pattern));
split_map.emplace(key, idx);
split_patterns.emplace_back(key.begin(), key.end());
counts.push_back(split_count[k]);
}
// If already found, the first reference tree that found it has the
Expand Down
13 changes: 13 additions & 0 deletions tests/testthat/test-Support.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,19 @@ test_that("Node support colours consistent", {
c("oor", "1", "34", "67", "101"))
})

test_that("normalize_splits() covers all branches", {
# n_spare == 0 (nTip multiple of 8): complement without trailing-bit mask
trees16 <- c(PectinateTree(16), PectinateTree(16))
freq16 <- SplitFrequency(trees16)
expect_equal(length(freq16), NSplits(trees16[[1]]))
expect_true(all(attr(freq16, "count") == 2L))

# Trivial splits are filtered: a star tree produces no non-trivial splits
star <- c(StarTree(10), StarTree(10))
freq_star <- SplitFrequency(star)
expect_equal(length(freq_star), 0)
})

test_that("SplitFrequency() handles four-split trees", {
trees <- AddTipEverywhere(BalancedTree(3))
trees <- c(trees[1], trees)
Expand Down
Loading