Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,11 @@ binary 0/1 in an underlying `raw` object.
- Increment the `.900X` dev version suffix in `DESCRIPTION` with each
`NEWS.md` update.
- Check that existing tests cover all new code. (The GHA test suite uses codecov.)

## Optimization notes

- `descendant_edges_single()`: the O(n_edge) linear scan per node looks
theoretically O(n²), but benchmarking (CSR index, vector-of-vectors)
showed the original is faster at all practical sizes (up to 50k tips)
due to cache-friendly sequential access over contiguous Rcpp memory.
Not worth optimizing.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: TreeTools
Title: Create, Modify and Analyse Phylogenetic Trees
Version: 2.1.0.9005
Version: 2.1.0.9006
Authors@R: c(
person("Martin R.", 'Smith', role = c("aut", "cre", "cph"),
email = "martin.smith@durham.ac.uk",
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# TreeTools 2.1.0.9006 (2026-03-13) #

- `NodeDepth()` for unrooted trees rewritten as O(n) two-pass C++ algorithm,
replacing iterative R while-loop.

# TreeTools 2.1.0.9005 (2026-03-13) #

- `SplitFrequency(reference = NULL)` performance improvements:
Expand Down
4 changes: 4 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ n_cherries_wrapper <- function(parent, child, nTip) {
.Call(`_TreeTools_n_cherries_wrapper`, parent, child, nTip)
}

node_depth_unrooted <- function(parent, child, postorder, shortest) {
.Call(`_TreeTools_node_depth_unrooted`, parent, child, postorder, shortest)
}

path_lengths <- function(edge, weight, init_nas) {
.Call(`_TreeTools_path_lengths`, edge, weight, init_nas)
}
Expand Down
74 changes: 3 additions & 71 deletions R/tree_properties.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,81 +79,13 @@ NodeDepth.phylo <- function(x, shortest = FALSE, includeTips = TRUE) {

#' @export
NodeDepth.matrix <- function(x, shortest = FALSE, includeTips = TRUE) {


.NodeDepth.short <- function() {

depths <- c(leaf0s, vapply(minVertex:nVertex, function(node)
if (any(!is.na(leaf0s[child[parent == node]]))) 1L else NA_integer_
, 0L))
maxDepth <- 1L

while(any(is.na(depths))) {
for (node in rev(which(is.na(depths)))) {
incident <- c(depths[child[parent == node]],
depths[parent[child == node]])
na <- is.na(incident)
nNa <- sum(na)
if (nNa == 0L) {
depths[node] <- min(incident) + 1L
} else if (nNa == 1L) {
aIncident <- incident[!na]
if (all(aIncident <= maxDepth)) {
depths[node] <- min(aIncident) + 1L
}
}
}
maxDepth <- maxDepth + 1L
}

#Return:
depths
}

.NodeDepth.long <- function() {

depths <- c(leaf0s, vapply(minVertex:nVertex, function(node)
if (any(is.na(leaf0s[child[parent == node]]))) NA_integer_ else 1L
, 0L))
maxDepth <- 1L

while(any(is.na(depths))) {
for (node in rev(which(is.na(depths)))) {
incident <- c(depths[child[parent == node]],
depths[parent[child == node]])
na <- is.na(incident)
nNa <- sum(na)
if (nNa == 0L) {
depths[node] <- sort(incident, decreasing = TRUE)[2] + 1L
} else if (nNa == 1L) {
aIncident <- incident[!na]
if (all(aIncident <= maxDepth)) {
depths[node] <- max(aIncident) + 1L
}
}
}
maxDepth <- maxDepth + 1L
}

#Return:
depths
}


parent <- x[, 1]
child <- x[, 2]
minVertex <- min(parent)
nVertex <- max(parent)

nLeaf <- minVertex - 1L
nNode <- nVertex - nLeaf
leaf0s <- integer(nLeaf)

depths <- if (shortest) .NodeDepth.short() else .NodeDepth.long()
postorder <- PostorderOrder(x)
depths <- node_depth_unrooted(parent, child, postorder, shortest)

# Return:
if (includeTips) depths else depths[minVertex:nVertex]

if (includeTips) depths else depths[(min(parent)):max(parent)]
}

.NodeDepth.rooted <- function(x, shortest = FALSE, includeTips = TRUE) {
Expand Down
15 changes: 15 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,20 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// node_depth_unrooted
IntegerVector node_depth_unrooted(const IntegerVector parent, const IntegerVector child, const IntegerVector postorder, const bool shortest);
RcppExport SEXP _TreeTools_node_depth_unrooted(SEXP parentSEXP, SEXP childSEXP, SEXP postorderSEXP, SEXP shortestSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const IntegerVector >::type parent(parentSEXP);
Rcpp::traits::input_parameter< const IntegerVector >::type child(childSEXP);
Rcpp::traits::input_parameter< const IntegerVector >::type postorder(postorderSEXP);
Rcpp::traits::input_parameter< const bool >::type shortest(shortestSEXP);
rcpp_result_gen = Rcpp::wrap(node_depth_unrooted(parent, child, postorder, shortest));
return rcpp_result_gen;
END_RCPP
}
// path_lengths
NumericMatrix path_lengths(const IntegerMatrix edge, const DoubleVector weight, const LogicalVector init_nas);
RcppExport SEXP _TreeTools_path_lengths(SEXP edgeSEXP, SEXP weightSEXP, SEXP init_nasSEXP) {
Expand Down Expand Up @@ -516,6 +530,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_TreeTools_kept_vertices", (DL_FUNC) &_TreeTools_kept_vertices, 2},
{"_TreeTools_minimum_spanning_tree", (DL_FUNC) &_TreeTools_minimum_spanning_tree, 1},
{"_TreeTools_n_cherries_wrapper", (DL_FUNC) &_TreeTools_n_cherries_wrapper, 3},
{"_TreeTools_node_depth_unrooted", (DL_FUNC) &_TreeTools_node_depth_unrooted, 4},
{"_TreeTools_path_lengths", (DL_FUNC) &_TreeTools_path_lengths, 3},
{"_TreeTools_cpp_edge_to_splits", (DL_FUNC) &_TreeTools_cpp_edge_to_splits, 3},
{"_TreeTools_duplicated_splits", (DL_FUNC) &_TreeTools_duplicated_splits, 2},
Expand Down
141 changes: 141 additions & 0 deletions src/node_depth.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
#include <Rcpp/Lightest>
#include <algorithm> /* for min_element, max_element, sort */
#include <vector>
using namespace Rcpp;

// Compute node depth for unrooted trees in O(n) via two-pass traversal.
// depth[v] = second-largest neighbour distance (shortest=false)
// or smallest neighbour distance (shortest=true)
// Tips have depth 0.
// Returns depths for all vertices 1..max_node.
// [[Rcpp::export]]
IntegerVector node_depth_unrooted(
const IntegerVector parent,
const IntegerVector child,
const IntegerVector postorder, // 1-based edge indices in postorder
const bool shortest
) {
const int n_edge = parent.size();
const int root = *std::min_element(parent.begin(), parent.end());
const int n_tip = root - 1;
const int max_node = *std::max_element(parent.begin(), parent.end());

// subtree_h[v]: max (or min) depth below v in the rooted representation
// Tips: 0. Internal: func(children's subtree_h + 1)
std::vector<int> subtree_h(max_node + 1, 0);

// For longest path: store top-2 subtree heights per node
// For shortest path: store bottom-2
// best1[v] >= best2[v] (longest) or best1[v] <= best2[v] (shortest)
// best1_child[v] = which child contributed best1
const int INF = 1000000;
const int NEG_INF = -1;
std::vector<int> best1(max_node + 1, shortest ? INF : NEG_INF);
std::vector<int> best2(max_node + 1, shortest ? INF : NEG_INF);
std::vector<int> best1_child(max_node + 1, -1);

// Postorder: compute subtree_h and best1/best2
for (int i = 0; i < n_edge; ++i) {
const int e = postorder[i] - 1; // 0-based
const int p = parent[e];
const int c = child[e];
const int ch = subtree_h[c] + 1; // height through this child

if (shortest) {
if (ch <= best1[p]) {
best2[p] = best1[p];
best1[p] = ch;
best1_child[p] = c;
} else if (ch < best2[p]) {
best2[p] = ch;
}
subtree_h[p] = best1[p];
} else {
if (ch >= best1[p]) {
best2[p] = best1[p];
best1[p] = ch;
best1_child[p] = c;
} else if (ch > best2[p]) {
best2[p] = ch;
}
subtree_h[p] = best1[p];
}
}

// from_above[v]: longest (or shortest) path from v going through parent
std::vector<int> from_above(max_node + 1, NEG_INF);
// Root has no parent; from_above[root] stays at NEG_INF

// Preorder: compute from_above (reverse postorder)
for (int i = n_edge - 1; i >= 0; --i) {
const int e = postorder[i] - 1;
const int p = parent[e];
const int c = child[e];

// Best alternative at p excluding path through c:
// If c contributed best1[p], use best2[p]; otherwise use best1[p]
int best_sibling;
if (best1_child[p] == c) {
best_sibling = best2[p];
} else {
best_sibling = best1[p];
}

if (shortest) {
// from_above[c] = 1 + min(from_above[p], best_sibling)
if (from_above[p] == NEG_INF) {
// p is root (or from_above not set): only siblings matter
from_above[c] = (best_sibling < INF) ? 1 + best_sibling : INF;
} else {
from_above[c] = 1 + std::min(from_above[p], best_sibling);
}
} else {
if (from_above[p] == NEG_INF) {
from_above[c] = (best_sibling > NEG_INF) ? 1 + best_sibling : NEG_INF;
} else {
from_above[c] = 1 + std::max(from_above[p], best_sibling);
}
}
}

// Combine: depth[v] for internal nodes
IntegerVector depths(max_node, 0); // 1-indexed via [v-1]; tips stay 0

for (int v = n_tip + 1; v <= max_node; ++v) {
// Collect all neighbour distances
// Children contribute subtree_h[child] + 1 (already in best1/best2)
// Parent direction contributes from_above[v]

if (shortest) {
int d = best1[v]; // smallest child path
if (from_above[v] != NEG_INF && from_above[v] < d) {
d = from_above[v];
}
depths[v - 1] = d;
} else {
// Need second-largest among all neighbour distances
// Neighbours: best1[v], best2[v], from_above[v]
// (best1 >= best2 for longest)
// Sort descending and take [1] (second largest)
int a = best1[v];
int b = best2[v];
int c_val = from_above[v];
// Ensure we handle cases where from_above is undefined (root)
if (c_val == NEG_INF) {
// Root node: only children matter; second largest = best2
depths[v - 1] = (b > NEG_INF) ? b : a;
} else {
// Three candidates: a >= b, c_val unknown
if (c_val >= a) {
depths[v - 1] = a; // c_val >= a >= b, second = a
} else if (c_val >= b) {
depths[v - 1] = c_val; // a > c_val >= b, second = c_val
} else {
depths[v - 1] = b; // a >= b > c_val, second = b
}
}
}
}

return depths;
}
Loading