diff --git a/.positai/settings.json b/.positai/settings.json index 500757de..645911e2 100644 --- a/.positai/settings.json +++ b/.positai/settings.json @@ -14,6 +14,7 @@ }, "model": { "id": "claude-opus-4-6", - "provider": "positai" + "provider": "positai", + "thinkingEffort": "high" } } \ No newline at end of file diff --git a/AGENTS.md b/AGENTS.md index 4b42e822..f01ca11f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -34,7 +34,9 @@ binary 0/1 in an underlying `raw` object. before moving on to the next task. - Increment the `.900X` dev version suffix in `DESCRIPTION` with each `NEWS.md` update. -- Check that existing tests cover all new code. (The GHA test suite uses codecov.) +- All new and changed code must have test coverage. The GHA test suite uses + codecov; uncovered lines will block the PR. Cover happy paths, error + branches, and edge cases (e.g. early returns). ## Optimization notes diff --git a/NEWS.md b/NEWS.md index 5d1315bb..541b04d3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -25,6 +25,8 @@ replacing iterative R while-loop. - `duplicated.Splits()` uses hash-based O(n) de-duplication, replacing O(n²) pairwise comparison. +- `RenumberTips.multiPhylo()` applies tip permutation in a single C++ call, + avoiding per-tree overhead. ## Fixes diff --git a/R/RcppExports.R b/R/RcppExports.R index 27743d54..ee96e8d0 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -93,6 +93,10 @@ path_lengths <- function(edge, weight, init_nas) { .Call(`_TreeTools_path_lengths`, edge, weight, init_nas) } +renumber_tips_batch <- function(trees, perm, n_tip, new_labels) { + .Call(`_TreeTools_renumber_tips_batch`, trees, perm, n_tip, new_labels) +} + cpp_edge_to_splits <- function(edge, order, nTip) { .Call(`_TreeTools_cpp_edge_to_splits`, edge, order, nTip) } diff --git a/R/tree_numbering.R b/R/tree_numbering.R index 49264c46..1e95b8c3 100644 --- a/R/tree_numbering.R +++ b/R/tree_numbering.R @@ -665,10 +665,48 @@ RenumberTips.Splits <- function(tree, tipOrder) { RenumberTips.multiPhylo <- function(tree, tipOrder) { at <- attributes(tree) labelled <- !is.null(at[["TipLabel"]]) - tree <- lapply(tree, RenumberTips.phylo, tipOrder) - if (labelled) { - at[["TipLabel"]] <- TipLabels(tipOrder) + + startOrder <- if (labelled) at[["TipLabel"]] else tree[[1L]][["tip.label"]] + newOrder <- if (is.numeric(tipOrder)) { + startOrder[tipOrder] + } else { + TipLabels(tipOrder, single = TRUE) + } + + if (identical(startOrder, newOrder)) return(tree) + + if (any(duplicated(newOrder))) { + stop("Tree labels ", + paste0(newOrder[duplicated(newOrder)], collapse = ", "), + " repeated in `tipOrder`") } + + if (length(startOrder) != length(newOrder)) { + startOnly <- setdiff(startOrder, newOrder) + newOnly <- setdiff(newOrder, startOrder) + if (length(startOnly)) { + stop("Tree labels and tipOrder must match.", + if (length(newOnly)) "\n Missing in `tree`: ", + paste0(newOnly, collapse = ", "), + if (length(startOnly)) "\n Missing in `tipOrder`: ", + paste0(startOnly, collapse = ", ") + ) + } + newOrder <- intersect(newOrder, startOrder) + } + + nTip <- length(startOrder) + matchOrder <- match(startOrder, newOrder) + if (any(is.na(matchOrder))) { + stop("Tree labels ", + paste0(startOrder[is.na(matchOrder)], collapse = ", "), + " missing from `tipOrder`") + } + + tree <- .Call(`_TreeTools_renumber_tips_batch`, tree, matchOrder, nTip, + newOrder) + + if (labelled) at[["TipLabel"]] <- newOrder attributes(tree) <- at tree } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 4cf5bd0f..123c171a 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -298,6 +298,20 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// renumber_tips_batch +Rcpp::List renumber_tips_batch(Rcpp::List trees, const Rcpp::IntegerVector perm, int n_tip, const Rcpp::CharacterVector new_labels); +RcppExport SEXP _TreeTools_renumber_tips_batch(SEXP treesSEXP, SEXP permSEXP, SEXP n_tipSEXP, SEXP new_labelsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type trees(treesSEXP); + Rcpp::traits::input_parameter< const Rcpp::IntegerVector >::type perm(permSEXP); + Rcpp::traits::input_parameter< int >::type n_tip(n_tipSEXP); + Rcpp::traits::input_parameter< const Rcpp::CharacterVector >::type new_labels(new_labelsSEXP); + rcpp_result_gen = Rcpp::wrap(renumber_tips_batch(trees, perm, n_tip, new_labels)); + return rcpp_result_gen; +END_RCPP +} // cpp_edge_to_splits Rcpp::RawMatrix cpp_edge_to_splits(const Rcpp::IntegerMatrix& edge, const Rcpp::IntegerVector& order, const Rcpp::IntegerVector& nTip); RcppExport SEXP _TreeTools_cpp_edge_to_splits(SEXP edgeSEXP, SEXP orderSEXP, SEXP nTipSEXP) { @@ -532,6 +546,7 @@ static const R_CallMethodDef CallEntries[] = { {"_TreeTools_n_cherries_wrapper", (DL_FUNC) &_TreeTools_n_cherries_wrapper, 3}, {"_TreeTools_node_depth_unrooted", (DL_FUNC) &_TreeTools_node_depth_unrooted, 4}, {"_TreeTools_path_lengths", (DL_FUNC) &_TreeTools_path_lengths, 3}, + {"_TreeTools_renumber_tips_batch", (DL_FUNC) &_TreeTools_renumber_tips_batch, 4}, {"_TreeTools_cpp_edge_to_splits", (DL_FUNC) &_TreeTools_cpp_edge_to_splits, 3}, {"_TreeTools_duplicated_splits", (DL_FUNC) &_TreeTools_duplicated_splits, 2}, {"_TreeTools_mask_splits", (DL_FUNC) &_TreeTools_mask_splits, 1}, diff --git a/src/renumber_tips.cpp b/src/renumber_tips.cpp new file mode 100644 index 00000000..11285483 --- /dev/null +++ b/src/renumber_tips.cpp @@ -0,0 +1,51 @@ +#include +using namespace Rcpp; + +// Apply a precomputed tip permutation to every tree in batch. +// Returns a plain list of modified phylo objects (shallow-cloned, with new +// edge matrices, updated tip.label, and "preorder" downgraded to "cladewise"). +// [[Rcpp::export]] +Rcpp::List renumber_tips_batch( + Rcpp::List trees, + const Rcpp::IntegerVector perm, + int n_tip, + const Rcpp::CharacterVector new_labels +) { + const int n_trees = trees.size(); + Rcpp::List result(n_trees); + + for (int i = 0; i < n_trees; ++i) { + // Shallow-clone the phylo list so other components are shared + Rcpp::List tree_i = Rcpp::clone( + Rcpp::as(trees[i]) + ); + + // Clone and permute the edge matrix + Rcpp::IntegerMatrix edge = Rcpp::clone( + Rcpp::as(tree_i["edge"]) + ); + const int n_edge = edge.nrow(); + for (int j = 0; j < n_edge; ++j) { + int& child = edge(j, 1); + if (child <= n_tip) { + child = perm[child - 1]; + } + } + tree_i["edge"] = edge; + + // Replace tip labels (shared across all output trees) + tree_i["tip.label"] = new_labels; + + // Downgrade "preorder" to "cladewise" + if (tree_i.hasAttribute("order")) { + Rcpp::CharacterVector ord = tree_i.attr("order"); + if (ord[0] == "preorder") { + tree_i.attr("order") = Rcpp::CharacterVector::create("cladewise"); + } + } + + result[i] = tree_i; + } + + return result; +} diff --git a/tests/testthat/test-tree_numbering.R b/tests/testthat/test-tree_numbering.R index 6aff8e2b..9e66bd75 100644 --- a/tests/testthat/test-tree_numbering.R +++ b/tests/testthat/test-tree_numbering.R @@ -167,6 +167,61 @@ test_that("RenumberTips() works correctly", { }) +test_that("RenumberTips.multiPhylo() covers edge cases", { + mp8 <- structure( + list(BalancedTree(8), PectinateTree(8)), + class = "multiPhylo" + ) + + # Numeric tipOrder + result <- RenumberTips(mp8, 8:1) + expect_equal(TipLabels(result[[1]]), paste0("t", 8:1)) + + # Early return when order matches + expect_identical(RenumberTips(mp8, TipLabels(mp8[[1]])), mp8) + + # Duplicate error + expect_error(RenumberTips(mp8, rep("t1", 8)), "repeated") + + # Length mismatch error + expect_error(RenumberTips(mp8, paste0("t", 0:5)), + "Missing in `tree`.*Missing in `tipOrder`") + + # Missing label error + mp_shared <- structure( + list(BalancedTree(8), PectinateTree(8)), + TipLabel = paste0("t", 1:8), + class = "multiPhylo" + ) + expect_error( + RenumberTips(mp_shared, c(paste0("t", 1:7), "t_unknown")), + "missing from `tipOrder`" + ) +}) + +test_that("RenumberTips.multiPhylo() batch matches per-tree", { + set.seed(7429) + trees <- c( + replicate(50, RandomTree(12, root = TRUE), simplify = FALSE), + replicate(50, PectinateTree(12), simplify = FALSE) + ) + class(trees) <- "multiPhylo" + target <- sort(TipLabels(trees[[1]])) + + batch <- RenumberTips(trees, target) + per_tree <- structure( + lapply(trees, RenumberTips.phylo, target), + class = "multiPhylo" + ) + + for (i in seq_along(batch)) { + expect_equal(batch[[i]][["edge"]], per_tree[[i]][["edge"]], + info = paste("tree", i)) + expect_equal(batch[[i]][["tip.label"]], per_tree[[i]][["tip.label"]], + info = paste("tree", i)) + } +}) + test_that("postorder_order() works", { edg7 <- BalancedTree(7)$edge expect_postorder(edg7[postorder_order(edg7), ])