diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 069897bb7..2f2a88027 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -18,7 +18,7 @@ Always reference these instructions first and fallback to search or bash command ``` - Install TreeTools dependencies: ```bash - sudo R -e "install.packages(c('ape', 'bit64', 'lifecycle', 'fastmatch', 'RCurl', 'Rdpack', 'stringi', 'PlotTools'), repos='https://cran.r-project.org/', dependencies=TRUE)" + sudo R -e "install.packages(c('ape', 'bit64', 'lifecycle', 'fastmatch', 'RCurl', 'Rdpack', 'PlotTools'), repos='https://cran.r-project.org/', dependencies=TRUE)" ``` ### Building and Checking diff --git a/.github/workflows/ASan.yml b/.github/workflows/ASan.yml index 154e206c4..251a75042 100644 --- a/.github/workflows/ASan.yml +++ b/.github/workflows/ASan.yml @@ -43,8 +43,6 @@ jobs: _R_CHECK_FORCE_SUGGESTS_: false RSPM: https://packagemanager.rstudio.com/cran/__linux__/noble/latest USING_ASAN: true - STRINGI_DISABLE_PKG_CONFIG: true - BIOCONDUCTOR_USE_CONTAINER_REPOSITORY: FALSE # For stringi GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} ASAN_OPTIONS: detect_container_overflow=1:verify_asan_link_order=0 diff --git a/.github/workflows/R-CMD-check.yml b/.github/workflows/R-CMD-check.yml index 85b28879f..230f07950 100644 --- a/.github/workflows/R-CMD-check.yml +++ b/.github/workflows/R-CMD-check.yml @@ -75,6 +75,7 @@ jobs: - name: Set up R dependencies uses: r-lib/actions/setup-r-dependencies@v2 with: + cache-version: 2-${{ runner.arch }} needs: | check diff --git a/DESCRIPTION b/DESCRIPTION index cd750cb39..72d65deb2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: TreeTools Title: Create, Modify and Analyse Phylogenetic Trees -Version: 2.2.0 +Version: 2.2.0.9001 Authors@R: c( person("Martin R.", 'Smith', role = c("aut", "cre", "cph"), email = "martin.smith@durham.ac.uk", @@ -45,7 +45,6 @@ Imports: methods, PlotTools, Rdpack (>= 2.6.6), - stringi, Suggests: RCurl, spelling, diff --git a/NAMESPACE b/NAMESPACE index 120e8e0f9..7f34d68f7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -508,7 +508,6 @@ importFrom(stats,median) importFrom(stats,na.omit) importFrom(stats,runif) importFrom(stats,setNames) -importFrom(stringi,stri_paste) importFrom(utils,combn) importFrom(utils,globalVariables) importFrom(utils,head) diff --git a/NEWS.md b/NEWS.md index 541b04d3e..577f39cc4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,15 @@ +# TreeTools 2.2.0.9001 # + +## New functionality + +- `Paste0()` provides a fast Rcpp-backed drop-in for `paste0()` / `stri_paste()` + with `NA` propagation. Exported for use by downstream packages. + +## Performance + +- Drop `stringi` dependency. +- `as.character.Splits()` reimplemented in C++; ~3× faster on 200-tip trees. + # TreeTools 2.2.0 (2026-03-18) # ## New functionality diff --git a/R/PhyToString.R b/R/PhyToString.R index c0a95a14d..1b6653bac 100644 --- a/R/PhyToString.R +++ b/R/PhyToString.R @@ -64,7 +64,6 @@ StringToPhydat <- StringToPhyDat #' #' @family phylogenetic matrix conversion functions #' @template MRS -#' @importFrom stringi stri_paste #' @export PhyToString <- function(phy, parentheses = "{", collapse = "", ps = "", useIndex = TRUE, byTaxon = TRUE, concatenate = TRUE) { @@ -119,17 +118,17 @@ PhyToString <- function(phy, parentheses = "{", collapse = "", ps = "", ret <- t(ret) } if (isTRUE(concatenate)) { - stri_paste(c(ret, ps), collapse = "") + paste0(c(ret, ps), collapse = "") } else { if (isTRUE(byTaxon)) { - stri_paste(stri_paste(ret, ps)) + paste0(ret, ps) } else { - stri_paste(ret, ps, collapse = "") + paste0(ret, ps, collapse = "") } } } else { if (isTRUE(byTaxon)) ret <- t(ret) - stri_paste(apply(ret, 1, stri_paste, collapse = ""), ps) + paste0(apply(ret, 1, paste0, collapse = ""), ps) } # Return: ret diff --git a/R/RcppExports.R b/R/RcppExports.R index ee96e8d01..ada9065f3 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -45,6 +45,10 @@ descendant_tips <- function(parent, child, postorder) { .Call(`_TreeTools_descendant_tips`, parent, child, postorder) } +splits_to_char <- function(membership, labels) { + .Call(`_TreeTools_splits_to_char`, membership, labels) +} + first_matching_split_pair <- function(x, table) { .Call(`_TreeTools_first_matching_split_pair`, x, table) } diff --git a/R/Splits.R b/R/Splits.R index bd57e1ff4..a6189022e 100644 --- a/R/Splits.R +++ b/R/Splits.R @@ -452,17 +452,16 @@ names.Splits <- function(x) rownames(x) #' @family Splits operations -#' @importFrom stringi stri_paste #' @export as.character.Splits <- function(x, ...) { tipLabels <- attr(x, "tip.label") - nTip <- attr(x, "nTip") - - apply(as.logical(x), 1L, function(inSplit) { - stri_paste(stri_paste(tipLabels[inSplit], collapse = " "), " | ", - stri_paste(tipLabels[!inSplit], collapse = " ")) - }) - + if (is.null(tipLabels)) { + tipLabels <- paste0("t", seq_len(attr(x, "nTip"))) + } + logx <- as.logical(x) + ret <- splits_to_char(logx, tipLabels) + names(ret) <- rownames(logx) + ret } #' @family Splits operations diff --git a/codemeta.json b/codemeta.json index c263a3727..a3a09b551 100644 --- a/codemeta.json +++ b/codemeta.json @@ -8,13 +8,13 @@ "codeRepository": "https://github.com/ms609/TreeTools/", "issueTracker": "https://github.com/ms609/TreeTools/issues/", "license": "https://spdx.org/licenses/GPL-3.0", - "version": "2.2.0", + "version": "2.2.0.9001", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", "url": "https://r-project.org" }, - "runtimePlatform": "R version 4.5.3 (2026-03-11)", + "runtimePlatform": "R version 4.5.3 (2026-03-11 ucrt)", "provider": { "@id": "https://cran.r-project.org", "@type": "Organization", @@ -261,21 +261,9 @@ }, "sameAs": "https://CRAN.R-project.org/package=Rdpack" }, - "8": { - "@type": "SoftwareApplication", - "identifier": "stringi", - "name": "stringi", - "provider": { - "@id": "https://cran.r-project.org", - "@type": "Organization", - "name": "Comprehensive R Archive Network (CRAN)", - "url": "https://cran.r-project.org" - }, - "sameAs": "https://CRAN.R-project.org/package=stringi" - }, "SystemRequirements": "C++17" }, - "fileSize": "1849.432KB", + "fileSize": "1871.905KB", "citation": [ { "@type": "SoftwareSourceCode", @@ -290,7 +278,7 @@ ], "name": "TreeTools: create, modify and analyse phylogenetic trees", "identifier": "10.32614/CRAN.package.TreeTools", - "description": "R package version 2.2.0", + "description": "R package version 2.2.0.9001", "@id": "https://doi.org/10.32614/CRAN.package.TreeTools", "sameAs": "https://doi.org/10.32614/CRAN.package.TreeTools" } diff --git a/man/figures/Stemwardness.png b/man/figures/Stemwardness.png index d2954f608..3b49ff717 100644 Binary files a/man/figures/Stemwardness.png and b/man/figures/Stemwardness.png differ diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 123c171a6..31ebde5bb 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -149,6 +149,18 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// splits_to_char +CharacterVector splits_to_char(const LogicalMatrix membership, const CharacterVector labels); +RcppExport SEXP _TreeTools_splits_to_char(SEXP membershipSEXP, SEXP labelsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const LogicalMatrix >::type membership(membershipSEXP); + Rcpp::traits::input_parameter< const CharacterVector >::type labels(labelsSEXP); + rcpp_result_gen = Rcpp::wrap(splits_to_char(membership, labels)); + return rcpp_result_gen; +END_RCPP +} // first_matching_split_pair IntegerVector first_matching_split_pair(const RawMatrix x, const RawMatrix table); RcppExport SEXP _TreeTools_first_matching_split_pair(SEXP xSEXP, SEXP tableSEXP) { @@ -534,6 +546,7 @@ static const R_CallMethodDef CallEntries[] = { {"_TreeTools_descendant_edges", (DL_FUNC) &_TreeTools_descendant_edges, 3}, {"_TreeTools_descendant_edges_single", (DL_FUNC) &_TreeTools_descendant_edges_single, 5}, {"_TreeTools_descendant_tips", (DL_FUNC) &_TreeTools_descendant_tips, 3}, + {"_TreeTools_splits_to_char", (DL_FUNC) &_TreeTools_splits_to_char, 2}, {"_TreeTools_first_matching_split_pair", (DL_FUNC) &_TreeTools_first_matching_split_pair, 2}, {"_TreeTools_first_matching_split_index", (DL_FUNC) &_TreeTools_first_matching_split_index, 2}, {"_TreeTools_num_to_parent", (DL_FUNC) &_TreeTools_num_to_parent, 2}, diff --git a/src/fast_paste.cpp b/src/fast_paste.cpp new file mode 100644 index 000000000..4e119965d --- /dev/null +++ b/src/fast_paste.cpp @@ -0,0 +1,48 @@ +#include +#include +#include +#include + +using namespace Rcpp; + +// Format each row of a logical split matrix as "A B | C D". +// `membership` is nSplit x nTip (logical); `labels` is length nTip. +// [[Rcpp::export]] +CharacterVector splits_to_char(const LogicalMatrix membership, + const CharacterVector labels) { + const int n_split = membership.nrow(); + const int n_tip = membership.ncol(); + CharacterVector out(n_split); + + // Cache translated label pointers and lengths + std::vector lab(n_tip); + std::vector lab_len(n_tip); + for (int j = 0; j < n_tip; ++j) { + lab[j] = Rf_translateCharUTF8(STRING_ELT(labels, j)); + lab_len[j] = std::strlen(lab[j]); + } + + std::string buf; + for (int i = 0; i < n_split; ++i) { + buf.clear(); + bool first_in = true; + for (int j = 0; j < n_tip; ++j) { + if (membership(i, j)) { + if (!first_in) buf.push_back(' '); + buf.append(lab[j], lab_len[j]); + first_in = false; + } + } + buf.append(" | ", 3); + bool first_out = true; + for (int j = 0; j < n_tip; ++j) { + if (!membership(i, j)) { + if (!first_out) buf.push_back(' '); + buf.append(lab[j], lab_len[j]); + first_out = false; + } + } + SET_STRING_ELT(out, i, Rf_mkCharLenCE(buf.c_str(), buf.size(), CE_UTF8)); + } + return out; +}