From 876eee218d775422e17924efecf0aa50aa6c6674 Mon Sep 17 00:00:00 2001 From: Gregor Gorjanc Date: Thu, 26 Feb 2026 15:13:55 +0000 Subject: [PATCH] Provide access to more ts/tc properties in R Fixes #68 --- AGENTS.md | 3 + RcppTskit/DESCRIPTION | 2 +- RcppTskit/NEWS.md | 21 ++ RcppTskit/R/Class-TableCollection.R | 55 +++- RcppTskit/R/Class-TreeSequence.R | 53 ++- RcppTskit/R/RcppExports.R | 44 ++- RcppTskit/R/RcppTskit.R | 64 +++- RcppTskit/inst/examples/create_test.trees.R | 2 +- RcppTskit/inst/examples/create_test.trees.py | 46 ++- .../inst/examples/test_discrete_time.trees | Bin 0 -> 5516 bytes .../examples/test_non_discrete_genome.trees | Bin 0 -> 7228 bytes RcppTskit/man/TableCollection.Rd | 158 ++++++++- RcppTskit/man/TreeSequence.Rd | 151 ++++++++- RcppTskit/man/tc_load.Rd | 2 +- RcppTskit/man/tc_py_to_r.Rd | 6 +- RcppTskit/man/ts_load.Rd | 2 +- RcppTskit/man/ts_py_to_r.Rd | 8 +- RcppTskit/notes_pkg_dev.Rmd | 251 ++++++++++---- RcppTskit/src/RcppExports.cpp | 128 ++++++- RcppTskit/src/RcppTskit.cpp | 311 ++++++++++++++---- .../tests/testthat/test_TableCollection.R | 25 +- .../testthat/test_load_summary_and_dump.R | 250 ++++++++++++-- .../tests/testthat/test_r_to_py_and_py_to_r.R | 8 + 23 files changed, 1402 insertions(+), 188 deletions(-) create mode 100644 RcppTskit/inst/examples/test_discrete_time.trees create mode 100644 RcppTskit/inst/examples/test_non_discrete_genome.trees diff --git a/AGENTS.md b/AGENTS.md index b5a49b9..4c1c739 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,6 +14,9 @@ These notes apply to this repository root and the `RcppTskit/` package. * We run R CMD check for every code change. * We keep local quality gates green before handoff. * We update `RcppTskit/NEWS.md` for user-visible behavior or API changes. +* We aim for a comparative tskit Python API and tskit R API and + similarly for tskit C API and tskit C++ API + (the later is RcppTskit C++ binding to tskit C API). ## Permission diff --git a/RcppTskit/DESCRIPTION b/RcppTskit/DESCRIPTION index 96aa372..5f48265 100644 --- a/RcppTskit/DESCRIPTION +++ b/RcppTskit/DESCRIPTION @@ -12,7 +12,7 @@ Authors@R: c( Description: 'Tskit' enables efficient storage, manipulation, and analysis of ancestral recombination graphs (ARGs) using succinct tree sequence encoding. The tree sequence encoding of an ARG is described in Wong et - al. (2024) , while `tskit` project is + al. (2024) , while 'tskit' project is described in Jeffrey et al. (2026) . See also for project news, documentation, and tutorials. 'Tskit' provides 'Python', 'C', and 'Rust' application diff --git a/RcppTskit/NEWS.md b/RcppTskit/NEWS.md index 7bff30c..1aab60d 100644 --- a/RcppTskit/NEWS.md +++ b/RcppTskit/NEWS.md @@ -4,6 +4,27 @@ All notable changes to RcppTskit are documented in this file. The file format is based on [Keep a Changelog](https://keepachangelog.com), and releases adhere to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added (new features) + +- Added the following scalar getters to match tskit C/Python API + - `TreeSequence$discrete_genome()` to query whether genome coordinates + are discrete integer values. + - `TreeSequence$has_reference_sequence()` to query whether a tree sequence + contains a reference genome sequence. + - `TreeSequence$discrete_time()` to query whether time values are discrete + integer values. + - `TreeSequence$file_uuid()` to query the UUID of the source `.trees` file. + - `TableCollection$has_reference_sequence()` to query whether a table + collection contains a reference genome sequence. + - `TableCollection$file_uuid()` to query the UUID of the source `.trees` + file. + - `TableCollection$sequence_length()` to query the sequence length. + - `TableCollection$time_units()` to query the time units. + - `TableCollection$has_index()` to query whether edge indexes are present. + - TODO + ## [0.2.0] - 2026-02-22 ### Added (new features) diff --git a/RcppTskit/R/Class-TableCollection.R b/RcppTskit/R/Class-TableCollection.R index fae2335..7c7b726 100644 --- a/RcppTskit/R/Class-TableCollection.R +++ b/RcppTskit/R/Class-TableCollection.R @@ -14,7 +14,7 @@ TableCollection <- R6Class( #' @param file a string specifying the full path of the tree sequence file. #' @param skip_tables logical; if \code{TRUE}, load only non-table information. #' @param skip_reference_sequence logical; if \code{TRUE}, skip loading - #' reference sequence information. + #' reference genome sequence information. #' @param pointer an external pointer (\code{externalptr}) to a table collection. #' @details See the corresponding Python function at #' \url{https://github.com/tskit-dev/tskit/blob/dc394d72d121c99c6dcad88f7a4873880924dd72/python/tskit/tables.py#L3463}. @@ -88,12 +88,63 @@ TableCollection <- R6Class( tree_sequence = function() { # See https://tskit.dev/tskit/docs/stable/c-api.html#c.TSK_TS_INIT_BUILD_INDEXES # TSK_TS_INIT_BUILD_INDEXES (1 << 0) is bitwShiftL(1L, 0) or just 1L - # TODO: Should we also use https://tskit.dev/tskit/docs/stable/c-api.html#c.TSK_TS_INIT_COMPUTE_MUTATION_PARENTS? + # TODO: Should we also use TSK_TS_INIT_COMPUTE_MUTATION_PARENTS in TableCollection$tree_sequence()? #65 + # https://github.com/HighlanderLab/RcppTskit/issues/65 init_options <- bitwShiftL(1L, 0) ts_ptr <- tc_ptr_to_ts_ptr(self$pointer, options = init_options) TreeSequence$new(pointer = ts_ptr) }, + #' @description Get the sequence length. + #' @examples + #' tc_file <- system.file("examples/test.trees", package = "RcppTskit") + #' tc <- tc_load(tc_file) + #' tc$sequence_length() + sequence_length = function() { + tc_ptr_sequence_length(self$pointer) + }, + + #' @description Get the time units string. + #' @examples + #' tc_file <- system.file("examples/test.trees", package = "RcppTskit") + #' tc <- tc_load(tc_file) + #' tc$time_units() + time_units = function() { + tc_ptr_time_units(self$pointer) + }, + + #' @description Get whether the table collection has edge indexes. + #' @examples + #' tc_file <- system.file("examples/test.trees", package = "RcppTskit") + #' tc <- tc_load(tc_file) + #' tc$has_index() + has_index = function() { + tc_ptr_has_index(self$pointer) + }, + + #' @description Get whether the table collection has a reference genome sequence. + #' @examples + #' tc_file1 <- system.file("examples/test.trees", package = "RcppTskit") + #' tc_file2 <- system.file("examples/test_with_ref_seq.trees", package = "RcppTskit") + #' tc1 <- tc_load(tc_file1) + #' tc1$has_reference_sequence() + #' tc2 <- tc_load(tc_file2) + #' tc2$has_reference_sequence() + has_reference_sequence = function() { + tc_ptr_has_reference_sequence(self$pointer) + }, + + #' @description Get the file UUID string. + #' @details Returns the UUID of the file the table collection was loaded from. + #' If unavailable, returns \code{NA_character_}. + #' @examples + #' tc_file <- system.file("examples/test.trees", package = "RcppTskit") + #' tc <- tc_load(tc_file) + #' tc$file_uuid() + file_uuid = function() { + tc_ptr_file_uuid(self$pointer) + }, + #' @description This function saves a table collection from R to disk and #' loads it into reticulate Python for use with the \code{tskit} Python API. #' @param tskit_module reticulate Python module of \code{tskit}. By default, diff --git a/RcppTskit/R/Class-TreeSequence.R b/RcppTskit/R/Class-TreeSequence.R index 349d6c1..b090636 100644 --- a/RcppTskit/R/Class-TreeSequence.R +++ b/RcppTskit/R/Class-TreeSequence.R @@ -15,7 +15,7 @@ TreeSequence <- R6Class( #' @param file a string specifying the full path of the tree sequence file. #' @param skip_tables logical; if \code{TRUE}, load only non-table information. #' @param skip_reference_sequence logical; if \code{TRUE}, skip loading - #' reference sequence information. + #' reference genome sequence information. #' @param pointer an external pointer (\code{externalptr}) to a tree sequence. #' @details See the corresponding Python function at #' \url{https://tskit.dev/tskit/docs/latest/python-api.html#tskit.load}. @@ -252,6 +252,32 @@ TreeSequence <- R6Class( ts_ptr_sequence_length(self$pointer) }, + #' @description Get the discrete genome status. + #' @details Returns \code{TRUE} if all genomic coordinates in the tree + #' sequence are discrete integer values. + #' @examples + #' ts_file1 <- system.file("examples/test.trees", package = "RcppTskit") + #' ts_file2 <- system.file("examples/test_non_discrete_genome.trees", package = "RcppTskit") + #' ts1 <- ts_load(ts_file1) + #' ts1$discrete_genome() + #' ts2 <- ts_load(ts_file2) + #' ts2$discrete_genome() + discrete_genome = function() { + ts_ptr_discrete_genome(self$pointer) + }, + + #' @description Get whether the tree sequence has a reference genome sequence. + #' @examples + #' ts_file1 <- system.file("examples/test.trees", package = "RcppTskit") + #' ts_file2 <- system.file("examples/test_with_ref_seq.trees", package = "RcppTskit") + #' ts1 <- ts_load(ts_file1) + #' ts1$has_reference_sequence() + #' ts2 <- ts_load(ts_file2) + #' ts2$has_reference_sequence() + has_reference_sequence = function() { + ts_ptr_has_reference_sequence(self$pointer) + }, + #' @description Get the time units string. #' @examples #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") @@ -261,6 +287,20 @@ TreeSequence <- R6Class( ts_ptr_time_units(self$pointer) }, + #' @description Get the discrete time status. + #' @details Returns \code{TRUE} if all time values in the tree sequence are + #' discrete integer values. + #' @examples + #' ts_file1 <- system.file("examples/test.trees", package = "RcppTskit") + #' ts_file2 <- system.file("examples/test_discrete_time.trees", package = "RcppTskit") + #' ts1 <- ts_load(ts_file1) + #' ts1$discrete_time() + #' ts2 <- ts_load(ts_file2) + #' ts2$discrete_time() + discrete_time = function() { + ts_ptr_discrete_time(self$pointer) + }, + #' @description Get the min time in node table and mutation table. #' @examples #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") @@ -287,6 +327,17 @@ TreeSequence <- R6Class( #' ts$metadata_length() metadata_length = function() { ts_ptr_metadata_length(self$pointer) + }, + + #' @description Get the file UUID string. + #' @details Returns the UUID of the file the tree sequence was loaded from. + #' If unavailable, returns \code{NA_character_}. + #' @examples + #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") + #' ts <- ts_load(ts_file) + #' ts$file_uuid() + file_uuid = function() { + ts_ptr_file_uuid(self$pointer) } ) ) diff --git a/RcppTskit/R/RcppExports.R b/RcppTskit/R/RcppExports.R index 7675664..b194571 100644 --- a/RcppTskit/R/RcppExports.R +++ b/RcppTskit/R/RcppExports.R @@ -91,10 +91,22 @@ ts_ptr_sequence_length <- function(ts) { .Call(`_RcppTskit_ts_ptr_sequence_length`, ts) } +ts_ptr_discrete_genome <- function(ts) { + .Call(`_RcppTskit_ts_ptr_discrete_genome`, ts) +} + +ts_ptr_has_reference_sequence <- function(ts) { + .Call(`_RcppTskit_ts_ptr_has_reference_sequence`, ts) +} + ts_ptr_time_units <- function(ts) { .Call(`_RcppTskit_ts_ptr_time_units`, ts) } +ts_ptr_discrete_time <- function(ts) { + .Call(`_RcppTskit_ts_ptr_discrete_time`, ts) +} + ts_ptr_min_time <- function(ts) { .Call(`_RcppTskit_ts_ptr_min_time`, ts) } @@ -103,18 +115,42 @@ ts_ptr_max_time <- function(ts) { .Call(`_RcppTskit_ts_ptr_max_time`, ts) } -ts_ptr_summary <- function(ts) { - .Call(`_RcppTskit_ts_ptr_summary`, ts) +ts_ptr_file_uuid <- function(ts) { + .Call(`_RcppTskit_ts_ptr_file_uuid`, ts) } -tc_ptr_summary <- function(tc) { - .Call(`_RcppTskit_tc_ptr_summary`, tc) +ts_ptr_summary <- function(ts) { + .Call(`_RcppTskit_ts_ptr_summary`, ts) } ts_ptr_metadata_length <- function(ts) { .Call(`_RcppTskit_ts_ptr_metadata_length`, ts) } +tc_ptr_sequence_length <- function(tc) { + .Call(`_RcppTskit_tc_ptr_sequence_length`, tc) +} + +tc_ptr_has_reference_sequence <- function(tc) { + .Call(`_RcppTskit_tc_ptr_has_reference_sequence`, tc) +} + +tc_ptr_time_units <- function(tc) { + .Call(`_RcppTskit_tc_ptr_time_units`, tc) +} + +tc_ptr_file_uuid <- function(tc) { + .Call(`_RcppTskit_tc_ptr_file_uuid`, tc) +} + +tc_ptr_has_index <- function(tc) { + .Call(`_RcppTskit_tc_ptr_has_index`, tc) +} + +tc_ptr_summary <- function(tc) { + .Call(`_RcppTskit_tc_ptr_summary`, tc) +} + tc_ptr_metadata_length <- function(tc) { .Call(`_RcppTskit_tc_ptr_metadata_length`, tc) } diff --git a/RcppTskit/R/RcppTskit.R b/RcppTskit/R/RcppTskit.R index 0694aed..e13042a 100644 --- a/RcppTskit/R/RcppTskit.R +++ b/RcppTskit/R/RcppTskit.R @@ -139,7 +139,7 @@ load_args_to_options <- function( #' @param file a string specifying the full path to a tree sequence file. #' @param skip_tables logical; if \code{TRUE}, load only non-table information. #' @param skip_reference_sequence logical; if \code{TRUE}, skip loading -#' reference sequence information. +#' reference genome sequence information. #' @details See the corresponding Python function at #' \url{https://tskit.dev/tskit/docs/latest/python-api.html#tskit.load}. #' @return A \code{\link{TreeSequence}} object. @@ -175,7 +175,7 @@ ts_read <- ts_load #' @param file a string specifying the full path to a tree sequence file. #' @param skip_tables logical; if \code{TRUE}, load only non-table information. #' @param skip_reference_sequence logical; if \code{TRUE}, skip loading -#' reference sequence information. +#' reference genome sequence information. #' @return A \code{\link{TableCollection}} object. #' @details See the corresponding Python function at #' \url{https://github.com/tskit-dev/tskit/blob/dc394d72d121c99c6dcad88f7a4873880924dd72/python/tskit/tables.py#L3463}. @@ -229,21 +229,29 @@ ts_ptr_print <- function(ts) { ts = data.frame( property = c( "num_samples", - "sequence_length", "num_trees", + "sequence_length", + "discrete_genome", + "has_reference_sequence", "time_units", + "discrete_time", "min_time", "max_time", - "has_metadata" + "has_metadata", + "file_uuid" ), value = c( tmp_summary[["num_samples"]], - tmp_summary[["sequence_length"]], tmp_summary[["num_trees"]], + tmp_summary[["sequence_length"]], + tmp_summary[["discrete_genome"]], + tmp_summary[["has_reference_sequence"]], tmp_summary[["time_units"]], + tmp_summary[["discrete_time"]], tmp_summary[["min_time"]], tmp_summary[["max_time"]], - tmp_metadata[["ts"]] > 0 + tmp_metadata[["ts"]] > 0, + tmp_summary[["file_uuid"]] ) ), tables = data.frame( @@ -306,13 +314,19 @@ tc_ptr_print <- function(tc) { tc = data.frame( property = c( "sequence_length", + "has_reference_sequence", "time_units", - "has_metadata" + "has_metadata", + "file_uuid", + "has_index" ), value = c( tmp_summary[["sequence_length"]], + tmp_summary[["has_reference_sequence"]], tmp_summary[["time_units"]], - tmp_metadata[["tc"]] > 0 + tmp_metadata[["tc"]] > 0, + tmp_summary[["file_uuid"]], + tmp_summary[["has_index"]] ) ), tables = data.frame( @@ -352,13 +366,16 @@ tc_ptr_print <- function(tc) { } # @title Transfer a tree sequence from R to reticulate Python -# @description This function saves a tree sequence from R to disk and -# reads it into reticulate Python for use with \code{tskit} Python API. +# @description This function saves a tree sequence from R to +# temporary file on disk and reads it into reticulate Python +# for use with \code{tskit} Python API. # @param ts an external pointer (\code{externalptr}) to a \code{tsk_treeseq_t} object. # @param tskit_module reticulate Python module of \code{tskit}. By default, # it calls \code{\link{get_tskit_py}} to obtain the module. # @param cleanup logical; delete the temporary file at the end of the function? # @return A tree sequence in reticulate Python. +# @details Because this transfer is via a temporary file, +# the file UUID property changes. # @seealso \code{\link{ts_py_to_r}}, \code{\link{ts_load}}, and # \code{\link[=TreeSequence]{TreeSequence$dump}} on how this function # is used and presented to users, @@ -396,8 +413,9 @@ ts_ptr_r_to_py <- function(ts, tskit_module = get_tskit_py(), cleanup = TRUE) { } # @title Transfer a table collection from R to reticulate Python -# @description This function saves a table collection from R to disk and -# reads it into reticulate Python for use with \code{tskit} Python API. +# @description This function saves a table collection from R to +# temporary file on disk and reads it into reticulate Python +# for use with \code{tskit} Python API. # @param tc an external pointer (\code{externalptr}) to a # \code{tsk_table_collection_t} object. # @param tskit_module reticulate Python module of \code{tskit}. By default, @@ -405,6 +423,8 @@ ts_ptr_r_to_py <- function(ts, tskit_module = get_tskit_py(), cleanup = TRUE) { # @param cleanup logical; delete the temporary file at the end of the function? # @details See \url{https://tskit.dev/tutorials/tables_and_editing.html#tables-and-editing} # on what you can do with the tables. +# Because this transfer is via a temporary file, +# the file UUID property changes. # @return A table collection in reticulate Python. # @seealso \code{\link{tc_py_to_r}}, \code{\link{tc_load}}, and # \code{\link[=TableCollection]{TableCollection$dump}} on how this function @@ -443,11 +463,13 @@ tc_ptr_r_to_py <- function(tc, tskit_module = get_tskit_py(), cleanup = TRUE) { } # @title Transfer a tree sequence from reticulate Python to R -# @description This function saves a tree sequence from reticulate Python to disk -# and reads it into R for use with \code{RcppTskit}. +# @description This function saves a tree sequence from reticulate Python to +# temporary file on disk and reads it into R for use with \code{RcppTskit}. # @param ts tree sequence in reticulate Python. # @param cleanup logical; delete the temporary file at the end of the function? # @return An external pointer (\code{externalptr}) to a \code{tsk_treeseq_t} object. +# @details Because this transfer is via a temporary file, +# the file UUID property changes. # @seealso \code{\link[=TreeSequence]{TreeSequence$r_to_py}}, # \code{\link{ts_load}}, and \code{\link[=TreeSequence]{TreeSequence$dump}} # on how this function is used and presented to users, @@ -490,11 +512,13 @@ ts_ptr_py_to_r <- function(ts, cleanup = TRUE) { # @title Transfer a table collection from reticulate Python to R # @description This function saves a table collection from reticulate Python to -# disk and reads it into R for use with \code{RcppTskit}. +# temporary file on disk and reads it into R for use with \code{RcppTskit}. # @param tc table collection in reticulate Python. # @param cleanup logical; delete the temporary file at the end of the function? # @return An external pointer (\code{externalptr}) to a # \code{tsk_table_collection_t} object. +# @details Because this transfer is via a temporary file, +# the file UUID property changes. # @seealso \code{\link[=TableCollection]{TableCollection$r_to_py}}, # \code{\link{tc_load}}, and \code{\link[=TableCollection]{TableCollection$dump}} # on how this function is used and presented to users, @@ -536,11 +560,13 @@ tc_ptr_py_to_r <- function(tc, cleanup = TRUE) { } #' @title Transfer a tree sequence from reticulate Python to R -#' @description This function saves a tree sequence from reticulate Python to disk -#' and reads it into R for use with \code{RcppTskit}. +#' @description This function saves a tree sequence from reticulate Python to +#' temporary file on disk and reads it into R for use with \code{RcppTskit}. #' @param ts tree sequence in reticulate Python. #' @param cleanup logical; delete the temporary file at the end of the function? #' @return A \code{\link{TreeSequence}} object. +#' @details Because this transfer is via a temporary file, +#' the file UUID property changes. #' @seealso \code{\link[=TreeSequence]{TreeSequence$r_to_py}} #' \code{\link{ts_load}}, and \code{\link[=TreeSequence]{TreeSequence$dump}}. #' @examples @@ -574,10 +600,12 @@ ts_py_to_r <- function(ts, cleanup = TRUE) { #' @title Transfer a table collection from reticulate Python to R #' @description This function saves a table collection from reticulate Python -#' to disk and reads it into R for use with \code{RcppTskit}. +#' to temporary file on disk and reads it into R for use with \code{RcppTskit}. #' @param tc table collection in reticulate Python. #' @param cleanup logical; delete the temporary file at the end of the function? #' @return A \code{\link{TableCollection}} object. +#' @details Because this transfer is via a temporary file, +#' the file UUID property changes. #' @seealso \code{\link[=TableCollection]{TableCollection$r_to_py}} #' \code{\link{tc_load}}, and \code{\link[=TableCollection]{TableCollection$dump}}. #' @examples diff --git a/RcppTskit/inst/examples/create_test.trees.R b/RcppTskit/inst/examples/create_test.trees.R index b23c808..3813a98 100644 --- a/RcppTskit/inst/examples/create_test.trees.R +++ b/RcppTskit/inst/examples/create_test.trees.R @@ -111,7 +111,7 @@ length(ts$tables$individuals$metadata) # 21 # ----------------------------------------------------------------------------- -# Another example with a reference sequence +# Another example with a reference genome sequence ts <- msprime$sim_ancestry( samples = 3, diff --git a/RcppTskit/inst/examples/create_test.trees.py b/RcppTskit/inst/examples/create_test.trees.py index 0879455..dab1845 100644 --- a/RcppTskit/inst/examples/create_test.trees.py +++ b/RcppTskit/inst/examples/create_test.trees.py @@ -58,6 +58,9 @@ os.getcwd() ts.dump("RcppTskit/inst/examples/test.trees") +tskit.load("RcppTskit/inst/examples/test.trees").file_uuid +# '79ec383f-a57d-b44f-2a5c-f0feecbbcb32' +# test_trees_file_uuid <- "79ec383f-a57d-b44f-2a5c-f0feecbbcb32" # ts = tskit.load("RcppTskit/inst/examples/test.trees") # ----------------------------------------------------------------------------- @@ -114,6 +117,9 @@ ts.tables.individuals.metadata.shape # (21,) ts.dump("RcppTskit/inst/examples/test2.trees") +tskit.load("RcppTskit/inst/examples/test2.trees").file_uuid +# 'cf406b8c-be33-af4a-c00b-a4de1e6151ff' +# test2_trees_file_uuid <- "cf406b8c-be33-af4a-c00b-a4de1e6151ff" tables = ts.dump_tables() tables.metadata_schema = tskit.MetadataSchema(None) @@ -125,7 +131,24 @@ # ----------------------------------------------------------------------------- -# Another example with a reference sequence +# Tiny example with a non-discrete genome (continuous coordinates) + +ts = msprime.sim_ancestry( + samples=2, + sequence_length=10.5, + recombination_rate=1e-2, + random_seed=7, + discrete_genome=False, +) +ts.discrete_genome # False +ts.sequence_length # 10.5 +ts.dump("RcppTskit/inst/examples/test_non_discrete_genome.trees") +tskit.load("RcppTskit/inst/examples/test_non_discrete_genome.trees").file_uuid +# '42bc7ad8-f3a2-e722-a7e3-7f87c6c1dfc2' + +# ----------------------------------------------------------------------------- + +# Another example with a reference genome sequence ts = msprime.sim_ancestry(samples=3, ploidy=2, sequence_length=10, random_seed=2) ts = msprime.sim_mutations(ts, rate=0.1, random_seed=2) @@ -143,5 +166,26 @@ print(i) ts.dump("RcppTskit/inst/examples/test_with_ref_seq.trees") +tskit.load("RcppTskit/inst/examples/test_with_ref_seq.trees").file_uuid +# '71793465-49ed-e0f3-0657-c01926226b29' +# test_with_ref_seq_file_uuid <- "71793465-49ed-e0f3-0657-c01926226b29" + +# ----------------------------------------------------------------------------- + +# Tiny example with discrete time values + +tables = tskit.TableCollection(sequence_length=10) +tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0) +tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0) +tables.nodes.add_row(time=1) +tables.edges.add_row(0, 10, parent=2, child=0) +tables.edges.add_row(0, 10, parent=2, child=1) +tables.sort() +ts = tables.tree_sequence() +ts.discrete_time # True +ts.num_trees # 1 +ts.dump("RcppTskit/inst/examples/test_discrete_time.trees") +tskit.load("RcppTskit/inst/examples/test_discrete_time.trees").file_uuid +# 'ff35bc1d-8592-097c-c70a-03828ad847d8' # ----------------------------------------------------------------------------- diff --git a/RcppTskit/inst/examples/test_discrete_time.trees b/RcppTskit/inst/examples/test_discrete_time.trees new file mode 100644 index 0000000000000000000000000000000000000000..e5df66f08d793e608ebf9bd77d8d329a9e9be994 GIT binary patch literal 5516 zcmbW5O^6gn6vx{y)Q#U7%x;30>`AhjbyapGBv}uFkV6D7ddZ~iu9<1xBHGpU|IhT6({{}a>{h>5@AuxnURC!r z^VKJ(&+ndFnA_wy&PT=H%{PmGQ+&1}_TFB7@doU*L;7Mf?7CqmCjJ0F2m7oguXL-= zeeip<>0AI0vv$~qnaAL{eDYgrNj<^(&kVkAS)cvey-(-cg?i*|`!^4s{U2ENkIP#$ z_%FaSALGoC{cC|||G%-U&-H%@{v8xppOU`_egXWqmOS-K@cY640Nz}$Y4iMk37(iO z&-&}&-!|&=e3^A=bN;ST{|<#Pux4ca-;Da?nZE{iIRD7tf3mF4{eKSrAPU*?)V~iK z$NeWyeRcZjKMH;a9QQ2slV1Uk$ADwYbN#1h4?N$0-$MN+p8B`p!OgZUTk2>3=E3v*gFN%sEPwRegK}&lRoq>HgLl43EA@cLFnttP;@hW6^YFC-OSx+b&_sZ3L~T=y}pPL zIdD@EBOncW{j8g$k(;$*HxjrUic|(k9K^o3R+Lv2IzcQ&nibwolKLXuFqeub8M@(U zCKv{O?uN3}4c(qBLPw4;@m!T;k`L0WQ(WUD-)q9L-YTV6y+I3H$iq?`ks}IvX{lJY zd?B-nsdl1blAGXSg>fndzgi?td{IeJZcxRRNuGMufGmhalxHJ}X)qLiM;4AcH`R$H_BTIL7oKFT-HA=FI$MMjY0C2X`H$Z+b@t(}8pkcVYAw5^Jb zeX_;8lYGcNaZ1-1q{&dkZtNA}Q{feN!mA!;se~#K#g;^aYV^nnK#D85C^C0K5%;pb z>X~enrLwfRx5_P@=p_~p~7=^3qp)bCj1 z-?A3hl+SS%*D`rI$llLVA!O-Mzkl(Wsq)(7EJLWI7Po7#?T0XVvEqSZU?$Yw{_Db9J+b352_Dkj)Qhxvd literal 0 HcmV?d00001 diff --git a/RcppTskit/inst/examples/test_non_discrete_genome.trees b/RcppTskit/inst/examples/test_non_discrete_genome.trees new file mode 100644 index 0000000000000000000000000000000000000000..9abaef4aff051f336709b6a33b1ec5a46f8d26e1 GIT binary patch literal 7228 zcmb_gO^h5@5uSu3I1UbLa6)Vd>NMdJ?aog3jAzypOx6N}ln}6DE5br?)7$-Kwv+xD zUiYlM*=!V{TtG;iKsj1M;=&<^2!uG910oU!;6Tn12q{u7IR*$aRo(SX_qMm$-c?!} zzkXHqRee?Ob-lO#(K8n=pE!Qz_+6b&=d1Ml$`|OLLp=A0?X6E2FAvCk{Z#RCT;@%~ z{i67j;9cUth^yCjJmgooe9YxwUA9{ylgA5p(m!K3~&!QWj6)aJ|^8~^V${0A-LWBvXs_|HmuZSknzk}Mq7ANYg#qnb|${ul%ge_y6FknZ!{S51WB(i$|H={LWB>g4hS-*RwapV5+uHYYQN?hcq-)X_){&7qi$wBuw)NfG3FA4t51L6b!9l=W(I=2LW z`+x%+{9VDLf83Hp+VX!{@Yp|Z3x08)HROT#KNURgukW>tkM;kR;BkNWK=7zfL-@u1 z{aua!hk|dyK@R?ZuJH%HZT$BIkN(*~ArPoxMErl%_ygZI{)4jdv3|!|#z+0XBzW*| zi$_|P;PL$ekA4cfrtwL^qn|t?{2y-uh+O$om9Bc5LFgB+p_*jHTck4AcQbeI%tG%RmgEa+5a(@e5pfgVBz_PI zqxgt|sm>XvtG-IJ%9K~4G6ilQ5h9eXV0>qgIPq1*MZQ3ld78|%S2f6jNJaB(K~V== z%6C!{G>O4x0RoxQ4J3`G;JxsX(_YMq%9j4ppvs0Rv%PGO;Vh##>XF{kZEOvJx zoF%h)m{+5ibFfmDuEOtrGV#8zs)#Yu$(D-U*dy{3kme zc^Bd%n)@`LwLEeU<98mpzu!l{`t$GG-#qEu{OBo+bv@?LV}$&$BXa>CaAIDcto;hV zUtVt-!J|Ib?V-=omx8RDX{FNKg#Uk!WAcMqfe)WWTsN*K+wk3yWz3Upm_N%{$2O=p zy?*QubF2QE0>I{W?P;?AxaB`QzJ2Qa-M@Ki@aVO_{_OSp*Uz6C%tCiM|d2Q%Kg=GZisO+LryjlN>1XYN)AJx1?4rtACpMt8$YI+^j2rc!g9>A6}q zJ?csxVP8zi*|JGr>%yQGXvfU9XB6gpW}4AiIOVwWFf?D-1p!#VV3#zSKJh>+pK&?O z|EfpFbVt$_lVXyEY$(EyZ~*s6jRsLnZr%7l&RRSNs6PuDbMNB zSX2`#nB(ecsVMiMlb=+j#!Fl(yGOgqN!66j$}Dw~IIM7w=aCb-8!F_*k^D5RViFO8 zx0f;E)ye1+aH&Le=L(%jdv3*A&BB1LVtlWoOCBkDyg5|773d`9m?YXUlVX#k+bJF3 zBr(74>gxf08-~fGS#r5QTIpTwUw!%;!{=G9RjAxFSJ{K^u-K`UomhA4khq49ii z(=-axvGHsY8&A8jVVyH@^4w3Z>5eu9=XTn k!hyZvja+|iWzu);6*aQ$6?dfiE2GKU$Qyc=Kk@AU0cJ!BAOHXW literal 0 HcmV?d00001 diff --git a/RcppTskit/man/TableCollection.Rd b/RcppTskit/man/TableCollection.Rd index 11c6a39..42fda20 100644 --- a/RcppTskit/man/TableCollection.Rd +++ b/RcppTskit/man/TableCollection.Rd @@ -40,6 +40,49 @@ tc <- TableCollection$new(file = ts_file) ts <- tc$tree_sequence() is(ts) +## ------------------------------------------------ +## Method `TableCollection$sequence_length` +## ------------------------------------------------ + +tc_file <- system.file("examples/test.trees", package = "RcppTskit") +tc <- tc_load(tc_file) +tc$sequence_length() + +## ------------------------------------------------ +## Method `TableCollection$time_units` +## ------------------------------------------------ + +tc_file <- system.file("examples/test.trees", package = "RcppTskit") +tc <- tc_load(tc_file) +tc$time_units() + +## ------------------------------------------------ +## Method `TableCollection$has_index` +## ------------------------------------------------ + +tc_file <- system.file("examples/test.trees", package = "RcppTskit") +tc <- tc_load(tc_file) +tc$has_index() + +## ------------------------------------------------ +## Method `TableCollection$has_reference_sequence` +## ------------------------------------------------ + +tc_file1 <- system.file("examples/test.trees", package = "RcppTskit") +tc_file2 <- system.file("examples/test_with_ref_seq.trees", package = "RcppTskit") +tc1 <- tc_load(tc_file1) +tc1$has_reference_sequence() +tc2 <- tc_load(tc_file2) +tc2$has_reference_sequence() + +## ------------------------------------------------ +## Method `TableCollection$file_uuid` +## ------------------------------------------------ + +tc_file <- system.file("examples/test.trees", package = "RcppTskit") +tc <- tc_load(tc_file) +tc$file_uuid() + ## ------------------------------------------------ ## Method `TableCollection$r_to_py` ## ------------------------------------------------ @@ -90,6 +133,11 @@ tc \item \href{#method-TableCollection-dump}{\code{TableCollection$dump()}} \item \href{#method-TableCollection-write}{\code{TableCollection$write()}} \item \href{#method-TableCollection-tree_sequence}{\code{TableCollection$tree_sequence()}} +\item \href{#method-TableCollection-sequence_length}{\code{TableCollection$sequence_length()}} +\item \href{#method-TableCollection-time_units}{\code{TableCollection$time_units()}} +\item \href{#method-TableCollection-has_index}{\code{TableCollection$has_index()}} +\item \href{#method-TableCollection-has_reference_sequence}{\code{TableCollection$has_reference_sequence()}} +\item \href{#method-TableCollection-file_uuid}{\code{TableCollection$file_uuid()}} \item \href{#method-TableCollection-r_to_py}{\code{TableCollection$r_to_py()}} \item \href{#method-TableCollection-print}{\code{TableCollection$print()}} \item \href{#method-TableCollection-clone}{\code{TableCollection$clone()}} @@ -117,7 +165,7 @@ Create a \code{\link{TableCollection}} from a file or a pointer. \item{\code{skip_tables}}{logical; if \code{TRUE}, load only non-table information.} \item{\code{skip_reference_sequence}}{logical; if \code{TRUE}, skip loading -reference sequence information.} +reference genome sequence information.} \item{\code{pointer}}{an external pointer (\code{externalptr}) to a table collection.} } @@ -226,6 +274,114 @@ is(ts) } +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-TableCollection-sequence_length}{}}} +\subsection{Method \code{sequence_length()}}{ +Get the sequence length. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{TableCollection$sequence_length()}\if{html}{\out{
}} +} + +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{tc_file <- system.file("examples/test.trees", package = "RcppTskit") +tc <- tc_load(tc_file) +tc$sequence_length() +} +\if{html}{\out{
}} + +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-TableCollection-time_units}{}}} +\subsection{Method \code{time_units()}}{ +Get the time units string. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{TableCollection$time_units()}\if{html}{\out{
}} +} + +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{tc_file <- system.file("examples/test.trees", package = "RcppTskit") +tc <- tc_load(tc_file) +tc$time_units() +} +\if{html}{\out{
}} + +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-TableCollection-has_index}{}}} +\subsection{Method \code{has_index()}}{ +Get whether the table collection has edge indexes. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{TableCollection$has_index()}\if{html}{\out{
}} +} + +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{tc_file <- system.file("examples/test.trees", package = "RcppTskit") +tc <- tc_load(tc_file) +tc$has_index() +} +\if{html}{\out{
}} + +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-TableCollection-has_reference_sequence}{}}} +\subsection{Method \code{has_reference_sequence()}}{ +Get whether the table collection has a reference genome sequence. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{TableCollection$has_reference_sequence()}\if{html}{\out{
}} +} + +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{tc_file1 <- system.file("examples/test.trees", package = "RcppTskit") +tc_file2 <- system.file("examples/test_with_ref_seq.trees", package = "RcppTskit") +tc1 <- tc_load(tc_file1) +tc1$has_reference_sequence() +tc2 <- tc_load(tc_file2) +tc2$has_reference_sequence() +} +\if{html}{\out{
}} + +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-TableCollection-file_uuid}{}}} +\subsection{Method \code{file_uuid()}}{ +Get the file UUID string. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{TableCollection$file_uuid()}\if{html}{\out{
}} +} + +\subsection{Details}{ +Returns the UUID of the file the table collection was loaded from. + If unavailable, returns \code{NA_character_}. +} + +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{tc_file <- system.file("examples/test.trees", package = "RcppTskit") +tc <- tc_load(tc_file) +tc$file_uuid() +} +\if{html}{\out{
}} + +} + } \if{html}{\out{
}} \if{html}{\out{}} diff --git a/RcppTskit/man/TreeSequence.Rd b/RcppTskit/man/TreeSequence.Rd index 04d8f86..4e03d68 100644 --- a/RcppTskit/man/TreeSequence.Rd +++ b/RcppTskit/man/TreeSequence.Rd @@ -165,6 +165,28 @@ ts_file <- system.file("examples/test.trees", package = "RcppTskit") ts <- ts_load(ts_file) ts$sequence_length() +## ------------------------------------------------ +## Method `TreeSequence$discrete_genome` +## ------------------------------------------------ + +ts_file1 <- system.file("examples/test.trees", package = "RcppTskit") +ts_file2 <- system.file("examples/test_non_discrete_genome.trees", package = "RcppTskit") +ts1 <- ts_load(ts_file1) +ts1$discrete_genome() +ts2 <- ts_load(ts_file2) +ts2$discrete_genome() + +## ------------------------------------------------ +## Method `TreeSequence$has_reference_sequence` +## ------------------------------------------------ + +ts_file1 <- system.file("examples/test.trees", package = "RcppTskit") +ts_file2 <- system.file("examples/test_with_ref_seq.trees", package = "RcppTskit") +ts1 <- ts_load(ts_file1) +ts1$has_reference_sequence() +ts2 <- ts_load(ts_file2) +ts2$has_reference_sequence() + ## ------------------------------------------------ ## Method `TreeSequence$time_units` ## ------------------------------------------------ @@ -173,6 +195,17 @@ ts_file <- system.file("examples/test.trees", package = "RcppTskit") ts <- ts_load(ts_file) ts$time_units() +## ------------------------------------------------ +## Method `TreeSequence$discrete_time` +## ------------------------------------------------ + +ts_file1 <- system.file("examples/test.trees", package = "RcppTskit") +ts_file2 <- system.file("examples/test_discrete_time.trees", package = "RcppTskit") +ts1 <- ts_load(ts_file1) +ts1$discrete_time() +ts2 <- ts_load(ts_file2) +ts2$discrete_time() + ## ------------------------------------------------ ## Method `TreeSequence$min_time` ## ------------------------------------------------ @@ -196,6 +229,14 @@ ts$max_time() ts_file <- system.file("examples/test.trees", package = "RcppTskit") ts <- ts_load(ts_file) ts$metadata_length() + +## ------------------------------------------------ +## Method `TreeSequence$file_uuid` +## ------------------------------------------------ + +ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$file_uuid() } \seealso{ \code{\link{ts_load}} @@ -230,10 +271,14 @@ ts$metadata_length() \item \href{#method-TreeSequence-num_sites}{\code{TreeSequence$num_sites()}} \item \href{#method-TreeSequence-num_mutations}{\code{TreeSequence$num_mutations()}} \item \href{#method-TreeSequence-sequence_length}{\code{TreeSequence$sequence_length()}} +\item \href{#method-TreeSequence-discrete_genome}{\code{TreeSequence$discrete_genome()}} +\item \href{#method-TreeSequence-has_reference_sequence}{\code{TreeSequence$has_reference_sequence()}} \item \href{#method-TreeSequence-time_units}{\code{TreeSequence$time_units()}} +\item \href{#method-TreeSequence-discrete_time}{\code{TreeSequence$discrete_time()}} \item \href{#method-TreeSequence-min_time}{\code{TreeSequence$min_time()}} \item \href{#method-TreeSequence-max_time}{\code{TreeSequence$max_time()}} \item \href{#method-TreeSequence-metadata_length}{\code{TreeSequence$metadata_length()}} +\item \href{#method-TreeSequence-file_uuid}{\code{TreeSequence$file_uuid()}} \item \href{#method-TreeSequence-clone}{\code{TreeSequence$clone()}} } } @@ -260,7 +305,7 @@ Create a \code{\link{TreeSequence}} from a file or a pointer. \item{\code{skip_tables}}{logical; if \code{TRUE}, load only non-table information.} \item{\code{skip_reference_sequence}}{logical; if \code{TRUE}, skip loading -reference sequence information.} +reference genome sequence information.} \item{\code{pointer}}{an external pointer (\code{externalptr}) to a tree sequence.} } @@ -669,6 +714,57 @@ ts$sequence_length() } +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-TreeSequence-discrete_genome}{}}} +\subsection{Method \code{discrete_genome()}}{ +Get the discrete genome status. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{TreeSequence$discrete_genome()}\if{html}{\out{
}} +} + +\subsection{Details}{ +Returns \code{TRUE} if all genomic coordinates in the tree + sequence are discrete integer values. +} + +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{ts_file1 <- system.file("examples/test.trees", package = "RcppTskit") +ts_file2 <- system.file("examples/test_non_discrete_genome.trees", package = "RcppTskit") +ts1 <- ts_load(ts_file1) +ts1$discrete_genome() +ts2 <- ts_load(ts_file2) +ts2$discrete_genome() +} +\if{html}{\out{
}} + +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-TreeSequence-has_reference_sequence}{}}} +\subsection{Method \code{has_reference_sequence()}}{ +Get whether the tree sequence has a reference genome sequence. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{TreeSequence$has_reference_sequence()}\if{html}{\out{
}} +} + +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{ts_file1 <- system.file("examples/test.trees", package = "RcppTskit") +ts_file2 <- system.file("examples/test_with_ref_seq.trees", package = "RcppTskit") +ts1 <- ts_load(ts_file1) +ts1$has_reference_sequence() +ts2 <- ts_load(ts_file2) +ts2$has_reference_sequence() +} +\if{html}{\out{
}} + +} + } \if{html}{\out{
}} \if{html}{\out{}} @@ -689,6 +785,34 @@ ts$time_units() } +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-TreeSequence-discrete_time}{}}} +\subsection{Method \code{discrete_time()}}{ +Get the discrete time status. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{TreeSequence$discrete_time()}\if{html}{\out{
}} +} + +\subsection{Details}{ +Returns \code{TRUE} if all time values in the tree sequence are + discrete integer values. +} + +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{ts_file1 <- system.file("examples/test.trees", package = "RcppTskit") +ts_file2 <- system.file("examples/test_discrete_time.trees", package = "RcppTskit") +ts1 <- ts_load(ts_file1) +ts1$discrete_time() +ts2 <- ts_load(ts_file2) +ts2$discrete_time() +} +\if{html}{\out{
}} + +} + } \if{html}{\out{
}} \if{html}{\out{}} @@ -752,6 +876,31 @@ ts$metadata_length() } +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-TreeSequence-file_uuid}{}}} +\subsection{Method \code{file_uuid()}}{ +Get the file UUID string. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{TreeSequence$file_uuid()}\if{html}{\out{
}} +} + +\subsection{Details}{ +Returns the UUID of the file the tree sequence was loaded from. + If unavailable, returns \code{NA_character_}. +} + +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$file_uuid() +} +\if{html}{\out{
}} + +} + } \if{html}{\out{
}} \if{html}{\out{}} diff --git a/RcppTskit/man/tc_load.Rd b/RcppTskit/man/tc_load.Rd index 1e81776..5e14aa2 100644 --- a/RcppTskit/man/tc_load.Rd +++ b/RcppTskit/man/tc_load.Rd @@ -15,7 +15,7 @@ tc_read(file, skip_tables = FALSE, skip_reference_sequence = FALSE) \item{skip_tables}{logical; if \code{TRUE}, load only non-table information.} \item{skip_reference_sequence}{logical; if \code{TRUE}, skip loading -reference sequence information.} +reference genome sequence information.} } \value{ A \code{\link{TableCollection}} object. diff --git a/RcppTskit/man/tc_py_to_r.Rd b/RcppTskit/man/tc_py_to_r.Rd index b481ad3..fafce33 100644 --- a/RcppTskit/man/tc_py_to_r.Rd +++ b/RcppTskit/man/tc_py_to_r.Rd @@ -16,7 +16,11 @@ A \code{\link{TableCollection}} object. } \description{ This function saves a table collection from reticulate Python - to disk and reads it into R for use with \code{RcppTskit}. + to temporary file on disk and reads it into R for use with \code{RcppTskit}. +} +\details{ +Because this transfer is via a temporary file, + the file UUID property changes. } \examples{ \dontrun{ diff --git a/RcppTskit/man/ts_load.Rd b/RcppTskit/man/ts_load.Rd index 940c216..45c9b2b 100644 --- a/RcppTskit/man/ts_load.Rd +++ b/RcppTskit/man/ts_load.Rd @@ -15,7 +15,7 @@ ts_read(file, skip_tables = FALSE, skip_reference_sequence = FALSE) \item{skip_tables}{logical; if \code{TRUE}, load only non-table information.} \item{skip_reference_sequence}{logical; if \code{TRUE}, skip loading -reference sequence information.} +reference genome sequence information.} } \value{ A \code{\link{TreeSequence}} object. diff --git a/RcppTskit/man/ts_py_to_r.Rd b/RcppTskit/man/ts_py_to_r.Rd index 2ab7788..3932633 100644 --- a/RcppTskit/man/ts_py_to_r.Rd +++ b/RcppTskit/man/ts_py_to_r.Rd @@ -15,8 +15,12 @@ ts_py_to_r(ts, cleanup = TRUE) A \code{\link{TreeSequence}} object. } \description{ -This function saves a tree sequence from reticulate Python to disk - and reads it into R for use with \code{RcppTskit}. +This function saves a tree sequence from reticulate Python to + temporary file on disk and reads it into R for use with \code{RcppTskit}. +} +\details{ +Because this transfer is via a temporary file, + the file UUID property changes. } \examples{ \dontrun{ diff --git a/RcppTskit/notes_pkg_dev.Rmd b/RcppTskit/notes_pkg_dev.Rmd index cedfbb2..56684fd 100644 --- a/RcppTskit/notes_pkg_dev.Rmd +++ b/RcppTskit/notes_pkg_dev.Rmd @@ -2,20 +2,209 @@ ## Next TODOs +## TableCollection indexes and `tree_sequence()` (notes for future work) + +### Why this note exists + +I got a bit hazy on how indexes are handled across tskit C, tskit Python, +and `RcppTskit` when converting a `TableCollection` to a `TreeSequence`. +This note Codex summarised the current understanding and suggests a future path. +We have to study this and decide how to move onwards. + +### tskit Python API behavior (reference behavior for R API) + +- `TableCollection.tree_sequence()` is user-facing and Pythonic (no bitwise flags). +- It checks `self.has_index()`. +- If indexes are missing, it calls `self.build_index()`. +- Then it calls `tskit.TreeSequence.load_tables(self)`. + +In other words: Python handles index preparation explicitly at the +`TableCollection` layer before creating a `TreeSequence`. + +Python also exposes these `TableCollection` methods: + +- `has_index()` +- `build_index()` +- `drop_index()` + +### tskit C API behavior (reference behavior for low-level C++ wrappers) + +- `tsk_table_collection_has_index(...)` +- `tsk_table_collection_build_index(...)` +- `tsk_table_collection_drop_index(...)` +- `tsk_treeseq_init(..., options)` where `options` may include: + - `TSK_TS_INIT_BUILD_INDEXES` + - `TSK_TS_INIT_COMPUTE_MUTATION_PARENTS` + +`TSK_TS_INIT_BUILD_INDEXES` is a C-level bitwise option and fits naturally in +the low-level `tc_ptr_to_ts_ptr(..., options)` wrapper. + +### Current `RcppTskit` behavior (as of this note) + +- Public R method `TableCollection$tree_sequence()` currently sets + `TSK_TS_INIT_BUILD_INDEXES` (via `bitwShiftL(1L, 0)`) and passes it to + `tc_ptr_to_ts_ptr(...)`. +- This is functionally fine, but it exposes C-style flag handling in the R + method implementation. + +Important nuance: + +- `tc_ptr_to_ts_ptr()` calls `tsk_treeseq_init(...)` without `TSK_TAKE_OWNERSHIP`. +- In tskit C, this means the tree sequence gets its own copied tables. +- If `TSK_TS_INIT_BUILD_INDEXES` is used there, indexes are built on the copied + tables (inside the tree sequence init path), not necessarily on the original + R `TableCollection` object. + +This differs from Python `TableCollection.tree_sequence()` behavior, which +builds indexes on the table collection itself before conversion. + +### Consistency goal (recommended) + +Use two layers with different styles on purpose: + +- R public API: Python-like, simple methods / logical arguments +- C++ low-level wrappers: C-like, bitwise `options` allowed + +That gives: + +- R-Python consistency for user-facing behavior +- C-C++ consistency for thin wrappers + +### Suggested future refactor for `TableCollection$tree_sequence()` + +Refactor R method to follow Python semantics explicitly: + +```r +tree_sequence = function() { + if (!self$has_index()) { + self$build_index() + } + ts_ptr <- tc_ptr_to_ts_ptr(self$pointer, options = 0L) + TreeSequence$new(pointer = ts_ptr) +} +``` + +Benefits: + +- No bitwise flag logic in R method implementation +- Easier to read and explain +- Matches Python behavior more closely +- Makes `has_index()` tests and state transitions clearer + +### Why `skip_tables = TRUE` is not a good "no index" test case + +I initially wondered whether loading a table collection with empty tables would +imply `has_index() == FALSE`. It does not reliably do that. + +Observation from local Python check: + +- `TableCollection.load(..., skip_tables=True)` can still report `has_index() == TRUE` + even when `num_edges == 0`. + +So: + +- "no rows" does not necessarily mean "no index" +- `skip_tables=TRUE` is not a robust negative test for `has_index()` + +### Best way to test `has_index()` false cases + +Add explicit index mutators to R API (matching Python): + +- `tc_ptr_build_index()` + `TableCollection$build_index()` +- `tc_ptr_drop_index()` + `TableCollection$drop_index()` + +Then tests become simple and deterministic: + +1. Load `tc` +2. `expect_true(tc$has_index())` +3. `tc$drop_index()` +4. `expect_false(tc$has_index())` +5. `tc$build_index()` +6. `expect_true(tc$has_index())` + +And for `tree_sequence()` behavior: + +1. Load `tc` +2. `tc$drop_index()` +3. `expect_false(tc$has_index())` +4. `ts <- tc$tree_sequence()` +5. Decide/document expected side effect: + - Python-consistent target: `tc$has_index()` becomes `TRUE` + - Current C-flag-inside-init approach may leave `tc` unchanged + +### Design decision to make explicitly (before implementing) + +Choose one and document it: + +- **Python-consistent behavior (recommended)**: + `TableCollection$tree_sequence()` may build indexes on `self` if missing. +- **Non-mutating conversion behavior**: + `TableCollection$tree_sequence()` builds indexes only in the transient treeseq + init path and leaves `self` as-is. + +The first option is more consistent with tskit Python and easier to reason +about for users switching between R and Python. + +### Minimal next steps when time allows + +1. Add `TableCollection$build_index()` / `$drop_index()` (+ `tc_ptr_*` wrappers). +2. Add focused tests for `has_index()` false/true transitions. +3. Refactor `TableCollection$tree_sequence()` to Python-style pre-check and + `build_index()`. +4. Add a regression test for `tc$tree_sequence()` behavior after dropping index. +5. Keep `tc_ptr_to_ts_ptr(..., options)` as low-level C-like API (do not remove). + +### Optional later (separate concern) + +If needed, consider a Python-style logical argument on the R method for +mutation-parent computation instead of exposing bitwise flags, e.g. +`compute_mutation_parents = FALSE`, while still translating to +`TSK_TS_INIT_COMPUTE_MUTATION_PARENTS` inside the low-level wrapper layer. + // TODO: This will go into AlphaSimR + +Tskit examples of building tree sequence in C +extern/tskit/c/tests/testlib.c has lots of examples of constructing a tree sequence using C +extern/tskit/c/tests/test_minimal_cpp.cpp has some C++ example too +extern/tskit/c/examples/haploid_wright_fisher.c +extern/tskit/c/examples/multichrom_wright_fisher_singlethreaded.c +extern/tskit/c/examples/multichrom_wright_fisher.c + // [[Rcpp::export]] SEXP tc_grow(SEXP tc) { RcppTskit_table_collection_xptr tc_xptr(tc); int ret; ret = 0; - // TODO: What do we need to do here now? How do we grow a tree sequence? - // Look into the simple example in C code online or look into what SLiM - // is doing!? + // Minimal example: grow the table collection in place by appending one child + // node and one edge spanning the full sequence. A real simulator would choose + // parent(s), times, and intervals according to its model. + if (tc_xptr->nodes.num_rows == 0) { + ret = TSK_ERR_NODE_OUT_OF_BOUNDS; + } else { + const tsk_id_t parent = tc_xptr->nodes.num_rows - 1; + const tsk_id_t child = tsk_node_table_add_row(&tc_xptr->nodes, TSK_NODE_IS_SAMPLE, + 0.0, TSK_NULL, TSK_NULL, NULL, 0); + if (child < 0) { + ret = static_cast(child); + } else { + const tsk_id_t edge = tsk_edge_table_add_row( + &tc_xptr->edges, 0.0, tc_xptr->sequence_length, parent, child, NULL, 0); + if (edge < 0) { + ret = static_cast(edge); + } + } + } + if (ret == 0) { + ret = tsk_table_collection_sort(tc_xptr, NULL, 0); + } + if (ret == 0) { + ret = tsk_table_collection_build_index(tc_xptr, 0); + } if (ret != 0) { // TODO: What should we do if something goes wrong? We can clearly throw an // error using Rcpp::stop(), but should we also do something with the // ts pointer and object? If we delete, we discard/delete past work, - // but if we don't, do we risk of returning a corrupted ts? + // but if we do not, do we risk of returning a corrupted ts? // tsk_table_collection_free(tc_ptr); // delete tc_ptr; Rcpp::stop(tsk_strerror(ret)); @@ -28,60 +217,6 @@ SEXP tc_grow(SEXP tc) { (it saves various ts information as attributes) https://github.com/bodkan/slendr/pull/191/changes#diff-f46eb0da2f9267022ecc6e09316598fde6bdcd2f980963906dc041b5096f344f -* TODO Tskit examples of building tree sequence in C -extern/tskit/c/tests/testlib.c has lots of examples of constructing a tree sequence using C -extern/tskit/c/tests/test_minimal_cpp.cpp has some C++ example too -extern/tskit/c/examples/haploid_wright_fisher.c -extern/tskit/c/examples/multichrom_wright_fisher_singlethreaded.c -extern/tskit/c/examples/multichrom_wright_fisher.c - -This test nicely shows the required columns: - -static void -test_missing_required_columns(void) -{ - int ret; - size_t j; - tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3); - tsk_table_collection_t t; - const char *required_cols[] = { - "edges/child", - "edges/left", - "edges/parent", - "edges/right", - "format/name", - "format/version", - "individuals/flags", - "migrations/dest", - "migrations/left", - "migrations/node", - "migrations/right", - "migrations/source", - "migrations/time", - "mutations/node", - "mutations/parent", - "mutations/site", - "nodes/flags", - "nodes/individual", - "nodes/population", - "nodes/time", - "sequence_length", - "sites/position", - "uuid", - }; - const char *drop_cols[1]; - - for (j = 0; j < sizeof(required_cols) / sizeof(*required_cols); j++) { - drop_cols[0] = required_cols[j]; - copy_store_drop_columns(ts, 1, drop_cols, _tmp_file_name); - ret = tsk_table_collection_load(&t, _tmp_file_name, 0); - CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_REQUIRED_COL_NOT_FOUND); - tsk_table_collection_free(&t); - } - - tsk_treeseq_free(ts); - free(ts); -} ## NEWS template diff --git a/RcppTskit/src/RcppExports.cpp b/RcppTskit/src/RcppExports.cpp index e940393..08ed6bc 100644 --- a/RcppTskit/src/RcppExports.cpp +++ b/RcppTskit/src/RcppExports.cpp @@ -223,6 +223,28 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// ts_ptr_discrete_genome +bool ts_ptr_discrete_genome(const SEXP ts); +RcppExport SEXP _RcppTskit_ts_ptr_discrete_genome(SEXP tsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const SEXP >::type ts(tsSEXP); + rcpp_result_gen = Rcpp::wrap(ts_ptr_discrete_genome(ts)); + return rcpp_result_gen; +END_RCPP +} +// ts_ptr_has_reference_sequence +bool ts_ptr_has_reference_sequence(const SEXP ts); +RcppExport SEXP _RcppTskit_ts_ptr_has_reference_sequence(SEXP tsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const SEXP >::type ts(tsSEXP); + rcpp_result_gen = Rcpp::wrap(ts_ptr_has_reference_sequence(ts)); + return rcpp_result_gen; +END_RCPP +} // ts_ptr_time_units Rcpp::String ts_ptr_time_units(const SEXP ts); RcppExport SEXP _RcppTskit_ts_ptr_time_units(SEXP tsSEXP) { @@ -234,6 +256,17 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// ts_ptr_discrete_time +bool ts_ptr_discrete_time(const SEXP ts); +RcppExport SEXP _RcppTskit_ts_ptr_discrete_time(SEXP tsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const SEXP >::type ts(tsSEXP); + rcpp_result_gen = Rcpp::wrap(ts_ptr_discrete_time(ts)); + return rcpp_result_gen; +END_RCPP +} // ts_ptr_min_time double ts_ptr_min_time(const SEXP ts); RcppExport SEXP _RcppTskit_ts_ptr_min_time(SEXP tsSEXP) { @@ -256,25 +289,25 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// ts_ptr_summary -Rcpp::List ts_ptr_summary(const SEXP ts); -RcppExport SEXP _RcppTskit_ts_ptr_summary(SEXP tsSEXP) { +// ts_ptr_file_uuid +Rcpp::String ts_ptr_file_uuid(const SEXP ts); +RcppExport SEXP _RcppTskit_ts_ptr_file_uuid(SEXP tsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const SEXP >::type ts(tsSEXP); - rcpp_result_gen = Rcpp::wrap(ts_ptr_summary(ts)); + rcpp_result_gen = Rcpp::wrap(ts_ptr_file_uuid(ts)); return rcpp_result_gen; END_RCPP } -// tc_ptr_summary -Rcpp::List tc_ptr_summary(const SEXP tc); -RcppExport SEXP _RcppTskit_tc_ptr_summary(SEXP tcSEXP) { +// ts_ptr_summary +Rcpp::List ts_ptr_summary(const SEXP ts); +RcppExport SEXP _RcppTskit_ts_ptr_summary(SEXP tsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const SEXP >::type tc(tcSEXP); - rcpp_result_gen = Rcpp::wrap(tc_ptr_summary(tc)); + Rcpp::traits::input_parameter< const SEXP >::type ts(tsSEXP); + rcpp_result_gen = Rcpp::wrap(ts_ptr_summary(ts)); return rcpp_result_gen; END_RCPP } @@ -289,6 +322,72 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// tc_ptr_sequence_length +double tc_ptr_sequence_length(const SEXP tc); +RcppExport SEXP _RcppTskit_tc_ptr_sequence_length(SEXP tcSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const SEXP >::type tc(tcSEXP); + rcpp_result_gen = Rcpp::wrap(tc_ptr_sequence_length(tc)); + return rcpp_result_gen; +END_RCPP +} +// tc_ptr_has_reference_sequence +bool tc_ptr_has_reference_sequence(const SEXP tc); +RcppExport SEXP _RcppTskit_tc_ptr_has_reference_sequence(SEXP tcSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const SEXP >::type tc(tcSEXP); + rcpp_result_gen = Rcpp::wrap(tc_ptr_has_reference_sequence(tc)); + return rcpp_result_gen; +END_RCPP +} +// tc_ptr_time_units +Rcpp::String tc_ptr_time_units(const SEXP tc); +RcppExport SEXP _RcppTskit_tc_ptr_time_units(SEXP tcSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const SEXP >::type tc(tcSEXP); + rcpp_result_gen = Rcpp::wrap(tc_ptr_time_units(tc)); + return rcpp_result_gen; +END_RCPP +} +// tc_ptr_file_uuid +Rcpp::String tc_ptr_file_uuid(const SEXP tc); +RcppExport SEXP _RcppTskit_tc_ptr_file_uuid(SEXP tcSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const SEXP >::type tc(tcSEXP); + rcpp_result_gen = Rcpp::wrap(tc_ptr_file_uuid(tc)); + return rcpp_result_gen; +END_RCPP +} +// tc_ptr_has_index +bool tc_ptr_has_index(const SEXP tc); +RcppExport SEXP _RcppTskit_tc_ptr_has_index(SEXP tcSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const SEXP >::type tc(tcSEXP); + rcpp_result_gen = Rcpp::wrap(tc_ptr_has_index(tc)); + return rcpp_result_gen; +END_RCPP +} +// tc_ptr_summary +Rcpp::List tc_ptr_summary(const SEXP tc); +RcppExport SEXP _RcppTskit_tc_ptr_summary(SEXP tcSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const SEXP >::type tc(tcSEXP); + rcpp_result_gen = Rcpp::wrap(tc_ptr_summary(tc)); + return rcpp_result_gen; +END_RCPP +} // tc_ptr_metadata_length Rcpp::List tc_ptr_metadata_length(const SEXP tc); RcppExport SEXP _RcppTskit_tc_ptr_metadata_length(SEXP tcSEXP) { @@ -389,12 +488,21 @@ static const R_CallMethodDef CallEntries[] = { {"_RcppTskit_ts_ptr_num_sites", (DL_FUNC) &_RcppTskit_ts_ptr_num_sites, 1}, {"_RcppTskit_ts_ptr_num_mutations", (DL_FUNC) &_RcppTskit_ts_ptr_num_mutations, 1}, {"_RcppTskit_ts_ptr_sequence_length", (DL_FUNC) &_RcppTskit_ts_ptr_sequence_length, 1}, + {"_RcppTskit_ts_ptr_discrete_genome", (DL_FUNC) &_RcppTskit_ts_ptr_discrete_genome, 1}, + {"_RcppTskit_ts_ptr_has_reference_sequence", (DL_FUNC) &_RcppTskit_ts_ptr_has_reference_sequence, 1}, {"_RcppTskit_ts_ptr_time_units", (DL_FUNC) &_RcppTskit_ts_ptr_time_units, 1}, + {"_RcppTskit_ts_ptr_discrete_time", (DL_FUNC) &_RcppTskit_ts_ptr_discrete_time, 1}, {"_RcppTskit_ts_ptr_min_time", (DL_FUNC) &_RcppTskit_ts_ptr_min_time, 1}, {"_RcppTskit_ts_ptr_max_time", (DL_FUNC) &_RcppTskit_ts_ptr_max_time, 1}, + {"_RcppTskit_ts_ptr_file_uuid", (DL_FUNC) &_RcppTskit_ts_ptr_file_uuid, 1}, {"_RcppTskit_ts_ptr_summary", (DL_FUNC) &_RcppTskit_ts_ptr_summary, 1}, - {"_RcppTskit_tc_ptr_summary", (DL_FUNC) &_RcppTskit_tc_ptr_summary, 1}, {"_RcppTskit_ts_ptr_metadata_length", (DL_FUNC) &_RcppTskit_ts_ptr_metadata_length, 1}, + {"_RcppTskit_tc_ptr_sequence_length", (DL_FUNC) &_RcppTskit_tc_ptr_sequence_length, 1}, + {"_RcppTskit_tc_ptr_has_reference_sequence", (DL_FUNC) &_RcppTskit_tc_ptr_has_reference_sequence, 1}, + {"_RcppTskit_tc_ptr_time_units", (DL_FUNC) &_RcppTskit_tc_ptr_time_units, 1}, + {"_RcppTskit_tc_ptr_file_uuid", (DL_FUNC) &_RcppTskit_tc_ptr_file_uuid, 1}, + {"_RcppTskit_tc_ptr_has_index", (DL_FUNC) &_RcppTskit_tc_ptr_has_index, 1}, + {"_RcppTskit_tc_ptr_summary", (DL_FUNC) &_RcppTskit_tc_ptr_summary, 1}, {"_RcppTskit_tc_ptr_metadata_length", (DL_FUNC) &_RcppTskit_tc_ptr_metadata_length, 1}, {"_RcppTskit_test_tsk_bug_assert_c", (DL_FUNC) &_RcppTskit_test_tsk_bug_assert_c, 0}, {"_RcppTskit_test_tsk_bug_assert_cpp", (DL_FUNC) &_RcppTskit_test_tsk_bug_assert_cpp, 0}, diff --git a/RcppTskit/src/RcppTskit.cpp b/RcppTskit/src/RcppTskit.cpp index 36d26b0..d91ee7e 100644 --- a/RcppTskit/src/RcppTskit.cpp +++ b/RcppTskit/src/RcppTskit.cpp @@ -340,12 +340,37 @@ SEXP tc_ptr_to_ts_ptr(const SEXP tc, const int options = 0) { return ts_xptr; } -// See tsk_treeseq_t inst/include/tskit/tskit/trees.h on what it contains. Here -// is the Python summary +// See tsk_treeseq_t inst/include/tskit/tskit/trees.h on which elements +// are there in a tsk_treeseq_t type. +// Here is a copy with comments on what we have implemented in RcppTskit: +// * tsk_size_t num_trees; SCALAR, IMPLEMENTED HERE +// * tsk_size_t num_samples; SCALAR, IMPLEMENTED HERE +// * tsk_id_t *samples; ARRAY, TODO LATER #49 +// * bool time_uncalibrated; SKIPPED (for now) since we have time_units +// * bool discrete_genome; SCALAR, IMPLEMENTED HERE +// * bool discrete_time; SCALAR, IMPLEMENTED HERE +// * double min_time; SCALAR, IMPLEMENTED HERE +// * double max_time; SCALAR, IMPLEMENTED HERE +// * double *breakpoints; ARRAY, TODO LATER #49 +// * tsk_id_t *sample_index_map; ARRAY, TODO LATER #49 +// * tsk_id_t *individual_nodes_mem; ARRAY, TODO LATER #49 +// * tsk_id_t **individual_nodes; ARRAY, TODO LATER #49 +// * tsk_size_t *individual_nodes_length; ARRAY, TODO LATER #49 +// * tsk_site_t *tree_sites_mem; ARRAY, TODO LATER #49 +// * tsk_site_t **tree_sites; ARRAY, TODO LATER #49 +// * tsk_size_t *tree_sites_length; ARRAY, TODO LATER #49 +// * tsk_mutation_t *site_mutations_mem; ARRAY, TODO LATER #49 +// * tsk_mutation_t **site_mutations; ARRAY, TODO LATER #49 +// * tsk_size_t *site_mutations_length; ARRAY, TODO LATER #49 +// * tsk_table_collection_t *tables; SKIPPED since we might look into +// table-column arrays/vectors +// +// Here is the Python API summary // https://tskit.dev/tskit/docs/stable/python-api.html#trees-and-tree-sequences -// See tsk_table_collection_t inst/include/tskit/tskit/tables.h on what it -// contains. Here is the Python summary -// https://tskit.dev/tskit/docs/stable/python-api.html#sec-tables-api-table-collection +// https://tskit.dev/tskit/docs/stable/python-api.html#tskit.TreeSequence +// https://tskit.dev/tskit/docs/stable/python-api.html#tskit.TreeSequence.max_root_time +// is a scalar property (which we have strived to implement / mirror here), +// but this one requires table-column arrays, which we will look into in #49 // @describeIn ts_ptr_summary Get the number of provenances in a tree sequence // [[Rcpp::export]] @@ -425,6 +450,24 @@ double ts_ptr_sequence_length(const SEXP ts) { return tsk_treeseq_get_sequence_length(ts_xptr); } +// @describeIn ts_ptr_summary Get the discrete genome status +// [[Rcpp::export]] +bool ts_ptr_discrete_genome(const SEXP ts) { + RcppTskit_treeseq_xptr ts_xptr(ts); + return tsk_treeseq_get_discrete_genome(ts_xptr); +} + +// @describeIn ts_ptr_summary Does the tree sequence hold a reference genome +// sequence +// @details Note that tsk_treeseq_has_reference_sequence is undocumented +// method in the tskit C API (see trees.h), but documented in tskit Python API +// https://tskit.dev/tskit/docs/stable/python-api.html#tskit.TreeSequence.has_reference_sequence +// [[Rcpp::export]] +bool ts_ptr_has_reference_sequence(const SEXP ts) { + RcppTskit_treeseq_xptr ts_xptr(ts); + return tsk_treeseq_has_reference_sequence(ts_xptr); +} + // @describeIn ts_ptr_summary Get the time units string // [[Rcpp::export]] Rcpp::String ts_ptr_time_units(const SEXP ts) { @@ -438,6 +481,13 @@ Rcpp::String ts_ptr_time_units(const SEXP ts) { return Rcpp::String(time_units); } +// @describeIn ts_ptr_summary Get the discrete time status +// [[Rcpp::export]] +bool ts_ptr_discrete_time(const SEXP ts) { + RcppTskit_treeseq_xptr ts_xptr(ts); + return tsk_treeseq_get_discrete_time(ts_xptr); +} + // @describeIn ts_ptr_summary Get the min time in node table and mutation table // [[Rcpp::export]] double ts_ptr_min_time(const SEXP ts) { @@ -452,6 +502,17 @@ double ts_ptr_max_time(const SEXP ts) { return tsk_treeseq_get_max_time(ts_xptr); } +// @describeIn ts_ptr_summary Get the file uuid string +// [[Rcpp::export]] +Rcpp::String ts_ptr_file_uuid(const SEXP ts) { + RcppTskit_treeseq_xptr ts_xptr(ts); + const char *p = tsk_treeseq_get_file_uuid(ts_xptr); + if (p == NULL || p[0] == '\0') { + return Rcpp::String(NA_STRING); + } + return Rcpp::String(p); +} + // @name ts_ptr_summary // @title Summary of properties and number of records in a tree sequence // @param ts an external pointer to tree sequence as a \code{tsk_treeseq_t} @@ -469,13 +530,16 @@ double ts_ptr_max_time(const SEXP ts) { // \url{https://tskit.dev/tskit/docs/stable/c-api.html#c.tsk_treeseq_get_num_sites}, // \url{https://tskit.dev/tskit/docs/stable/c-api.html#c.tsk_treeseq_get_num_mutations}, // \url{https://tskit.dev/tskit/docs/stable/c-api.html#c.tsk_treeseq_get_sequence_length}, +// \url{https://tskit.dev/tskit/docs/stable/c-api.html#c.tsk_treeseq_get_discrete_genome}, +// \url{https://tskit.dev/tskit/docs/stable/c-api.html#c.tsk_treeseq_has_reference_sequence}, // \url{https://tskit.dev/tskit/docs/stable/c-api.html#c.tsk_treeseq_get_time_units}, +// \url{https://tskit.dev/tskit/docs/stable/c-api.html#c.tsk_treeseq_get_discrete_time}, // \url{https://tskit.dev/tskit/docs/stable/c-api.html#c.tsk_treeseq_get_min_time}, -// and // \url{https://tskit.dev/tskit/docs/stable/c-api.html#c.tsk_treeseq_get_max_time}, -// @return A named list with the number/value for all items, -// while other functions \code{ts_num_x} and \code{ts_ptr_num_x} etc. -// return the number/value of each item. +// and +// \url{https://tskit.dev/tskit/docs/stable/c-api.html#c.tsk_treeseq_get_file_uuid}, +// @return \code{ts_ptr_summary} returns a named list with numbers and values, +// while functions \code{ts_ptr_*} return the number or value for each item. // @examples // ts_file <- system.file("examples/test.trees", package = "RcppTskit") // ts_ptr <- RcppTskit:::ts_ptr_load(ts_file) @@ -491,9 +555,13 @@ double ts_ptr_max_time(const SEXP ts) { // RcppTskit:::ts_ptr_num_sites(ts_ptr) // RcppTskit:::ts_ptr_num_mutations(ts_ptr) // RcppTskit:::ts_ptr_sequence_length(ts_ptr) +// RcppTskit:::ts_ptr_discrete_genome(ts_ptr) +// RcppTskit:::ts_ptr_has_reference_sequence(ts_ptr) // RcppTskit:::ts_ptr_time_units(ts_ptr) +// RcppTskit:::ts_ptr_discrete_time(ts_ptr) // RcppTskit:::ts_ptr_min_time(ts_ptr) // RcppTskit:::ts_ptr_max_time(ts_ptr) +// RcppTskit:::ts_ptr_file_uuid(ts_ptr) // [[Rcpp::export]] Rcpp::List ts_ptr_summary(const SEXP ts) { RcppTskit_treeseq_xptr ts_xptr(ts); @@ -509,39 +577,14 @@ Rcpp::List ts_ptr_summary(const SEXP ts) { Rcpp::_["num_sites"] = tsk_treeseq_get_num_sites(ts_xptr), Rcpp::_["num_mutations"] = tsk_treeseq_get_num_mutations(ts_xptr), Rcpp::_["sequence_length"] = tsk_treeseq_get_sequence_length(ts_xptr), + Rcpp::_["discrete_genome"] = tsk_treeseq_get_discrete_genome(ts_xptr), + Rcpp::_["has_reference_sequence"] = + tsk_treeseq_has_reference_sequence(ts_xptr), Rcpp::_["time_units"] = ts_ptr_time_units(ts), + Rcpp::_["discrete_time"] = tsk_treeseq_get_discrete_time(ts_xptr), Rcpp::_["min_time"] = ts_ptr_min_time(ts), - Rcpp::_["max_time"] = ts_ptr_max_time(ts)); -} - -// @title Summary of properties and number of records in a table collection -// @param tc an external pointer to table collection as a -// \code{tsk_table_collection_t} object. -// @return A named list with the number/value for all items. -// @examples -// ts_file <- system.file("examples/test.trees", package = "RcppTskit") -// tc_ptr <- RcppTskit:::tc_ptr_load(ts_file) -// RcppTskit:::tc_ptr_summary(tc_ptr) -// [[Rcpp::export]] -Rcpp::List tc_ptr_summary(const SEXP tc) { - RcppTskit_table_collection_xptr tc_xptr(tc); - const tsk_table_collection_t *tables = tc_xptr; - std::string time_units; - if (tables->time_units_length > 0 && tables->time_units != NULL) { - time_units.assign(tables->time_units, - tables->time_units + tables->time_units_length); - } - return Rcpp::List::create( - Rcpp::_["num_provenances"] = tables->provenances.num_rows, - Rcpp::_["num_populations"] = tables->populations.num_rows, - Rcpp::_["num_migrations"] = tables->migrations.num_rows, - Rcpp::_["num_individuals"] = tables->individuals.num_rows, - Rcpp::_["num_nodes"] = tables->nodes.num_rows, - Rcpp::_["num_edges"] = tables->edges.num_rows, - Rcpp::_["num_sites"] = tables->sites.num_rows, - Rcpp::_["num_mutations"] = tables->mutations.num_rows, - Rcpp::_["sequence_length"] = tables->sequence_length, - Rcpp::_["time_units"] = time_units); + Rcpp::_["max_time"] = ts_ptr_max_time(ts), + Rcpp::_["file_uuid"] = ts_ptr_file_uuid(ts)); } // @title Get the length of metadata in a tree sequence and its tables @@ -566,6 +609,7 @@ Rcpp::List ts_ptr_metadata_length(const SEXP ts) { // tsk_treeseq_get_metadata_length() returns self->tables->metadata_length // Rcpp::_["ts"] = // static_cast(tsk_treeseq_get_metadata_length(ts_xptr)), + // hence we just use that here Rcpp::_["ts"] = static_cast(tables->metadata_length), Rcpp::_["populations"] = static_cast(tables->populations.metadata_length), @@ -580,6 +624,161 @@ Rcpp::List ts_ptr_metadata_length(const SEXP ts) { static_cast(tables->mutations.metadata_length)); } +// # nocov start +// TODO: Metadata notes if we do anything with metadata #36 +// https://github.com/HighlanderLab/RcppTskit/issues/36 +// This is how we would get metadata, but it will be raw bytes, +// so would have to work with schema and codes ... +// ts_file <- system.file("examples/test.trees", package = "RcppTskit") +// ts_ptr <- RcppTskit:::ts_ptr_load(ts_file) +// RcppTskit:::ts_ptr_metadata(ts_ptr) +// slendr::ts_metadata(slim_ts) +Rcpp::String ts_ptr_metadata(const SEXP ts) { + RcppTskit_treeseq_xptr ts_xptr(ts); + const char *p = tsk_treeseq_get_metadata(ts_xptr); + tsk_size_t n = tsk_treeseq_get_metadata_length(ts_xptr); + std::string metadata; + if (n > 0 && p != NULL) { + metadata.assign(p, p + n); + } + return Rcpp::String(metadata); +} +// # nocov end + +// TODO: Metadata notes if we do anything with metadata #36 +// https://github.com/HighlanderLab/RcppTskit/issues/36 +// int ts_ptr_metadata_schema_length(const SEXP ts) { +// RcppTskit_treeseq_xptr ts_xptr(ts); +// return static_cast(tsk_treeseq_get_metadata_schema_length(ts_xptr)); +// } +// TODO: test the above function +// TODO: document the above function +// TODO: expose the above function to R, including TreeSequence method + +// TODO: Develop tsk_treeseq_get_metadata +// TODO: Metadata notes if we do anything with metadata #36 +// https://github.com/HighlanderLab/RcppTskit/issues/36 + +// See tsk_treeseq_t inst/include/tskit/tskit/tables.h on which elements +// are there in a tsk_table_collection_t type. +// Here is a copy with comments on what we have implemented in RcppTskit: +// * double sequence_length; SCALAR, IMPLEMENTED HERE +// * char *file_uuid; "SCALAR", IMPLEMENTED HERE +// * char *time_units; "SCALAR", IMPLEMENTED HERE +// * tsk_size_t time_units_length; IMPLEMENTED HERE (as part of the above) +// TODO: Metadata notes if we do anything with metadata #36 +// https://github.com/HighlanderLab/RcppTskit/issues/36 +// * char *metadata; "SCALAR", TODO --> tc_ptr_metadata +// * tsk_size_t metadata_length; SCALAR, TODO as part of the above +// TODO: Metadata notes if we do anything with metadata #36 +// https://github.com/HighlanderLab/RcppTskit/issues/36 +// * char *metadata_schema; "SCALAR", TODO --> tc_ptr_metadata_schema +// * tsk_size_t metadata_schema_length; SCALAR, TODO as part of the above +// * tsk_reference_sequence_t reference_sequence; TODO? +// * tsk_individual_table_t individuals; TABLE, SKIP OR TODO LATER #49 +// * tsk_node_table_t nodes; TABLE, SKIP OR TODO LATER #49 +// * tsk_edge_table_t edges; TABLE, SKIP OR TODO LATER #49 +// * tsk_migration_table_t migrations; TABLE, SKIP OR TODO LATER #49 +// * tsk_site_table_t sites; TABLE, SKIP OR TODO LATER #49 +// * tsk_mutation_table_t mutations; TABLE, SKIP OR TODO LATER #49 +// * tsk_population_table_t populations; TABLE, SKIP OR TODO LATER #49 +// * tsk_provenance_table_t provenances; TABLE, SKIP OR TODO LATER #49 +// * struct { +// tsk_id_t *edge_insertion_order; +// tsk_id_t *edge_removal_order; +// tsk_size_t num_edges; +// *} indexes; SKIPPED (for now) +// +// Here is the Python summary +// https://tskit.dev/tskit/docs/stable/python-api.html#sec-tables-api-table-collection +// https://tskit.dev/tskit/docs/stable/python-api.html#tskit.TableCollection + +// @describeIn tc_ptr_summary Get the sequence length +// [[Rcpp::export]] +double tc_ptr_sequence_length(const SEXP tc) { + RcppTskit_table_collection_xptr tc_xptr(tc); + return tc_xptr->sequence_length; +} + +// @describeIn tc_ptr_summary Does the table collection hold a reference genome +// sequence +// @details Note that tsk_table_collection_has_reference_sequence is +// undocumented method in the tskit C API (see tables.h), +// but documented in tskit Python API +// https://tskit.dev/tskit/docs/stable/python-api.html#tskit.TableCollection.has_reference_sequence +// [[Rcpp::export]] +bool tc_ptr_has_reference_sequence(const SEXP tc) { + RcppTskit_table_collection_xptr tc_xptr(tc); + return tsk_table_collection_has_reference_sequence(tc_xptr); +} + +// @describeIn tc_ptr_summary Get the time units string +// [[Rcpp::export]] +Rcpp::String tc_ptr_time_units(const SEXP tc) { + RcppTskit_table_collection_xptr tc_xptr(tc); + const char *p = tc_xptr->time_units; + tsk_size_t n = tc_xptr->time_units_length; + std::string time_units; + if (n > 0 && p != NULL) { + time_units.assign(p, p + n); + } + return Rcpp::String(time_units); +} + +// @describeIn tc_ptr_summary Get the file uuid string +// [[Rcpp::export]] +Rcpp::String tc_ptr_file_uuid(const SEXP tc) { + RcppTskit_table_collection_xptr tc_xptr(tc); + if (tc_xptr->file_uuid == NULL || tc_xptr->file_uuid[0] == '\0') { + return Rcpp::String(NA_STRING); + } + return Rcpp::String(tc_xptr->file_uuid); +} + +// @describeIn tc_ptr_summary Is the table collection indexed +// [[Rcpp::export]] +bool tc_ptr_has_index(const SEXP tc) { + RcppTskit_table_collection_xptr tc_xptr(tc); + return tsk_table_collection_has_index(tc_xptr, 0); +} + +// @title Summary of properties and number of records in a table collection +// @param tc an external pointer to table collection as a +// \code{tsk_table_collection_t} object. +// @details These functions return the summary of properties and number of +// records in a table collection, by accessing its elements and/or calling +// \url{https://tskit.dev/tskit/docs/stable/c-api.html#c.tsk_table_collection_has_index} +// @return \code{tc_ptr_summary} returns a named list with numbers and values, +// while functions \code{tc_ptr_*} return the number or value for each item. +// @examples +// ts_file <- system.file("examples/test.trees", package = "RcppTskit") +// tc_ptr <- RcppTskit:::tc_ptr_load(ts_file) +// RcppTskit:::tc_ptr_summary(tc_ptr) +// RcppTskit:::tc_ptr_sequence_length(tc_ptr) +// RcppTskit:::tc_ptr_has_reference_sequence(tc_ptr) +// RcppTskit:::tc_ptr_time_units(tc_ptr) +// RcppTskit:::tc_ptr_file_uuid(tc_ptr) +// RcppTskit:::tc_ptr_has_index(tc_ptr) +// [[Rcpp::export]] +Rcpp::List tc_ptr_summary(const SEXP tc) { + RcppTskit_table_collection_xptr tc_xptr(tc); + const tsk_table_collection_t *tables = tc_xptr; + return Rcpp::List::create( + Rcpp::_["num_provenances"] = tables->provenances.num_rows, + Rcpp::_["num_populations"] = tables->populations.num_rows, + Rcpp::_["num_migrations"] = tables->migrations.num_rows, + Rcpp::_["num_individuals"] = tables->individuals.num_rows, + Rcpp::_["num_nodes"] = tables->nodes.num_rows, + Rcpp::_["num_edges"] = tables->edges.num_rows, + Rcpp::_["num_sites"] = tables->sites.num_rows, + Rcpp::_["num_mutations"] = tables->mutations.num_rows, + Rcpp::_["sequence_length"] = tables->sequence_length, + Rcpp::_["has_reference_sequence"] = tc_ptr_has_reference_sequence(tc), + Rcpp::_["time_units"] = tc_ptr_time_units(tc), + Rcpp::_["file_uuid"] = tc_ptr_file_uuid(tc), + Rcpp::_["has_index"] = tc_ptr_has_index(tc)); +} + // @title Get the length of metadata in a table collection and its tables // @param tc an external pointer to table collection as a // \code{tsk_table_collection_t} object. @@ -606,22 +805,16 @@ Rcpp::List tc_ptr_metadata_length(const SEXP tc) { static_cast(tc_xptr->mutations.metadata_length)); } -// # nocov start -// This is how we would get metadata, but it will be raw bytes, -// so would have to work with schema and codes ... but see -// https://github.com/HighlanderLab/RcppTskit/issues/36 -// ts_file <- system.file("examples/test.trees", package = "RcppTskit") -// ts_ptr <- RcppTskit:::ts_ptr_load(ts_file) -// RcppTskit:::ts_ptr_metadata(ts_ptr) -// slendr::ts_metadata(slim_ts) -Rcpp::String ts_ptr_metadata(const SEXP ts) { - RcppTskit_treeseq_xptr ts_xptr(ts); - const char *p = tsk_treeseq_get_metadata(ts_xptr); - tsk_size_t n = tsk_treeseq_get_metadata_length(ts_xptr); - std::string metadata; - if (n > 0 && p != NULL) { - metadata.assign(p, p + n); - } - return Rcpp::String(metadata); -} -// # nocov end +// TODO: Metadata notes if we do anything with metadata #36 +// https://github.com/HighlanderLab/RcppTskit/issues/36 +// int tc_ptr_metadata_schema_length(const SEXP tc) { +// RcppTskit_table_collection_xptr tc_xptr(tc); +// return static_cast(tc_xptr->metadata_schema_length); +// } +// TODO: test the above function +// TODO: document the above function +// TODO: expose the above function to R, including TableCollection method + +// TODO: Develop tc_ptr_metadata_schema +// TODO: Metadata notes if we do anything with metadata #36 +// https://github.com/HighlanderLab/RcppTskit/issues/36 diff --git a/RcppTskit/tests/testthat/test_TableCollection.R b/RcppTskit/tests/testthat/test_TableCollection.R index eebabd3..80aaeb5 100644 --- a/RcppTskit/tests/testthat/test_TableCollection.R +++ b/RcppTskit/tests/testthat/test_TableCollection.R @@ -43,6 +43,7 @@ test_that("TableCollection$new() works", { test_that("TableCollection and TreeSequence round-trip works", { ts_file <- system.file("examples/test.trees", package = "RcppTskit") + test_trees_file_uuid <- "79ec383f-a57d-b44f-2a5c-f0feecbbcb32" ts_ptr <- ts_ptr_load(ts_file) # ---- Integer bitmask of tskit flags ---- @@ -75,10 +76,13 @@ test_that("TableCollection and TreeSequence round-trip works", { tc = data.frame( property = c( "sequence_length", + "has_reference_sequence", "time_units", - "has_metadata" + "has_metadata", + "file_uuid", + "has_index" ), - value = c(100, "generations", FALSE) + value = c(100, FALSE, "generations", FALSE, NA_character_, TRUE) ), tables = data.frame( table = c( @@ -118,7 +122,12 @@ test_that("TableCollection and TreeSequence round-trip works", { "externalptr" )) ts_ptr2 <- tc_ptr_to_ts_ptr(tc_ptr) - expect_equal(ts_ptr_print(ts_ptr), ts_ptr_print(ts_ptr2)) + p_ts_ptr <- ts_ptr_print(ts_ptr) + p_ts_ptr2 <- ts_ptr_print(ts_ptr2) + i_file_uuid <- p_ts_ptr$ts$property == "file_uuid" + p_ts_ptr$ts$value[i_file_uuid] <- NA_character_ + p_ts_ptr2$ts$value[p_ts_ptr2$ts$property == "file_uuid"] <- NA_character_ + expect_equal(p_ts_ptr, p_ts_ptr2) # ---- ts --> tc --> ts ---- @@ -139,10 +148,13 @@ test_that("TableCollection and TreeSequence round-trip works", { tc = data.frame( property = c( "sequence_length", + "has_reference_sequence", "time_units", - "has_metadata" + "has_metadata", + "file_uuid", + "has_index" ), - value = c(100, "generations", FALSE) + value = c(100, FALSE, "generations", FALSE, NA_character_, TRUE) ), tables = data.frame( table = c( @@ -182,6 +194,9 @@ test_that("TableCollection and TreeSequence round-trip works", { tmp <- capture.output(ts_print <- ts$print()) # jarl-ignore implicit_assignment: it's just a test tmp <- capture.output(ts2_print <- ts2$print()) + i_file_uuid <- ts_print$ts$property == "file_uuid" + ts_print$ts$value[i_file_uuid] <- NA_character_ + ts2_print$ts$value[ts2_print$ts$property == "file_uuid"] <- NA_character_ expect_equal(ts_print, ts2_print) # Edge cases diff --git a/RcppTskit/tests/testthat/test_load_summary_and_dump.R b/RcppTskit/tests/testthat/test_load_summary_and_dump.R index 320a3be..d7e6e27 100644 --- a/RcppTskit/tests/testthat/test_load_summary_and_dump.R +++ b/RcppTskit/tests/testthat/test_load_summary_and_dump.R @@ -1,4 +1,9 @@ test_that("ts/tc_load(), ts/tc_summary*(), and ts/tc_dump(x) work", { + # UUIDs of packaged example fixtures generated by inst/examples/create_test.trees.py + test_trees_file_uuid <- "79ec383f-a57d-b44f-2a5c-f0feecbbcb32" + test2_trees_file_uuid <- "cf406b8c-be33-af4a-c00b-a4de1e6151ff" + test_with_ref_seq_file_uuid <- "71793465-49ed-e0f3-0657-c01926226b29" + # ---- ts_load() ---- expect_error(ts_ptr_load()) @@ -143,9 +148,13 @@ test_that("ts/tc_load(), ts/tc_summary*(), and ts/tc_dump(x) work", { "num_sites" = 25L, "num_mutations" = 30L, "sequence_length" = 100.0, + "discrete_genome" = TRUE, + "has_reference_sequence" = FALSE, "time_units" = "generations", + "discrete_time" = FALSE, "min_time" = 0, - "max_time" = 6.9619933371908083 + "max_time" = 6.9619933371908083, + "file_uuid" = test_trees_file_uuid ) ) @@ -226,6 +235,39 @@ test_that("ts/tc_load(), ts/tc_summary*(), and ts/tc_dump(x) work", { expect_equal(n_ptr, 100) expect_equal(ts$sequence_length(), 100) + expect_error(ts_ptr_discrete_genome()) + expect_error(ts_ptr_discrete_genome(ts)) + l_ptr <- ts_ptr_discrete_genome(ts_ptr) + expect_true(is.logical(l_ptr)) + expect_true(l_ptr) + expect_true(ts$discrete_genome()) + + ts_non_discrete_file <- system.file( + "examples/test_non_discrete_genome.trees", + package = "RcppTskit" + ) + ts_non_discrete_ptr <- ts_ptr_load(ts_non_discrete_file) + ts_non_discrete <- ts_load(ts_non_discrete_file) + expect_false(ts_ptr_discrete_genome(ts_non_discrete_ptr)) + expect_false(ts_non_discrete$discrete_genome()) + + expect_error(ts_ptr_has_reference_sequence()) + expect_error(ts_ptr_has_reference_sequence(ts)) + l_ptr <- ts_ptr_has_reference_sequence(ts_ptr) + expect_true(is.logical(l_ptr)) + expect_false(l_ptr) + expect_false(ts$has_reference_sequence()) + + ts_with_ref_seq_ptr <- ts_ptr_load(ts_with_ref_seq_file) + ts_with_ref_seq <- ts_load(ts_with_ref_seq_file) + expect_true(ts_ptr_has_reference_sequence(ts_with_ref_seq_ptr)) + expect_true(ts_with_ref_seq$has_reference_sequence()) + + f_ptr <- ts_ptr_sequence_length(ts_non_discrete_ptr) + expect_true(is.numeric(f_ptr)) + expect_equal(f_ptr, 10.5) + expect_equal(ts_non_discrete$sequence_length(), 10.5) + expect_error(ts_ptr_time_units()) expect_error(ts_ptr_time_units(ts)) c_ptr <- ts_ptr_time_units(ts_ptr) @@ -233,6 +275,22 @@ test_that("ts/tc_load(), ts/tc_summary*(), and ts/tc_dump(x) work", { expect_equal(c_ptr, "generations") expect_equal(ts$time_units(), "generations") + expect_error(ts_ptr_discrete_time()) + expect_error(ts_ptr_discrete_time(ts)) + l_ptr <- ts_ptr_discrete_time(ts_ptr) + expect_true(is.logical(l_ptr)) + expect_false(l_ptr) + expect_false(ts$discrete_time()) + + ts_discrete_time_file <- system.file( + "examples/test_discrete_time.trees", + package = "RcppTskit" + ) + ts_discrete_time_ptr <- ts_ptr_load(ts_discrete_time_file) + ts_discrete_time <- ts_load(ts_discrete_time_file) + expect_true(ts_ptr_discrete_time(ts_discrete_time_ptr)) + expect_true(ts_discrete_time$discrete_time()) + expect_error(ts_ptr_min_time()) expect_error(ts_ptr_min_time(ts)) d_ptr <- ts_ptr_min_time(ts_ptr) @@ -247,6 +305,24 @@ test_that("ts/tc_load(), ts/tc_summary*(), and ts/tc_dump(x) work", { expect_equal(d_ptr, 6.9619933371908083) expect_equal(ts$max_time(), 6.9619933371908083) + expect_error(ts_ptr_file_uuid()) + expect_error(ts_ptr_file_uuid(ts)) + c_ptr <- ts_ptr_file_uuid(ts_ptr) + expect_true(is.character(c_ptr)) + expect_equal(length(c_ptr), 1L) + expect_equal(c_ptr, test_trees_file_uuid) + expect_equal(c_ptr, ts$file_uuid()) + expect_equal( + ts_ptr_file_uuid(ts_with_ref_seq_ptr), + test_with_ref_seq_file_uuid + ) + expect_equal(ts_with_ref_seq$file_uuid(), test_with_ref_seq_file_uuid) + expect_false(identical(c_ptr, ts_ptr_file_uuid(ts_with_ref_seq_ptr))) + + ts_from_tc <- ts$dump_tables()$tree_sequence() + expect_true(is.na(ts_from_tc$file_uuid())) + expect_true(is.na(ts_ptr_file_uuid(ts_from_tc$pointer))) + # ---- tc_ptr_summary() ---- # Simple comparison of summaries @@ -263,9 +339,61 @@ test_that("ts/tc_load(), ts/tc_summary*(), and ts/tc_dump(x) work", { "num_sites", "num_mutations", "sequence_length", - "time_units" + "has_reference_sequence", + "time_units", + "file_uuid" + ) + expect_equal(n_ptr_tc[shared_items], n_ptr_ts[shared_items]) + + expect_error(tc_ptr_sequence_length()) + expect_error(tc_ptr_sequence_length(ts)) + n_ptr <- tc_ptr_sequence_length(tc_ptr) + expect_true(is.numeric(n_ptr)) + expect_equal(n_ptr, 100) + expect_equal(tc$sequence_length(), 100) + + tc_non_discrete_file <- system.file( + "examples/test_non_discrete_genome.trees", + package = "RcppTskit" ) - expect_equal(n_ptr_tc, n_ptr_ts[shared_items]) + tc_non_discrete_ptr <- tc_ptr_load(tc_non_discrete_file) + tc_non_discrete <- tc_load(tc_non_discrete_file) + f_ptr <- tc_ptr_sequence_length(tc_non_discrete_ptr) + expect_true(is.numeric(f_ptr)) + expect_equal(f_ptr, 10.5) + expect_equal(tc_non_discrete$sequence_length(), 10.5) + expect_error(tc_ptr_has_reference_sequence()) + expect_error(tc_ptr_has_reference_sequence(ts)) + l_ptr <- tc_ptr_has_reference_sequence(tc_ptr) + expect_true(is.logical(l_ptr)) + expect_false(l_ptr) + expect_false(tc$has_reference_sequence()) + + expect_error(tc_ptr_time_units()) + expect_error(tc_ptr_time_units(ts)) + c_ptr <- tc_ptr_time_units(tc_ptr) + expect_true(is.character(c_ptr)) + expect_equal(c_ptr, "generations") + expect_equal(tc$time_units(), "generations") + + expect_error(tc_ptr_file_uuid()) + expect_error(tc_ptr_file_uuid(ts)) + c_ptr <- tc_ptr_file_uuid(tc_ptr) + expect_true(is.character(c_ptr)) + expect_equal(length(c_ptr), 1L) + expect_equal(c_ptr, test_trees_file_uuid) + expect_equal(c_ptr, tc$file_uuid()) + + tc_from_ts <- ts$dump_tables() + expect_true(is.na(tc_from_ts$file_uuid())) + expect_true(is.na(tc_ptr_file_uuid(tc_from_ts$pointer))) + + expect_error(tc_ptr_has_index()) + expect_error(tc_ptr_has_index(ts)) + l_ptr <- tc_ptr_has_index(tc_ptr) + expect_true(is.logical(l_ptr)) + expect_true(l_ptr) + expect_true(tc$has_index()) # ---- ts_ptr_print() and ts$print() ---- @@ -283,14 +411,30 @@ test_that("ts/tc_load(), ts/tc_summary*(), and ts/tc_dump(x) work", { ts = data.frame( property = c( "num_samples", - "sequence_length", "num_trees", + "sequence_length", + "discrete_genome", + "has_reference_sequence", "time_units", + "discrete_time", "min_time", "max_time", - "has_metadata" + "has_metadata", + "file_uuid" ), - value = c(16, 100, 9, "generations", 0.0, 6.9619933371908083, FALSE) + value = c( + 16, + 9, + 100, + TRUE, + FALSE, + "generations", + FALSE, + 0.0, + 6.9619933371908083, + FALSE, + test_trees_file_uuid + ) ), tables = data.frame( table = c( @@ -335,10 +479,20 @@ test_that("ts/tc_load(), ts/tc_summary*(), and ts/tc_dump(x) work", { tc = data.frame( property = c( "sequence_length", + "has_reference_sequence", "time_units", - "has_metadata" + "has_metadata", + "file_uuid", + "has_index" ), - value = c(100, "generations", FALSE) + value = c( + 100, + FALSE, + "generations", + FALSE, + test_trees_file_uuid, + TRUE + ) ), tables = data.frame( table = c( @@ -409,9 +563,16 @@ test_that("ts/tc_load(), ts/tc_summary*(), and ts/tc_dump(x) work", { "num_sites" = 25L, "num_mutations" = 30L, "sequence_length" = 100.0, + "discrete_genome" = TRUE, + "has_reference_sequence" = FALSE, "time_units" = "generations", + "discrete_time" = FALSE, "min_time" = 0.0, - "max_time" = 6.9619933371908083 + "max_time" = 6.9619933371908083, + "file_uuid" = ts_ptr_file_uuid(ts_ptr) + # using ts_ptr_file_uuid() here since I don't have UUID + # on this file from Python code due to disk serialisation + # on our end (we test for correctness elsewhere anyway) ) ) @@ -455,9 +616,11 @@ test_that("ts/tc_load(), ts/tc_summary*(), and ts/tc_dump(x) work", { "num_sites", "num_mutations", "sequence_length", - "time_units" + "has_reference_sequence", + "time_units", + "file_uuid" ) - expect_equal(n_ts[shared_items], n_tc) + expect_equal(n_ts[shared_items], n_tc[shared_items]) # ---- ts$dump() ---- @@ -495,9 +658,16 @@ test_that("ts/tc_load(), ts/tc_summary*(), and ts/tc_dump(x) work", { "num_sites" = 25L, "num_mutations" = 30L, "sequence_length" = 100.0, + "discrete_genome" = TRUE, + "has_reference_sequence" = FALSE, "time_units" = "generations", + "discrete_time" = FALSE, "min_time" = 0.0, - "max_time" = 6.9619933371908083 + "max_time" = 6.9619933371908083, + "file_uuid" = ts_ptr_file_uuid(ts$pointer) + # using ts_ptr_file_uuid() here since I don't have UUID + # on this file from Python code due to disk serialisation + # on our end (we test for correctness elsewhere anyway) ) ) @@ -535,7 +705,13 @@ test_that("ts/tc_load(), ts/tc_summary*(), and ts/tc_dump(x) work", { "num_sites" = 25L, "num_mutations" = 30L, "sequence_length" = 100.0, - "time_units" = "generations" + "has_reference_sequence" = FALSE, + "time_units" = "generations", + "file_uuid" = tc_ptr_file_uuid(tc$pointer), + "has_index" = TRUE + # using ts_ptr_file_uuid() here since I don't have UUID + # on this file from Python code due to disk serialisation + # on our end (we test for correctness elsewhere anyway) ) ) @@ -580,9 +756,13 @@ test_that("ts/tc_load(), ts/tc_summary*(), and ts/tc_dump(x) work", { "num_sites" = 25L, "num_mutations" = 30L, "sequence_length" = 100.0, + "discrete_genome" = TRUE, + "has_reference_sequence" = FALSE, "time_units" = "generations", + "discrete_time" = FALSE, "min_time" = 0, - "max_time" = 6.9619933371908083 + "max_time" = 6.9619933371908083, + "file_uuid" = test2_trees_file_uuid ) ) @@ -613,14 +793,30 @@ test_that("ts/tc_load(), ts/tc_summary*(), and ts/tc_dump(x) work", { ts = data.frame( property = c( "num_samples", - "sequence_length", "num_trees", + "sequence_length", + "discrete_genome", + "has_reference_sequence", "time_units", + "discrete_time", "min_time", "max_time", - "has_metadata" + "has_metadata", + "file_uuid" ), - value = c(16, 100, 9, "generations", 0.0, 6.9619933371908083, TRUE) + value = c( + 16, + 9, + 100, + TRUE, + FALSE, + "generations", + FALSE, + 0.0, + 6.9619933371908083, + TRUE, + test2_trees_file_uuid + ) ), tables = data.frame( table = c( @@ -677,9 +873,11 @@ test_that("ts/tc_load(), ts/tc_summary*(), and ts/tc_dump(x) work", { "num_sites", "num_mutations", "sequence_length", - "time_units" + "has_reference_sequence", + "time_units", + "file_uuid" ) - expect_equal(n_ts[shared_items], n_tc) + expect_equal(n_ptr_ts[shared_items], n_ptr_tc[shared_items]) m_ptr_tc <- tc_ptr_metadata_length(tc_ptr) m_ptr_ts <- ts_ptr_metadata_length(ts_ptr) @@ -695,10 +893,20 @@ test_that("ts/tc_load(), ts/tc_summary*(), and ts/tc_dump(x) work", { tc = data.frame( property = c( "sequence_length", + "has_reference_sequence", "time_units", - "has_metadata" + "has_metadata", + "file_uuid", + "has_index" ), - value = c(100, "generations", TRUE) + value = c( + 100, + FALSE, + "generations", + TRUE, + test2_trees_file_uuid, + TRUE + ) ), tables = data.frame( table = c( diff --git a/RcppTskit/tests/testthat/test_r_to_py_and_py_to_r.R b/RcppTskit/tests/testthat/test_r_to_py_and_py_to_r.R index 7af9762..b6bd4ee 100644 --- a/RcppTskit/tests/testthat/test_r_to_py_and_py_to_r.R +++ b/RcppTskit/tests/testthat/test_r_to_py_and_py_to_r.R @@ -146,6 +146,10 @@ test_that("ts_r_to_py() and ts_py_to_r() work", { tmp <- capture.output(ts2_r_print <- ts2_r$print()) # jarl-ignore implicit_assignment: it's just a test tmp <- capture.output(ts_ptr2_r_print <- ts_ptr_print(ts_ptr2_r)) + sel <- ts2_r_print$ts$property == "file_uuid" + ts2_r_print$ts$value[sel] <- NA_character_ + sel <- ts_ptr2_r_print$ts$property == "file_uuid" + ts_ptr2_r_print$ts$value[sel] <- NA_character_ expect_equal(ts2_r_print, ts_ptr2_r_print) }) @@ -270,5 +274,9 @@ test_that("tc_r_to_py() and tc_py_to_r() work", { tmp <- capture.output(tc2_r_print <- tc2_r$print()) # jarl-ignore implicit_assignment: it's just a test tmp <- capture.output(tc_ptr2_r <- tc_ptr_print(tc_ptr2_r)) + sel <- tc2_r_print$tc$property == "file_uuid" + tc2_r_print$tc$value[sel] <- NA_character_ + sel <- tc_ptr2_r$tc$property == "file_uuid" + tc_ptr2_r$tc$value[sel] <- NA_character_ expect_equal(tc2_r_print, tc_ptr2_r) })