Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ These notes apply to this repository root and the `RcppTskit/` package.
* We run R CMD check for every code change.
* We keep local quality gates green before handoff.
* We update `RcppTskit/NEWS.md` for user-visible behavior or API changes.
* We aim for comparable tskit Python and tskit R APIs, and
similarly for the tskit C and tskit C++ APIs
(the latter is the RcppTskit C++ binding to the tskit C API).

## Permission

Expand Down
2 changes: 1 addition & 1 deletion RcppTskit/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Authors@R: c(
Description: 'Tskit' enables efficient storage, manipulation, and analysis
of ancestral recombination graphs (ARGs) using succinct tree sequence
encoding. The tree sequence encoding of an ARG is described in Wong et
al. (2024) <doi:10.1093/genetics/iyae100>, while `tskit` project is
al. (2024) <doi:10.1093/genetics/iyae100>, while the 'tskit' project is
described in Jeffrey et al. (2026) <doi:10.48550/arXiv.2602.09649>.
See also <https://tskit.dev> for project news, documentation, and
tutorials. 'Tskit' provides 'Python', 'C', and 'Rust' application
Expand Down
21 changes: 21 additions & 0 deletions RcppTskit/NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,27 @@ All notable changes to RcppTskit are documented in this file.
The file format is based on [Keep a Changelog](https://keepachangelog.com),
and releases adhere to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Added (new features)

- Added the following scalar getters to match the tskit C/Python API:
- `TreeSequence$discrete_genome()` to query whether genome coordinates
are discrete integer values.
- `TreeSequence$has_reference_sequence()` to query whether a tree sequence
contains a reference genome sequence.
- `TreeSequence$discrete_time()` to query whether time values are discrete
integer values.
- `TreeSequence$file_uuid()` to query the UUID of the source `.trees` file.
- `TableCollection$has_reference_sequence()` to query whether a table
collection contains a reference genome sequence.
- `TableCollection$file_uuid()` to query the UUID of the source `.trees`
file.
- `TableCollection$sequence_length()` to query the sequence length.
- `TableCollection$time_units()` to query the time units.
- `TableCollection$has_index()` to query whether edge indexes are present.
- TODO

## [0.2.0] - 2026-02-22

### Added (new features)
Expand Down
55 changes: 53 additions & 2 deletions RcppTskit/R/Class-TableCollection.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ TableCollection <- R6Class(
#' @param file a string specifying the full path of the tree sequence file.
#' @param skip_tables logical; if \code{TRUE}, load only non-table information.
#' @param skip_reference_sequence logical; if \code{TRUE}, skip loading
#' reference sequence information.
#' reference genome sequence information.
#' @param pointer an external pointer (\code{externalptr}) to a table collection.
#' @details See the corresponding Python function at
#' \url{https://github.com/tskit-dev/tskit/blob/dc394d72d121c99c6dcad88f7a4873880924dd72/python/tskit/tables.py#L3463}.
Expand Down Expand Up @@ -88,12 +88,63 @@ TableCollection <- R6Class(
tree_sequence = function() {
  # Initialise the tree sequence with the TSK_TS_INIT_BUILD_INDEXES option, see
  # https://tskit.dev/tskit/docs/stable/c-api.html#c.TSK_TS_INIT_BUILD_INDEXES
  # TSK_TS_INIT_BUILD_INDEXES (1 << 0) is bitwShiftL(1L, 0) or just 1L
  # TODO: Should we also use TSK_TS_INIT_COMPUTE_MUTATION_PARENTS
  #       (https://tskit.dev/tskit/docs/stable/c-api.html#c.TSK_TS_INIT_COMPUTE_MUTATION_PARENTS)
  #       in TableCollection$tree_sequence()? #65
  #       https://github.com/HighlanderLab/RcppTskit/issues/65
  build_indexes_flag <- bitwShiftL(1L, 0)
  new_ts_pointer <- tc_ptr_to_ts_ptr(
    self$pointer,
    options = build_indexes_flag
  )
  # Wrap the resulting pointer in a TreeSequence R6 object.
  TreeSequence$new(pointer = new_ts_pointer)
},

#' @description Get the sequence length of this table collection.
#' @examples
#' tc_file <- system.file("examples/test.trees", package = "RcppTskit")
#' tc <- tc_load(tc_file)
#' tc$sequence_length()
sequence_length = function() {
  tc_pointer <- self$pointer
  tc_ptr_sequence_length(tc_pointer)
},

#' @description Get the time units string of this table collection.
#' @examples
#' tc_file <- system.file("examples/test.trees", package = "RcppTskit")
#' tc <- tc_load(tc_file)
#' tc$time_units()
time_units = function() {
  tc_pointer <- self$pointer
  tc_ptr_time_units(tc_pointer)
},

#' @description Query whether this table collection has edge indexes.
#' @examples
#' tc_file <- system.file("examples/test.trees", package = "RcppTskit")
#' tc <- tc_load(tc_file)
#' tc$has_index()
has_index = function() {
  tc_pointer <- self$pointer
  tc_ptr_has_index(tc_pointer)
},

#' @description Query whether this table collection has a reference genome
#'   sequence.
#' @examples
#' tc_file1 <- system.file("examples/test.trees", package = "RcppTskit")
#' tc_file2 <- system.file("examples/test_with_ref_seq.trees", package = "RcppTskit")
#' tc1 <- tc_load(tc_file1)
#' tc1$has_reference_sequence()
#' tc2 <- tc_load(tc_file2)
#' tc2$has_reference_sequence()
has_reference_sequence = function() {
  tc_pointer <- self$pointer
  tc_ptr_has_reference_sequence(tc_pointer)
},

#' @description Get the file UUID string of this table collection.
#' @details The UUID identifies the file the table collection was loaded
#'   from. If it is unavailable, \code{NA_character_} is returned.
#' @examples
#' tc_file <- system.file("examples/test.trees", package = "RcppTskit")
#' tc <- tc_load(tc_file)
#' tc$file_uuid()
file_uuid = function() {
  tc_pointer <- self$pointer
  tc_ptr_file_uuid(tc_pointer)
},

#' @description This function saves a table collection from R to disk and
#' loads it into reticulate Python for use with the \code{tskit} Python API.
#' @param tskit_module reticulate Python module of \code{tskit}. By default,
Expand Down
53 changes: 52 additions & 1 deletion RcppTskit/R/Class-TreeSequence.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ TreeSequence <- R6Class(
#' @param file a string specifying the full path of the tree sequence file.
#' @param skip_tables logical; if \code{TRUE}, load only non-table information.
#' @param skip_reference_sequence logical; if \code{TRUE}, skip loading
#' reference sequence information.
#' reference genome sequence information.
#' @param pointer an external pointer (\code{externalptr}) to a tree sequence.
#' @details See the corresponding Python function at
#' \url{https://tskit.dev/tskit/docs/latest/python-api.html#tskit.load}.
Expand Down Expand Up @@ -252,6 +252,32 @@ TreeSequence <- R6Class(
ts_ptr_sequence_length(self$pointer)
},

#' @description Get the discrete genome status of this tree sequence.
#' @details The result is \code{TRUE} if all genomic coordinates in the tree
#'   sequence are discrete integer values.
#' @examples
#' ts_file1 <- system.file("examples/test.trees", package = "RcppTskit")
#' ts_file2 <- system.file("examples/test_non_discrete_genome.trees", package = "RcppTskit")
#' ts1 <- ts_load(ts_file1)
#' ts1$discrete_genome()
#' ts2 <- ts_load(ts_file2)
#' ts2$discrete_genome()
discrete_genome = function() {
  ts_pointer <- self$pointer
  ts_ptr_discrete_genome(ts_pointer)
},

#' @description Query whether this tree sequence has a reference genome
#'   sequence.
#' @examples
#' ts_file1 <- system.file("examples/test.trees", package = "RcppTskit")
#' ts_file2 <- system.file("examples/test_with_ref_seq.trees", package = "RcppTskit")
#' ts1 <- ts_load(ts_file1)
#' ts1$has_reference_sequence()
#' ts2 <- ts_load(ts_file2)
#' ts2$has_reference_sequence()
has_reference_sequence = function() {
  ts_pointer <- self$pointer
  ts_ptr_has_reference_sequence(ts_pointer)
},

#' @description Get the time units string.
#' @examples
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
Expand All @@ -261,6 +287,20 @@ TreeSequence <- R6Class(
ts_ptr_time_units(self$pointer)
},

#' @description Get the discrete time status of this tree sequence.
#' @details The result is \code{TRUE} if all time values in the tree sequence
#'   are discrete integer values.
#' @examples
#' ts_file1 <- system.file("examples/test.trees", package = "RcppTskit")
#' ts_file2 <- system.file("examples/test_discrete_time.trees", package = "RcppTskit")
#' ts1 <- ts_load(ts_file1)
#' ts1$discrete_time()
#' ts2 <- ts_load(ts_file2)
#' ts2$discrete_time()
discrete_time = function() {
  ts_pointer <- self$pointer
  ts_ptr_discrete_time(ts_pointer)
},

#' @description Get the min time in node table and mutation table.
#' @examples
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
Expand All @@ -287,6 +327,17 @@ TreeSequence <- R6Class(
#' ts$metadata_length()
metadata_length = function() {
ts_ptr_metadata_length(self$pointer)
},

#' @description Get the file UUID string of this tree sequence.
#' @details The UUID identifies the file the tree sequence was loaded from.
#'   If it is unavailable, \code{NA_character_} is returned.
#' @examples
#' ts_file <- system.file("examples/test.trees", package = "RcppTskit")
#' ts <- ts_load(ts_file)
#' ts$file_uuid()
file_uuid = function() {
  ts_pointer <- self$pointer
  ts_ptr_file_uuid(ts_pointer)
}
)
)
44 changes: 40 additions & 4 deletions RcppTskit/R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,22 @@ ts_ptr_sequence_length <- function(ts) {
.Call(`_RcppTskit_ts_ptr_sequence_length`, ts)
}

# Thin wrapper: passes `ts` unchanged to the native routine
# `_RcppTskit_ts_ptr_discrete_genome` via .Call() and returns its result.
# NOTE(review): `ts` is presumably a tree sequence external pointer - confirm.
ts_ptr_discrete_genome <- function(ts) {
.Call(`_RcppTskit_ts_ptr_discrete_genome`, ts)
}

# Thin wrapper: passes `ts` unchanged to the native routine
# `_RcppTskit_ts_ptr_has_reference_sequence` via .Call() and returns its result.
# NOTE(review): `ts` is presumably a tree sequence external pointer - confirm.
ts_ptr_has_reference_sequence <- function(ts) {
.Call(`_RcppTskit_ts_ptr_has_reference_sequence`, ts)
}

# Thin wrapper: passes `ts` unchanged to the native routine
# `_RcppTskit_ts_ptr_time_units` via .Call() and returns its result.
# NOTE(review): `ts` is presumably a tree sequence external pointer - confirm.
ts_ptr_time_units <- function(ts) {
.Call(`_RcppTskit_ts_ptr_time_units`, ts)
}

# Thin wrapper: passes `ts` unchanged to the native routine
# `_RcppTskit_ts_ptr_discrete_time` via .Call() and returns its result.
# NOTE(review): `ts` is presumably a tree sequence external pointer - confirm.
ts_ptr_discrete_time <- function(ts) {
.Call(`_RcppTskit_ts_ptr_discrete_time`, ts)
}

# Thin wrapper: passes `ts` unchanged to the native routine
# `_RcppTskit_ts_ptr_min_time` via .Call() and returns its result.
# NOTE(review): `ts` is presumably a tree sequence external pointer - confirm.
ts_ptr_min_time <- function(ts) {
.Call(`_RcppTskit_ts_ptr_min_time`, ts)
}
Expand All @@ -103,18 +115,42 @@ ts_ptr_max_time <- function(ts) {
.Call(`_RcppTskit_ts_ptr_max_time`, ts)
}

ts_ptr_summary <- function(ts) {
.Call(`_RcppTskit_ts_ptr_summary`, ts)
ts_ptr_file_uuid <- function(ts) {
.Call(`_RcppTskit_ts_ptr_file_uuid`, ts)
}

tc_ptr_summary <- function(tc) {
.Call(`_RcppTskit_tc_ptr_summary`, tc)
ts_ptr_summary <- function(ts) {
.Call(`_RcppTskit_ts_ptr_summary`, ts)
}

# Thin wrapper: passes `ts` unchanged to the native routine
# `_RcppTskit_ts_ptr_metadata_length` via .Call() and returns its result.
# NOTE(review): `ts` is presumably a tree sequence external pointer - confirm.
ts_ptr_metadata_length <- function(ts) {
.Call(`_RcppTskit_ts_ptr_metadata_length`, ts)
}

# Thin wrapper: passes `tc` unchanged to the native routine
# `_RcppTskit_tc_ptr_sequence_length` via .Call() and returns its result.
# NOTE(review): `tc` is presumably a table collection external pointer - confirm.
tc_ptr_sequence_length <- function(tc) {
.Call(`_RcppTskit_tc_ptr_sequence_length`, tc)
}

# Thin wrapper: passes `tc` unchanged to the native routine
# `_RcppTskit_tc_ptr_has_reference_sequence` via .Call() and returns its result.
# NOTE(review): `tc` is presumably a table collection external pointer - confirm.
tc_ptr_has_reference_sequence <- function(tc) {
.Call(`_RcppTskit_tc_ptr_has_reference_sequence`, tc)
}

# Thin wrapper: passes `tc` unchanged to the native routine
# `_RcppTskit_tc_ptr_time_units` via .Call() and returns its result.
# NOTE(review): `tc` is presumably a table collection external pointer - confirm.
tc_ptr_time_units <- function(tc) {
.Call(`_RcppTskit_tc_ptr_time_units`, tc)
}

# Thin wrapper: passes `tc` unchanged to the native routine
# `_RcppTskit_tc_ptr_file_uuid` via .Call() and returns its result.
# NOTE(review): `tc` is presumably a table collection external pointer - confirm.
tc_ptr_file_uuid <- function(tc) {
.Call(`_RcppTskit_tc_ptr_file_uuid`, tc)
}

# Thin wrapper: passes `tc` unchanged to the native routine
# `_RcppTskit_tc_ptr_has_index` via .Call() and returns its result.
# NOTE(review): `tc` is presumably a table collection external pointer - confirm.
tc_ptr_has_index <- function(tc) {
.Call(`_RcppTskit_tc_ptr_has_index`, tc)
}

# Thin wrapper: passes `tc` unchanged to the native routine
# `_RcppTskit_tc_ptr_summary` via .Call() and returns its result.
# NOTE(review): `tc` is presumably a table collection external pointer - confirm.
tc_ptr_summary <- function(tc) {
.Call(`_RcppTskit_tc_ptr_summary`, tc)
}

# Thin wrapper: passes `tc` unchanged to the native routine
# `_RcppTskit_tc_ptr_metadata_length` via .Call() and returns its result.
# NOTE(review): `tc` is presumably a table collection external pointer - confirm.
tc_ptr_metadata_length <- function(tc) {
.Call(`_RcppTskit_tc_ptr_metadata_length`, tc)
}
Expand Down
Loading
Loading