From cf45bdf7fd4b2a4c05375754ea37ad14510602bc Mon Sep 17 00:00:00 2001 From: sbudai Date: Mon, 30 Mar 2026 23:56:36 +0200 Subject: [PATCH] 1.0.0 >> 1.1.0 ## New func - 4 new vignettes (Generation Mix Analysis, Getting Started with entsoeapi, Time Series Restructuring Explained, Working with EIC Codes) have been introduced. - The `all_allocated_eic()` function has been exported. - The `get_news()` function has been introduced. - The ENTSO-E API is known for returning 503 (Service Unavailable) response when it's under heavy load or when a client sends too many requests. Hence we have built-in an automated backoff-and-retry mechanism. ## Changes - Typos fixed in the documentations. - The package `DESCRIPTION` document slightly adjusted. ## Misc - An orphaned, not exported function and its related unit tests have been removed. --- .github/CONTRIBUTING.md | 2 +- DESCRIPTION | 7 +- NAMESPACE | 10 + NEWS.md | 18 + R/constants.R | 1 + R/en_helpers.R | 134 ++++- R/en_market.R | 5 +- R/utils.R | 113 +--- README.Rmd | 2 +- README.md | 8 +- RELEASE_CHECKLIST.md | 8 +- man/accounting_point_eic.Rd | 2 +- man/all_allocated_eic.Rd | 36 ++ man/all_approved_eic.Rd | 2 +- man/area_eic.Rd | 2 +- man/congestion_income.Rd | 2 +- man/get_news.Rd | 30 ++ man/implicit_offered_transfer_capacities.Rd | 2 +- man/location_eic.Rd | 2 +- man/party_eic.Rd | 2 +- man/resource_object_eic.Rd | 2 +- man/substation_eic.Rd | 2 +- man/tie_line_eic.Rd | 2 +- .../fixtures/input_lists_for_grouping.rds | Bin 713 -> 0 bytes tests/testthat/fixtures/news_feed.xml | 26 + tests/testthat/test-en_helpers.R | 403 ++++++++++++++- tests/testthat/test-en_market.R | 6 +- tests/testthat/test-utils.R | 133 +---- vignettes/architecture.Rmd | 394 +++++++------- vignettes/da-price-spread-vignette.Rmd | 31 +- vignettes/generation-mix-analysis.Rmd | 419 +++++++++++++++ vignettes/getting-started.Rmd | 284 ++++++++++ vignettes/time-series-restructuring.Rmd | 445 ++++++++++++++++ vignettes/working-with-eic-codes.Rmd | 489 
++++++++++++++++++ 34 files changed, 2556 insertions(+), 468 deletions(-) create mode 100644 man/all_allocated_eic.Rd create mode 100644 man/get_news.Rd delete mode 100644 tests/testthat/fixtures/input_lists_for_grouping.rds create mode 100644 tests/testthat/fixtures/news_feed.xml create mode 100644 vignettes/generation-mix-analysis.Rmd create mode 100644 vignettes/getting-started.Rmd create mode 100644 vignettes/time-series-restructuring.Rmd create mode 100644 vignettes/working-with-eic-codes.Rmd diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index c422a06d..bf9e97b5 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -40,7 +40,7 @@ Open a [feature request](https://github.com/krose/entsoeapi/issues/new?template= covr::package_coverage() ``` -6. Open the pull request against `master` and describe what changed and why. +6. Open the pull request against `main` and describe what changed and why. ### Conventions diff --git a/DESCRIPTION b/DESCRIPTION index 171a29fb..9ac3f076 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: entsoeapi Type: Package Title: Client for the 'ENTSO-E' Transparency Platform API -Version: 1.0.0 +Version: 1.1.0 Authors@R: c(person("Kenneth", "Rose", role=c("aut", "cph"), email="kennethrose82@gmail.com"), person("Sándor", "Budai", role=c("aut", "cre", "cph"), email="sbudai.ga@gmail.com")) Description: Provides a standardized R client for the 'ENTSO-E' (European Network of @@ -37,9 +37,12 @@ Suggests: knitr, rmarkdown, testthat (>= 3.0.0), + curl, tibble, tidyselect, - ggplot2 + ggplot2, + kableExtra, + scales VignetteBuilder: knitr Config/testthat/edition: 3 Collate: diff --git a/NAMESPACE b/NAMESPACE index 7f0260da..a312c8a3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export(accounting_point_eic) export(activated_balancing_prices) export(aggregated_balancing_energy_bids) export(aggregated_bids) +export(all_allocated_eic) export(all_approved_eic) 
export(allocated_transfer_capacities_3rd_countries) export(allocation_of_cross_zonal_balancing_cap) @@ -40,6 +41,7 @@ export(gen_per_gen_unit) export(gen_per_prod_type) export(gen_storage_mean_filling_rate) export(gen_wind_solar_forecasts) +export(get_news) export(hvdc_link_constrains) export(imbalance_prices) export(imbalance_volumes) @@ -87,13 +89,16 @@ importFrom(checkmate,assert_integerish) importFrom(checkmate,assert_string) importFrom(cli,cli_abort) importFrom(cli,cli_alert) +importFrom(cli,cli_alert_danger) importFrom(cli,cli_alert_info) importFrom(cli,cli_alert_success) importFrom(cli,cli_alert_warning) importFrom(cli,cli_h1) +importFrom(cli,cli_h2) importFrom(cli,cli_progress_bar) importFrom(cli,cli_progress_done) importFrom(cli,cli_progress_update) +importFrom(cli,cli_text) importFrom(cli,cli_warn) importFrom(dplyr,across) importFrom(dplyr,all_of) @@ -116,6 +121,7 @@ importFrom(dplyr,rename) importFrom(dplyr,rename_with) importFrom(dplyr,select) importFrom(dplyr,ungroup) +importFrom(httr2,req_headers) importFrom(httr2,req_method) importFrom(httr2,req_perform) importFrom(httr2,req_progress) @@ -138,6 +144,7 @@ importFrom(lubridate,parse_date_time) importFrom(lubridate,year) importFrom(lubridate,ymd) importFrom(snakecase,to_snake_case) +importFrom(stats,runif) importFrom(stats,setNames) importFrom(stringr,str_c) importFrom(stringr,str_detect) @@ -157,9 +164,12 @@ importFrom(utils,read.table) importFrom(utils,unzip) importFrom(xml2,as_list) importFrom(xml2,as_xml_document) +importFrom(xml2,read_html) importFrom(xml2,read_xml) importFrom(xml2,xml_children) importFrom(xml2,xml_contents) +importFrom(xml2,xml_find_all) +importFrom(xml2,xml_find_first) importFrom(xml2,xml_length) importFrom(xml2,xml_name) importFrom(xml2,xml_text) diff --git a/NEWS.md b/NEWS.md index a198f73e..19c7fc7a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,21 @@ +# entsoeapi v1.1.0 (2026-03-31) + +## New functionality + +- 4 new vignettes (Generation Mix Analysis, Getting Started with 
entsoeapi, Time Series Restructuring Explained, Working with EIC Codes) have been introduced. +- The `all_allocated_eic()` function has been exported. +- The `get_news()` function has been introduced. +- The ENTSO-E API is known for returning 503 (Service Unavailable) response when it's under heavy load or when a client sends too many requests. Hence we have built-in an automated backoff-and-retry mechanism. + +## Changes + +- Typos fixed in the documentations. +- The package `DESCRIPTION` document slightly adjusted. + +## Miscellaneous + +- An orphaned, not exported function and its related unit tests have been removed. + # entsoeapi v1.0.0 (2026-03-25) ## New functionality diff --git a/R/constants.R b/R/constants.R index 203a903b..9a94a41f 100644 --- a/R/constants.R +++ b/R/constants.R @@ -7,6 +7,7 @@ .pd_domain <- "eepublicdownloads.blob.core.windows.net" .pd_alloc_eic <- "cio-lio/xml/allocated-eic-codes.xml" .pd_csv_eic <- "/cio-lio/csv/" +.feed_url <- "https://external-api.tp.entsoe.eu/news/feed" possible_eic_chars <- stats::setNames( object = 0L:36L, nm = c(as.character(0:9), LETTERS, "-") diff --git a/R/en_helpers.R b/R/en_helpers.R index 7b1ac77a..e6bb750d 100644 --- a/R/en_helpers.R +++ b/R/en_helpers.R @@ -6,6 +6,8 @@ #' @param cache_key Character scalar. The cache key #' (e.g. `"party_eic_df_key"`). #' +#' @return A tibble extracted from the source csv. 
+#' #' @noRd fetch_eic_csv <- function(csv_file, cache_key) { cache_get_or_compute( # nolint: object_usage_linter @@ -34,7 +36,7 @@ fetch_eic_csv <- function(csv_file, cache_key) { #' #' @export #' -#' @examplesIf there_is_provider() && nchar(Sys.getenv("ENTSOE_PAT")) > 0L +#' @examplesIf there_is_provider() #' eic_party <- entsoeapi::party_eic() #' #' dplyr::glimpse(eic_party) @@ -64,7 +66,7 @@ party_eic <- function() { #' #' @export #' -#' @examplesIf there_is_provider() && nchar(Sys.getenv("ENTSOE_PAT")) > 0L +#' @examplesIf there_is_provider() #' eic_area <- entsoeapi::area_eic() #' #' dplyr::glimpse(eic_area) @@ -96,7 +98,7 @@ area_eic <- function() { #' #' @export #' -#' @examplesIf there_is_provider() && nchar(Sys.getenv("ENTSOE_PAT")) > 0L +#' @examplesIf there_is_provider() #' eic_accounting_point <- entsoeapi::accounting_point_eic() #' #' dplyr::glimpse(eic_accounting_point) @@ -128,7 +130,7 @@ accounting_point_eic <- function() { #' #' @export #' -#' @examplesIf there_is_provider() && nchar(Sys.getenv("ENTSOE_PAT")) > 0L +#' @examplesIf there_is_provider() #' eic_tie_line <- entsoeapi::tie_line_eic() #' #' dplyr::glimpse(eic_tie_line) @@ -159,7 +161,7 @@ tie_line_eic <- function() { #' #' @export #' -#' @examplesIf there_is_provider() && nchar(Sys.getenv("ENTSOE_PAT")) > 0L +#' @examplesIf there_is_provider() #' eic_location <- entsoeapi::location_eic() #' #' dplyr::glimpse(eic_location) @@ -191,7 +193,7 @@ location_eic <- function() { #' #' @export #' -#' @examplesIf there_is_provider() && nchar(Sys.getenv("ENTSOE_PAT")) > 0L +#' @examplesIf there_is_provider() #' eic_resource_object <- entsoeapi::resource_object_eic() #' #' dplyr::glimpse(eic_resource_object) @@ -228,7 +230,7 @@ resource_object_eic <- function() { #' #' @export #' -#' @examplesIf there_is_provider() && nchar(Sys.getenv("ENTSOE_PAT")) > 0L +#' @examplesIf there_is_provider() #' eic_substation <- entsoeapi::substation_eic() #' #' dplyr::glimpse(eic_substation) @@ -258,9 +260,11 @@ 
substation_eic <- function() { #' `MarketParticipantIsoCountryCode`, `MarketParticipantVatCode`, #' `EicTypeFunctionList` and `type`. #' +#' @importFrom dplyr bind_rows +#' #' @export #' -#' @examplesIf there_is_provider() && nchar(Sys.getenv("ENTSOE_PAT")) > 0L +#' @examplesIf there_is_provider() #' eic_all <- entsoeapi::all_approved_eic() #' #' dplyr::glimpse(eic_all) @@ -275,9 +279,9 @@ all_approved_eic <- function() { resource_object_eic(), substation_eic() ) |> - dplyr::bind_rows() |> + bind_rows() |> unique() |> - tibble::as_tibble() + as_tbl() } @@ -285,27 +289,33 @@ all_approved_eic <- function() { #' Get all Allocated Energy Identification Codes #' #' @description -#' Beware, this is a REAL SLOW function, it runs for ages! -#' Be patient!! +#' Beware, this is a REAL SLOW function, it runs for minutes, be patient! #' This function downloads all allocated #' energy identification codes from this link: #' https://eepublicdownloads.blob.core.windows.net/ #' cio-lio/xml/allocated-eic-codes.xml #' Further details are under: #' https://www.entsoe.eu/data/energy-identification-codes-eic/ +#' It is an alternative of `all_approved_eic()` function call +#' providing more details. 
+#' +#' @export #' #' @return #' A tibble of all allocated EIC codes, which contains such columns as -#' `doc_status`, `doc_status_value`, `revision_number`, `created_date_time`, -#' `eic_code`, `instance_component_attribute`, `long_name`, `display_name`, -#' `last_request_date`, `eic_code_deactivation_requested_date_and_or_time_date`, -#' `description`, `eic_code_market_participant_vat_code_name`, -#' `eic_code_market_participant_acer_code_name` and -#' `parent_market_document_mrid` +#' `revision_number`, `created_date_time`, `eic_code`, `doc_status_value`, +#' `doc_status`, `instance_component_attribute`, `long_name`, `display_name`, +#' `last_request_date`, `deactivation_requested_date_and_or_time_date`, +#' `eic_code_market_participant_street_address`, +#' `market_participant_vat_code_name`, `market_participant_acer_code_name`, +#' `description`, `responsible_market_participant_mrid`, `function_names` +#' and `parent_market_document_mrid` #' -#' @importFrom stats setNames +#' @examplesIf there_is_provider() +#' eic_all <- entsoeapi::all_allocated_eic() +#' +#' dplyr::glimpse(eic_all) #' -#' @noRd all_allocated_eic <- function() { cache_get_or_compute( # nolint: object_usage_linter key = "all_allocated_eic_df_key", @@ -313,3 +323,87 @@ all_allocated_eic <- function() { compute_fn = get_all_allocated_eic ) } + + +#' @title +#' Display the ENTSO-E Transparency Platform news feed +#' +#' @description +#' Fetches the RSS news feed from the ENTSO-E Transparency Platform and +#' displays the entries in the console. Useful for checking platform +#' maintenance windows, data publication delays, and other announcements +#' that may affect API availability. +#' +#' @param feed_url the URL of the RSS news feed from the ENTSO-E +#' Transparency Platform. +#' @param n Integer scalar. Maximum number of feed items to display. +#' Defaults to `5L`. Use `Inf` to show all items. 
+#' +#' @return A tibble of feed items with columns `title`, `pub_date`, and +#' `description`, returned invisibly. +#' +#' @export +#' +#' @importFrom httr2 request req_method req_user_agent req_timeout req_retry +#' resp_body_xml +#' @importFrom xml2 xml_find_all xml_find_first xml_text read_html +#' @importFrom cli cli_h1 cli_h2 cli_text cli_alert_info +#' +#' @examplesIf there_is_provider() +#' entsoeapi::get_news() +#' +get_news <- function(feed_url = .feed_url, n = 5L) { + resp <- feed_url |> + request() |> + req_method(method = "GET") |> + req_user_agent(string = user_agent_string) |> + req_timeout(seconds = 30L) |> + req_retry( + max_tries = 3L, + is_transient = \(resp) resp_status(resp) == 503L, + backoff = \(x) 10 + ) |> + req_perform() + + feed_xml <- resp_body_xml(resp = resp) + items <- xml_find_all(x = feed_xml, xpath = "//item") + + n_show <- min(length(items), n) + + titles <- character(n_show) + dates <- character(n_show) + descriptions <- character(n_show) + + cli_h1("ENTSO-E Transparency Platform News") + + for (i in seq_len(n_show)) { + titles[[i]] <- xml_find_first(items[[i]], "title") |> + xml_text() + dates[[i]] <- xml_find_first(items[[i]], "pubDate") |> + xml_text() + raw_desc <- xml_find_first(items[[i]], "description") |> + xml_text() + descriptions[[i]] <- tryCatch( + expr = { + paste0("", raw_desc, "") |> + read_html() |> + xml_text() |> + trimws() + }, + error = \(e) trimws(raw_desc) + ) + + cli_h2("{titles[[i]]}") + cli_alert_info("{dates[[i]]}") + cli_text("{descriptions[[i]]}") + } + + result <- data.frame( + title = titles, + pub_date = dates, + description = descriptions + ) |> + as_tbl() + + invisible(result) +} diff --git a/R/en_market.R b/R/en_market.R index 0fa47044..b0331826 100644 --- a/R/en_market.R +++ b/R/en_market.R @@ -337,7 +337,8 @@ continuous_offered_transfer_capacity <- function( # nolint: object_length_linter implicit_offered_transfer_capacities <- function( # nolint: object_length_linter eic_in = NULL, eic_out = 
NULL, - period_start = lubridate::ymd(Sys.Date() - lubridate::days(x = 1L), + period_start = lubridate::ymd( + x = Sys.Date() - lubridate::days(x = 1L), tz = "CET" ), period_end = lubridate::ymd(Sys.Date(), @@ -1160,7 +1161,7 @@ net_positions <- function( #' can be checked from contract_types table; #' "A01" = Day ahead #' "A02" = Weekly -#' "A032 = Monthly +#' "A03" = Monthly #' "A04" = Yearly #' "A06" = Long Term #' "A07" = Intraday diff --git a/R/utils.R b/R/utils.R index 4da546de..b2e8b76a 100644 --- a/R/utils.R +++ b/R/utils.R @@ -162,83 +162,6 @@ assert_eic <- function(eic, var_name = "eic", null_ok = FALSE) { } -#' @title -#' Organize list of strings into group -#' -#' @description -#' This function solves a connected components problem where vectors -#' are connected if they share at least one common string. -#' It returns with groups containing the indices of elements. -#' -#' @param vector_list A list of character vectors to group by shared strings. -#' -#' @return A list of integer vectors, each containing the indices of -#' `vector_list` elements that belong to the same connected component. 
-#' -#' @noRd -grouping_by_common_strings <- function(vector_list) { - n <- length(vector_list) - - if (n == 0L) { - return(list()) - } - if (n == 1L) { - return(list(1L)) - } - - # Build an inverted index: string -> vector indices containing that string - string_to_indices <- new.env(hash = TRUE) - - for (i in 1L:n) { - unique_strings <- unique(vector_list[[i]]) - for (s in unique_strings) { - if (exists(x = s, envir = string_to_indices)) { - string_to_indices[[s]] <- c(string_to_indices[[s]], i) - } else { - string_to_indices[[s]] <- i - } - } - } - - # Union-Find with path compression - parent <- list2env(x = list(data = 1L:n), parent = emptyenv()) - - find_root <- function(i) { - if (parent$data[i] != i) { - parent$data[i] <- find_root(i = parent$data[i]) - } - parent$data[i] - } - - union_sets <- function(i, j) { - root_i <- find_root(i) - root_j <- find_root(j) - if (root_i != root_j) { - parent$data[root_j] <- root_i - } - } - - # For each string, union all vectors that contain it - for (s in ls(string_to_indices)) { - indices <- string_to_indices[[s]] - if (length(indices) > 1L) { - for (k in 2L:length(indices)) { - union_sets(i = indices[1L], j = indices[k]) - } - } - } - - # Normalize all parents - for (i in 1L:n) { - parent$data[i] <- find_root(i) - } - - # Group indices by their root parent - split(x = 1L:n, f = parent$data) |> - unname() -} - - #' @title #' Calculate the Number of Children for a Given Nodeset #' @@ -1079,10 +1002,11 @@ read_zipped_xml <- function(temp_file_path) { #' @importFrom checkmate assert_string #' @importFrom cli cli_h1 cli_alert cli_alert_success cli_abort #' @importFrom httr2 request req_method req_user_agent req_verbose req_timeout -#' resp_status resp_content_type resp_body_raw resp_body_xml resp_body_html -#' resp_body_json resp_status_desc +#' req_retry resp_status resp_content_type resp_body_raw resp_body_xml +#' resp_body_html resp_body_json resp_status_desc req_headers #' @importFrom xmlconvert xml_to_list #' 
@importFrom stringr str_detect +#' @importFrom stats setNames runif #' #' @noRd api_req <- function( @@ -1094,24 +1018,28 @@ api_req <- function( ) { assert_string(query_string) assert_string(security_token) - url <- paste0( - api_scheme, api_domain, api_name, query_string, "&securityToken=" - ) + url <- paste0(api_scheme, api_domain, api_name, query_string) cli_h1("API call") - cli_alert("{url}<...>") + cli_alert("{url}&securityToken=<...>") # retrieve data from the API - req <- paste0(url, security_token) |> + req <- url |> request() |> req_method(method = "GET") |> req_user_agent(string = user_agent_string) |> + req_headers(SECURITY_TOKEN = security_token) |> req_verbose( header_req = FALSE, header_resp = TRUE, body_req = FALSE, body_resp = FALSE ) |> - req_timeout(seconds = .req_timeout) # nolint: object_usage_linter. + req_timeout(seconds = .req_timeout) |> # nolint: object_usage_linter. + req_retry( + max_tries = 3L, + is_transient = \(resp) resp_status(resp) == 503L, + backoff = \(x) runif(n = 1L, min = 5, max = 15) + ) resp <- req_perform_safe(req = req) if (is.null(x = resp$error)) { @@ -1346,7 +1274,7 @@ url_posixct_format <- function(x) { #' #' @importFrom stringr str_replace_all #' @importFrom utils read.table -#' @importFrom cli cli_abort +#' @importFrom cli cli_abort cli_alert_danger #' #' @noRd get_eiccodes <- function( @@ -1400,7 +1328,9 @@ get_eiccodes <- function( # return eiccodes } else { - cli_abort(content$error$message) + cli_alert_danger(content$error$message) + # return + NULL } } @@ -1418,7 +1348,7 @@ get_eiccodes <- function( #' @return A tibble of all allocated EIC codes with snake_case column names, #' enriched with document-status definitions. 
#' -#' @importFrom stats setNames +#' @importFrom stats setNames runif #' @importFrom httr2 request req_url_path_append req_method req_user_agent #' req_progress req_verbose req_timeout req_retry resp_body_raw #' @importFrom xml2 as_xml_document xml_contents xml_children xml_name xml_text @@ -1455,7 +1385,10 @@ get_all_allocated_eic <- function( body_resp = FALSE ) |> req_timeout(seconds = 120) |> - req_retry(max_tries = 3L, backoff = \(resp) 10) + req_retry( + max_tries = 3L, + backoff = \(x) runif(n = 1L, min = 5, max = 15) + ) resp <- req_perform_safe(req = req) if (is.null(resp$error)) { @@ -2435,6 +2368,6 @@ there_is_provider <- function( req_user_agent(string = user_agent_string) |> req_timeout(seconds = 10L) |> req_retry(max_tries = 1L) - resp <- req_perform_safe(req) + resp <- req_perform_safe(req = req) !is.null(resp$error$resp) && resp_status(resp$error$resp) == 401L } diff --git a/README.Rmd b/README.Rmd index d601f84c..1ff7778f 100644 --- a/README.Rmd +++ b/README.Rmd @@ -24,7 +24,7 @@ knitr::opts_chunk$set( -The goal of `entsoeapi` package is to create an easy wrapper around the ENTSO-E [API](https://documenter.getpostman.com/view/7009892/2s93JtP3F6)'s data and transform them to tabular format without effort. (The downloadable data are available interactively on the ENTSO-E [transparency platform](https://transparency.entsoe.eu/) website as well.) +The goal of `entsoeapi` package is to create an easy wrapper around the ENTSO-E [API](https://documenter.getpostman.com/view/7009892/2s93JtP3F6)'s data and transform them to tabular format without effort. (The downloadable data are available interactively on the ENTSO-E [transparency platform](https://transparency.entsoe.eu/) website as well.) 
The package helps with diff --git a/README.md b/README.md index 87cec260..3ccac53b 100644 --- a/README.md +++ b/README.md @@ -507,7 +507,7 @@ entsoeapi::load_actual_total( #> ── API call ──────────────────────────────────────────────────────────────────── #> → https://web-api.tp.entsoe.eu/api?documentType=A65&processType=A16&outBiddingZone_Domain=10Y1001A1001A83F&periodStart=201912312300&periodEnd=202001012300&securityToken=<...> #> <- HTTP/2 200 -#> <- date: Tue, 24 Mar 2026 18:52:37 GMT +#> <- date: Mon, 30 Mar 2026 21:56:04 GMT #> <- content-type: text/xml #> <- content-disposition: inline; filename="Actual Total Load_201912312300-202001012300.xml" #> <- x-content-type-options: nosniff @@ -535,7 +535,7 @@ entsoeapi::load_actual_total( #> $ ts_object_aggregation_def "Area", "Area", "Area", "Area", "Area"… #> $ ts_business_type "A04", "A04", "A04", "A04", "A04", "A0… #> $ ts_business_type_def "Consumption", "Consumption", "Consump… -#> $ created_date_time 2026-03-24 18:52:37, 2026-03-24 18:52… +#> $ created_date_time 2026-03-30 21:56:04, 2026-03-30 21:56… #> $ revision_number 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,… #> $ time_period_time_interval_start 2019-12-31 23:00:00, 2019-12-31 23:00… #> $ time_period_time_interval_end 2020-01-01 23:00:00, 2020-01-01 23:00… @@ -565,7 +565,7 @@ entsoeapi::gen_per_prod_type( #> ── API call ──────────────────────────────────────────────────────────────────── #> → https://web-api.tp.entsoe.eu/api?documentType=A75&processType=A16&in_Domain=10Y1001A1001A83F&periodStart=201912312300&periodEnd=202001012300&securityToken=<...> #> <- HTTP/2 200 -#> <- date: Tue, 24 Mar 2026 18:52:38 GMT +#> <- date: Mon, 30 Mar 2026 21:56:06 GMT #> <- content-type: text/xml #> <- content-disposition: inline; filename="Aggregated Generation per Type_201912312300-202001012300.xml" #> <- x-content-type-options: nosniff @@ -594,7 +594,7 @@ entsoeapi::gen_per_prod_type( #> $ ts_business_type_def "Production", "Production", "Productio… #> $ ts_mkt_psr_type 
"B10", "B10", "B10", "B10", "B10", "B1… #> $ ts_mkt_psr_type_def "Hydro-electric pure pumped storage he… -#> $ created_date_time 2026-03-24 18:52:38, 2026-03-24 18:52… +#> $ created_date_time 2026-03-30 21:56:06, 2026-03-30 21:56… #> $ revision_number 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,… #> $ time_period_time_interval_start 2019-12-31 23:00:00, 2019-12-31 23:00… #> $ time_period_time_interval_end 2020-01-01 23:00:00, 2020-01-01 23:00… diff --git a/RELEASE_CHECKLIST.md b/RELEASE_CHECKLIST.md index 18f672a1..278269bb 100644 --- a/RELEASE_CHECKLIST.md +++ b/RELEASE_CHECKLIST.md @@ -29,13 +29,13 @@ ## 4. R CMD CHECK - [ ] `devtools::check(cran = TRUE)` — **0 errors, 0 warnings, 0 notes** - - Common notes to fix before CRAN: `LazyData` without `LazyDataCompression`, missing `\value` in `.Rd` files +- Common notes to fix before CRAN: `LazyData` without `LazyDataCompression`, missing `\value` in `.Rd` files ## 5. Merge & Tag - [ ] Push + merge / merge + push: `develop` → `main` (PR or direct) -- [ ] Create a git tag: `git tag v1.0.0 && git push origin v1.0.0` -- [ ] Create a GitHub Release with the `NEWS.md` entry as release notes: `gh release create v0.9.8 --title "v0.9.8" --notes "See NEWS.md for changes" --repo krose/entsoeapi` +- [ ] Create a git tag: `git tag vX.Y.Z && git push origin vX.Y.Z` +- [ ] Create a GitHub Release with the `NEWS.md` entry as release notes: `gh release create vX.Y.Z --title "vX.Y.Z" --notes "See NEWS.md for changes" --repo krose/entsoeapi` ## 6. Pkgdown Site @@ -44,7 +44,7 @@ ## 7. 
(When ready) CRAN Submission - [ ] Run `devtools::install()` to install package locally -- [ ] Run `rhub::rhub_check(gh_url = "https://github.com/krose/entsoeapi", platforms = c("windows", "macos-arm64", "linux"), branch = "main")`— triggers multi-platform check on GitHub +- [ ] Run `rhub::rhub_check(gh_url = "https://github.com/krose/entsoeapi", platforms = c("windows", "macos-arm64", "linux"), branch = "main", r_versions = "release")`— triggers multi-platform check on GitHub - [ ] Review [CRAN policies](https://cran.r-project.org/web/packages/policies.html) - [ ] `devtools::submit_cran()` or upload via - [ ] Reply promptly to CRAN maintainer emails (within 2 weeks) diff --git a/man/accounting_point_eic.Rd b/man/accounting_point_eic.Rd index 6c5cc114..02e1b4a3 100644 --- a/man/accounting_point_eic.Rd +++ b/man/accounting_point_eic.Rd @@ -21,7 +21,7 @@ An entity under balance responsibility where balance supplier change can take place and for which commercial business processes are defined. 
} \examples{ -\dontshow{if (there_is_provider() && nchar(Sys.getenv("ENTSOE_PAT")) > 0L) withAutoprint(\{ # examplesIf} +\dontshow{if (there_is_provider()) withAutoprint(\{ # examplesIf} eic_accounting_point <- entsoeapi::accounting_point_eic() dplyr::glimpse(eic_accounting_point) diff --git a/man/all_allocated_eic.Rd b/man/all_allocated_eic.Rd new file mode 100644 index 00000000..3f417f35 --- /dev/null +++ b/man/all_allocated_eic.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/en_helpers.R +\name{all_allocated_eic} +\alias{all_allocated_eic} +\title{Get all Allocated Energy Identification Codes} +\usage{ +all_allocated_eic() +} +\value{ +A tibble of all allocated EIC codes, which contains such columns as +\code{revision_number}, \code{created_date_time}, \code{eic_code}, \code{doc_status_value}, +\code{doc_status}, \code{instance_component_attribute}, \code{long_name}, \code{display_name}, +\code{last_request_date}, \code{deactivation_requested_date_and_or_time_date}, +\code{eic_code_market_participant_street_address}, +\code{market_participant_vat_code_name}, \code{market_participant_acer_code_name}, +\code{description}, \code{responsible_market_participant_mrid}, \code{function_names} +and \code{parent_market_document_mrid} +} +\description{ +Beware, this is a REAL SLOW function, it runs for minutes, be patient! +This function downloads all allocated +energy identification codes from this link: +https://eepublicdownloads.blob.core.windows.net/ +cio-lio/xml/allocated-eic-codes.xml +Further details are under: +https://www.entsoe.eu/data/energy-identification-codes-eic/ +It is an alternative of \code{all_approved_eic()} function call +providing more details. 
+} +\examples{ +\dontshow{if (there_is_provider()) withAutoprint(\{ # examplesIf} +eic_all <- entsoeapi::all_allocated_eic() + +dplyr::glimpse(eic_all) +\dontshow{\}) # examplesIf} +} diff --git a/man/all_approved_eic.Rd b/man/all_approved_eic.Rd index f853937f..caa77d29 100644 --- a/man/all_approved_eic.Rd +++ b/man/all_approved_eic.Rd @@ -21,7 +21,7 @@ Further details are under: https://www.entsoe.eu/data/energy-identification-codes-eic/#eic-documentation } \examples{ -\dontshow{if (there_is_provider() && nchar(Sys.getenv("ENTSOE_PAT")) > 0L) withAutoprint(\{ # examplesIf} +\dontshow{if (there_is_provider()) withAutoprint(\{ # examplesIf} eic_all <- entsoeapi::all_approved_eic() dplyr::glimpse(eic_all) diff --git a/man/area_eic.Rd b/man/area_eic.Rd index d6ffd47c..d95001f1 100644 --- a/man/area_eic.Rd +++ b/man/area_eic.Rd @@ -19,7 +19,7 @@ energy identification codes from this site: https://www.entsoe.eu/data/energy-identification-codes-eic/eic-approved-codes } \examples{ -\dontshow{if (there_is_provider() && nchar(Sys.getenv("ENTSOE_PAT")) > 0L) withAutoprint(\{ # examplesIf} +\dontshow{if (there_is_provider()) withAutoprint(\{ # examplesIf} eic_area <- entsoeapi::area_eic() dplyr::glimpse(eic_area) diff --git a/man/congestion_income.Rd b/man/congestion_income.Rd index 0c934071..a725ab8d 100644 --- a/man/congestion_income.Rd +++ b/man/congestion_income.Rd @@ -26,7 +26,7 @@ One year range limit applies} can be checked from contract_types table; "A01" = Day ahead "A02" = Weekly -"A032 = Monthly +"A03" = Monthly "A04" = Yearly "A06" = Long Term "A07" = Intraday diff --git a/man/get_news.Rd b/man/get_news.Rd new file mode 100644 index 00000000..9d32912e --- /dev/null +++ b/man/get_news.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/en_helpers.R +\name{get_news} +\alias{get_news} +\title{Display the ENTSO-E Transparency Platform news feed} +\usage{ +get_news(feed_url = .feed_url, n = 5L) +} +\arguments{ 
+\item{feed_url}{the URL of the RSS news feed from the ENTSO-E +Transparency Platform.} + +\item{n}{Integer scalar. Maximum number of feed items to display. +Defaults to \code{5L}. Use \code{Inf} to show all items.} +} +\value{ +A tibble of feed items with columns \code{title}, \code{pub_date}, and +\code{description}, returned invisibly. +} +\description{ +Fetches the RSS news feed from the ENTSO-E Transparency Platform and +displays the entries in the console. Useful for checking platform +maintenance windows, data publication delays, and other announcements +that may affect API availability. +} +\examples{ +\dontshow{if (there_is_provider()) withAutoprint(\{ # examplesIf} +entsoeapi::get_news() +\dontshow{\}) # examplesIf} +} diff --git a/man/implicit_offered_transfer_capacities.Rd b/man/implicit_offered_transfer_capacities.Rd index 5e6b99c1..8f041fbd 100644 --- a/man/implicit_offered_transfer_capacities.Rd +++ b/man/implicit_offered_transfer_capacities.Rd @@ -7,7 +7,7 @@ implicit_offered_transfer_capacities( eic_in = NULL, eic_out = NULL, - period_start = lubridate::ymd(Sys.Date() - lubridate::days(x = 1L), tz = "CET"), + period_start = lubridate::ymd(x = Sys.Date() - lubridate::days(x = 1L), tz = "CET"), period_end = lubridate::ymd(Sys.Date(), tz = "CET"), contract_type = "A01", tidy_output = TRUE, diff --git a/man/location_eic.Rd b/man/location_eic.Rd index 286964f4..d465a52f 100644 --- a/man/location_eic.Rd +++ b/man/location_eic.Rd @@ -20,7 +20,7 @@ https://www.entsoe.eu/data/energy-identification-codes-eic/eic-approved-codes It covers an endpoint, or an IT-system. 
} \examples{ -\dontshow{if (there_is_provider() && nchar(Sys.getenv("ENTSOE_PAT")) > 0L) withAutoprint(\{ # examplesIf} +\dontshow{if (there_is_provider()) withAutoprint(\{ # examplesIf} eic_location <- entsoeapi::location_eic() dplyr::glimpse(eic_location) diff --git a/man/party_eic.Rd b/man/party_eic.Rd index d6ffcea7..53706606 100644 --- a/man/party_eic.Rd +++ b/man/party_eic.Rd @@ -20,7 +20,7 @@ https://www.entsoe.eu/data/energy-identification-codes-eic/eic-approved-codes It covers market participants. } \examples{ -\dontshow{if (there_is_provider() && nchar(Sys.getenv("ENTSOE_PAT")) > 0L) withAutoprint(\{ # examplesIf} +\dontshow{if (there_is_provider()) withAutoprint(\{ # examplesIf} eic_party <- entsoeapi::party_eic() dplyr::glimpse(eic_party) diff --git a/man/resource_object_eic.Rd b/man/resource_object_eic.Rd index 46a37b9e..288c717f 100644 --- a/man/resource_object_eic.Rd +++ b/man/resource_object_eic.Rd @@ -21,7 +21,7 @@ A resource that can either produce or consume energy and that is reported in a schedule. } \examples{ -\dontshow{if (there_is_provider() && nchar(Sys.getenv("ENTSOE_PAT")) > 0L) withAutoprint(\{ # examplesIf} +\dontshow{if (there_is_provider()) withAutoprint(\{ # examplesIf} eic_resource_object <- entsoeapi::resource_object_eic() dplyr::glimpse(eic_resource_object) diff --git a/man/substation_eic.Rd b/man/substation_eic.Rd index 1cb27b35..ad5e777c 100644 --- a/man/substation_eic.Rd +++ b/man/substation_eic.Rd @@ -26,7 +26,7 @@ They can be classified as normal outside substation, armoured substation and underground substation. 
} \examples{ -\dontshow{if (there_is_provider() && nchar(Sys.getenv("ENTSOE_PAT")) > 0L) withAutoprint(\{ # examplesIf} +\dontshow{if (there_is_provider()) withAutoprint(\{ # examplesIf} eic_substation <- entsoeapi::substation_eic() dplyr::glimpse(eic_substation) diff --git a/man/tie_line_eic.Rd b/man/tie_line_eic.Rd index da680ae9..e887e005 100644 --- a/man/tie_line_eic.Rd +++ b/man/tie_line_eic.Rd @@ -21,7 +21,7 @@ It covers a transmission line that connects different areas excluding HVDC interconnectors. } \examples{ -\dontshow{if (there_is_provider() && nchar(Sys.getenv("ENTSOE_PAT")) > 0L) withAutoprint(\{ # examplesIf} +\dontshow{if (there_is_provider()) withAutoprint(\{ # examplesIf} eic_tie_line <- entsoeapi::tie_line_eic() dplyr::glimpse(eic_tie_line) diff --git a/tests/testthat/fixtures/input_lists_for_grouping.rds b/tests/testthat/fixtures/input_lists_for_grouping.rds deleted file mode 100644 index fd9f94f2c1acdf26298f70bce6c635a200e1e3a9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 713 zcmb2|=3oE==C?O|^MxEG4tzYw8Z}G8w2sAgX2z|X-)FaOdlfsm%7{Bhlan>Z;qlqo z_Z{?)&2E?K4S!!F>io-)CpA4%?q_g(RnW6To6j3tZ{9DtNz&p@kXPiYh)q*MgS=K= zntYtSoO$UL-=$Z2zkKcrdiCpPdSK|Xxr^%d3ojRcT6Lv&*Tfe=c{0a6KWXjNGFzPZ zN4$CI&7%v`4^P#5eY4`w+8vjBCRbTy?z{iyUSy8!*62lZ_8Zpj3KRNz)=R|o`=NE5 zt2PChI{JoXsoj`7)mHdT{Ii;NwTfMzJU?0I>fW9u`a5Oqoo}h}Oxk-lZ?O2Gv(lx| zF0w9=>$9t1=c`uRtd|ke`}ZB#lY8~gHeH|M2J1f#VREw`OkDHkx%+Ipl3<_Bman$W z?|+nTJ>mL=BAW%)|cnF zgyxBQtn;rEoAWTR5)@oB;npn&H6Q+`Ibzs%11 zen{T2`}zB&%U`}Zvm=>FSNj&rwlx-N1&LR(j~6b#y!-#A=_RwiOYIddu8}_0{BW(T zF8iG4>DQ&*_uhM3HKk+gPOIqI*3F5R-fgR!6}8uHyZNHUA2X{0-?=?rub*^L_4K^> zT(!rS557pLI`6dp)RVi%PyRliKHqM}?S1vXjVgbqS>CkjzLtCU+S~d0&+dlD?`HsV s{_RhTW8kgS$(q&hk5B)?>Eu}_dVkHXfAR0)|6Y3rrsm!gauN&-06b!BF8}}l diff --git a/tests/testthat/fixtures/news_feed.xml b/tests/testthat/fixtures/news_feed.xml new file mode 100644 index 00000000..55d1f408 --- /dev/null +++ b/tests/testthat/fixtures/news_feed.xml @@ -0,0 +1,26 @@ + + + 
+ Transparency Platform News + https://transparency.entsoe.eu/ + ENTSO-E Transparency Platform News RSS channel + en-US + Thu, 26 Mar 2026 13:31:47 GMT + Thu, 26 Mar 2026 13:31:47 GMT + + TP PROD data publication delays + <p>Some data publications are <strong>delayed</strong>.</p> + Thu, 26 Mar 2026 13:31:47 GMT + + + Scheduled maintenance window + Platform maintenance on Saturday. + Tue, 24 Mar 2026 12:18:19 GMT + + + New dataset available + A new dataset has been published. + Mon, 23 Mar 2026 09:00:00 GMT + + + diff --git a/tests/testthat/test-en_helpers.R b/tests/testthat/test-en_helpers.R index b30fe52f..4b94d71d 100644 --- a/tests/testthat/test-en_helpers.R +++ b/tests/testthat/test-en_helpers.R @@ -13,8 +13,10 @@ testthat::test_that( desc = "all_approved_eic() works", code = { testthat::skip_if_not( - condition = there_is_provider(), - message = "The Entso-e API cannot be reached" + condition = curl::nslookup(host = .pd_domain, error = FALSE) |> + is.null() |> + isFALSE(), + message = "The Entso-e download site cannot be reached" ) testthat::expect_no_error(object = tbl <- all_approved_eic()) testthat::expect_s3_class(object = tbl, class = "tbl_df", exact = FALSE) @@ -54,8 +56,10 @@ testthat::test_that( desc = "party_eic() works", code = { testthat::skip_if_not( - condition = there_is_provider(), - message = "The Entso-e API cannot be reached" + condition = curl::nslookup(host = .pd_domain, error = FALSE) |> + is.null() |> + isFALSE(), + message = "The Entso-e download site cannot be reached" ) testthat::expect_no_error(object = tbl <- party_eic()) testthat::expect_s3_class(object = tbl, class = "tbl_df", exact = FALSE) @@ -96,8 +100,10 @@ testthat::test_that( desc = "area_eic() works", code = { testthat::skip_if_not( - condition = there_is_provider(), - message = "The Entso-e API cannot be reached" + condition = curl::nslookup(host = .pd_domain, error = FALSE) |> + is.null() |> + isFALSE(), + message = "The Entso-e download site cannot be reached" ) 
testthat::expect_no_error(object = tbl <- area_eic()) testthat::expect_s3_class(object = tbl, class = "tbl_df", exact = FALSE) @@ -138,8 +144,10 @@ testthat::test_that( desc = "accounting_point_eic() works", code = { testthat::skip_if_not( - condition = there_is_provider(), - message = "The Entso-e API cannot be reached" + condition = curl::nslookup(host = .pd_domain, error = FALSE) |> + is.null() |> + isFALSE(), + message = "The Entso-e download site cannot be reached" ) testthat::expect_no_error(object = tbl <- accounting_point_eic()) testthat::expect_s3_class(object = tbl, class = "tbl_df", exact = FALSE) @@ -180,8 +188,10 @@ testthat::test_that( desc = "tie_line_eic() works", code = { testthat::skip_if_not( - condition = there_is_provider(), - message = "The Entso-e API cannot be reached" + condition = curl::nslookup(host = .pd_domain, error = FALSE) |> + is.null() |> + isFALSE(), + message = "The Entso-e download site cannot be reached" ) testthat::expect_no_error(object = tbl <- tie_line_eic()) testthat::expect_s3_class(object = tbl, class = "tbl_df", exact = FALSE) @@ -222,8 +232,10 @@ testthat::test_that( desc = "location_eic() works", code = { testthat::skip_if_not( - condition = there_is_provider(), - message = "The Entso-e API cannot be reached" + condition = curl::nslookup(host = .pd_domain, error = FALSE) |> + is.null() |> + isFALSE(), + message = "The Entso-e download site cannot be reached" ) testthat::expect_no_error(object = tbl <- location_eic()) testthat::expect_s3_class(object = tbl, class = "tbl_df", exact = FALSE) @@ -264,8 +276,10 @@ testthat::test_that( desc = "resource_object_eic() works", code = { testthat::skip_if_not( - condition = there_is_provider(), - message = "The Entso-e API cannot be reached" + condition = curl::nslookup(host = .pd_domain, error = FALSE) |> + is.null() |> + isFALSE(), + message = "The Entso-e download site cannot be reached" ) testthat::expect_no_error(object = tbl <- resource_object_eic()) 
testthat::expect_s3_class(object = tbl, class = "tbl_df", exact = FALSE) @@ -309,8 +323,10 @@ testthat::test_that( desc = "substation_eic() works", code = { testthat::skip_if_not( - condition = there_is_provider(), - message = "The Entso-e API cannot be reached" + condition = curl::nslookup(host = .pd_domain, error = FALSE) |> + is.null() |> + isFALSE(), + message = "The Entso-e download site cannot be reached" ) testthat::expect_no_error(object = tbl <- substation_eic()) testthat::expect_s3_class(object = tbl, class = "tbl_df", exact = FALSE) @@ -324,6 +340,184 @@ testthat::test_that( ) +testthat::test_that( + desc = "get_news() validates inputs", + code = { + testthat::expect_error( + object = get_news(url = "foo"), + regexp = 'unused argument \\(url = "foo"\\)' + ) + } +) + + +testthat::test_that( + desc = "get_news() returns a tibble with expected columns", + code = { + xml_fixture <- readLines( + con = testthat::test_path("fixtures", "news_feed.xml"), + encoding = "UTF-8" + ) |> + paste(collapse = "\n") |> + charToRaw() + httr2::local_mocked_responses( + mock = function(req) { + httr2::response( + status_code = 200L, + url = req$url, + headers = list("content-type" = "application/xml"), + body = xml_fixture + ) + } + ) + tbl <- get_news() + testthat::expect_s3_class(object = tbl, class = "tbl_df", exact = FALSE) + testthat::expect_setequal( + object = names(tbl), + expected = c("title", "pub_date", "description") + ) + testthat::expect_equal(object = nrow(tbl), expected = 3L) + testthat::expect_equal( + object = tbl$title[[1L]], + expected = "TP PROD data publication delays" + ) + testthat::expect_equal( + object = tbl$description[[1L]], + expected = "Some data publications are delayed." 
+ ) + } +) + + +testthat::test_that( + desc = "get_news() respects n parameter", + code = { + xml_fixture <- readLines( + con = testthat::test_path("fixtures", "news_feed.xml"), + encoding = "UTF-8" + ) |> + paste(collapse = "\n") |> + charToRaw() + httr2::local_mocked_responses( + mock = function(req) { + httr2::response( + status_code = 200L, + url = req$url, + headers = list("content-type" = "application/xml"), + body = xml_fixture + ) + } + ) + tbl <- get_news(n = 1L) + testthat::expect_equal(object = nrow(tbl), expected = 1L) + testthat::expect_equal( + object = tbl$title[[1L]], + expected = "TP PROD data publication delays" + ) + } +) + + +testthat::test_that( + desc = "get_news() handles n larger than available items", + code = { + xml_fixture <- readLines( + con = testthat::test_path("fixtures", "news_feed.xml"), + encoding = "UTF-8" + ) |> + paste(collapse = "\n") |> + charToRaw() + httr2::local_mocked_responses( + mock = function(req) { + httr2::response( + status_code = 200L, + url = req$url, + headers = list("content-type" = "application/xml"), + body = xml_fixture + ) + } + ) + tbl <- get_news(n = 100L) + testthat::expect_equal(object = nrow(tbl), expected = 3L) + } +) + + +testthat::test_that( + desc = "get_news() handles empty feed", + code = { + empty_rss <- paste( + '', + '', + "", + "Empty", + "", + "", + sep = "\n" + ) |> + charToRaw() + httr2::local_mocked_responses( + mock = function(req) { + httr2::response( + status_code = 200L, + url = req$url, + headers = list("content-type" = "application/xml"), + body = empty_rss + ) + } + ) + tbl <- get_news() + testthat::expect_s3_class(object = tbl, class = "tbl_df", exact = FALSE) + testthat::expect_equal(object = nrow(tbl), expected = 0L) + } +) + + +testthat::test_that( + desc = "get_news() errors on HTTP failure", + code = { + httr2::local_mocked_responses( + mock = function(req) { + httr2::response( + status_code = 500L, + url = req$url, + headers = list("content-type" = "text/html"), + body = 
charToRaw("Internal Server Error") + ) + } + ) + testthat::expect_error( + object = get_news(), + regexp = "500" + ) + } +) + + +testthat::test_that( + desc = "get_news() returns result invisibly", + code = { + xml_fixture <- readLines( + con = testthat::test_path("fixtures", "news_feed.xml"), + encoding = "UTF-8" + ) |> + paste(collapse = "\n") |> + charToRaw() + httr2::local_mocked_responses( + mock = function(req) { + httr2::response( + status_code = 200L, + url = req$url, + headers = list("content-type" = "application/xml"), + body = xml_fixture + ) + } + ) + testthat::expect_invisible(get_news()) + } +) + + testthat::test_that( desc = "all_allocated_eic() validates inputs", code = { @@ -339,8 +533,10 @@ testthat::test_that( desc = "all_allocated_eic() responses got and appended into a tibble", code = { testthat::skip_if_not( - condition = there_is_provider(), - message = "The Entso-e API cannot be reached" + condition = curl::nslookup(host = .pd_domain, error = FALSE) |> + is.null() |> + isFALSE(), + message = "The Entso-e download site cannot be reached" ) m$reset() testthat::expect_no_error(object = tbl <- all_allocated_eic()) @@ -560,3 +756,174 @@ testthat::test_that( testthat::expect_identical(object = tbl1, expected = tbl2) } ) + + +testthat::test_that( + desc = "all_allocated_eic() stops on curl/network error", + code = { + m$reset() + curl_err <- structure( + class = c("curl_error", "error", "condition"), + list( + message = paste( + "Could not resolve host:", + "eepublicdownloads.blob.core.windows.net" + ) + ) + ) + httr2_err <- structure( + class = c("httr2_failure", "httr2_error", "error", "condition"), + list( + message = "Failed to perform HTTP request.", + resp = NULL, + parent = curl_err + ) + ) + httr2::local_mocked_responses( + mock = function(req) stop(httr2_err) + ) + testthat::expect_error( + object = all_allocated_eic(), + regexp = "Failed to perform HTTP request" + ) + } +) + + +testthat::test_that( + desc = "all_allocated_eic() stops on 
HTML error response", + code = { + m$reset() + httr2::local_mocked_responses( + mock = function(req) { + httr2::response( + status_code = 403L, + url = req$url, + headers = list("content-type" = "text/html"), + body = "Access Denied" |> + charToRaw() + ) + } + ) + testthat::expect_error( + object = all_allocated_eic(), + regexp = "403" + ) + } +) + + +testthat::test_that( + desc = "all_allocated_eic() returns correct number of rows", + code = { + m$reset() + xml_fixture <- readLines( + con = testthat::test_path("fixtures", "get_allocated_eic_min.xml"), + encoding = "UTF-8" + ) |> + paste(collapse = "\n") |> + charToRaw() + httr2::local_mocked_responses( + mock = function(req) { + httr2::response( + status_code = 200L, + url = req$url, + headers = list("content-type" = "application/xml"), + body = xml_fixture + ) + } + ) + tbl <- all_allocated_eic() + testthat::expect_equal(object = nrow(tbl), expected = 2L) + } +) + + +testthat::test_that( + desc = "all_allocated_eic() handles duplicate Function_Names with separator", + code = { + m$reset() + xml_fixture <- readLines( + con = testthat::test_path("fixtures", "get_allocated_eic_dupl.xml"), + encoding = "UTF-8" + ) |> + paste(collapse = "\n") |> + charToRaw() + httr2::local_mocked_responses( + mock = function(req) { + httr2::response( + status_code = 200L, + url = req$url, + headers = list("content-type" = "application/xml"), + body = xml_fixture + ) + } + ) + tbl <- all_allocated_eic() + testthat::expect_true( + object = grepl( + pattern = " - ", + x = tbl$function_names[[1L]], + fixed = TRUE + ) + ) + } +) + + +testthat::test_that( + desc = "all_allocated_eic() collapses Function_Names correctly", + code = { + m$reset() + xml_fixture <- readLines( + con = testthat::test_path("fixtures", "get_allocated_eic_dupl.xml"), + encoding = "UTF-8" + ) |> + paste(collapse = "\n") |> + charToRaw() + httr2::local_mocked_responses( + mock = function(req) { + httr2::response( + status_code = 200L, + url = req$url, + headers = 
list("content-type" = "application/xml"), + body = xml_fixture + ) + } + ) + tbl <- all_allocated_eic() + testthat::expect_equal(object = nrow(tbl), expected = 1L) + } +) + + +testthat::test_that( + desc = "all_allocated_eic() stops when bind_cols fails", + code = { + m$reset() + xml_fixture <- readLines( + con = testthat::test_path("fixtures", "get_allocated_eic_min.xml"), + encoding = "UTF-8" + ) |> + paste(collapse = "\n") |> + charToRaw() + httr2::local_mocked_responses( + mock = function(req) { + httr2::response( + status_code = 200L, + url = req$url, + headers = list("content-type" = "application/xml"), + body = xml_fixture + ) + } + ) + testthat::local_mocked_bindings( + bind_cols = function(...) stop("mocked bind_cols error"), + .package = "entsoeapi" + ) + testthat::expect_error( + object = all_allocated_eic(), + regexp = "unexpected tree structure" + ) + } +) diff --git a/tests/testthat/test-en_market.R b/tests/testthat/test-en_market.R index 682c4052..02dc6030 100644 --- a/tests/testthat/test-en_market.R +++ b/tests/testthat/test-en_market.R @@ -1427,7 +1427,8 @@ testthat::test_that( ), info = "Unauthorized. Missing or invalid security token!" 
) - }) + } +) testthat::test_that( @@ -2179,7 +2180,8 @@ testthat::test_that( ), regexp = "Missing or invalid security token" ) - }) + } +) testthat::test_that( diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index c7d36cee..5c664930 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1,108 +1,3 @@ -testthat::test_that( - desc = "grouping_by_common_strings() works", - code = { - list_fixture <- readRDS( - file = testthat::test_path("fixtures", "input_lists_for_grouping.rds") - ) - for (l in seq_along(list_fixture)) { - value <- grouping_by_common_strings(vector_list = list_fixture[[l]]) - if (l == 17) value <- unlist(value) - assign(x = paste0("result", l), value = value) - rm(value) - } - is_in_which <- function(value, lst) { - lst |> - vapply(FUN = \(x) value %in% x, FUN.VALUE = logical(1L)) |> - which() - } - testthat::expect_equal( - object = grouping_by_common_strings(vector_list = list()), - expected = list() - ) - testthat::expect_equal( - object = grouping_by_common_strings(vector_list = list(c("a", "b"))), - expected = list(1L) - ) - testthat::expect_length(object = result1, n = 2L) - testthat::expect_true( - object = 1L %in% result1[[1L]] || 1L %in% result1[[2L]] - ) - testthat::expect_true( - object = 2L %in% result1[[1L]] || 2L %in% result1[[2L]] - ) - testthat::expect_false(object = all(c(1L, 2L) %in% result1[[1L]])) - testthat::expect_length(object = result2, n = 1L) - testthat::expect_setequal(object = result2[[1]], expected = c(1L, 2L)) - testthat::expect_length(object = result3, n = 2L) - testthat::expect_setequal( - object = result3[[is_in_which(value = 1L, lst = result3)]], - expected = c(1L, 3L, 5L) - ) - testthat::expect_setequal( - object = result3[[is_in_which(value = 2L, lst = result3)]], - expected = c(2L, 4L) - ) - testthat::expect_length(object = result4, n = 1L) - testthat::expect_setequal(object = result4[[1]], expected = 1L:4L) - testthat::expect_length(object = result5, n = 2L) - 
testthat::expect_equal( - object = result5[[is_in_which(value = 3L, lst = result5)]], - expected = 3L - ) - testthat::expect_setequal( - object = result5[[is_in_which(value = 1L, lst = result5)]], - expected = c(1L, 2L, 4L) - ) - testthat::expect_length(object = result6, n = 1L) - testthat::expect_setequal(object = result6[[1L]], expected = c(1L, 2L)) - testthat::expect_length(object = result7, n = 3L) - testthat::expect_length(object = result8, n = 3L) - testthat::expect_setequal( - object = result8[[is_in_which(value = 1L, lst = result8)]], - expected = c(1L, 3L) - ) - testthat::expect_length(object = result9, n = 3L) - testthat::expect_setequal( - object = result9[[is_in_which(value = 1L, lst = result9)]], - expected = c(1L, 3L, 4L, 7L) - ) - testthat::expect_setequal( - object = result9[[is_in_which(value = 2L, lst = result9)]], - expected = c(2L, 5L) - ) - testthat::expect_equal( - object = result9[[is_in_which(value = 6L, lst = result9)]], - expected = 6L - ) - testthat::expect_length(object = result10, n = 100L) - testthat::expect_length(object = result11, n = 2) - testthat::expect_length(object = result12, n = 2L) - testthat::expect_setequal( - object = result12[[is_in_which(value = 1L, lst = result12)]], - expected = c(1L, 2L) - ) - testthat::expect_length(object = result13, n = 2L) - testthat::expect_setequal( - object = result13[[is_in_which(value = 1L, lst = result13)]], - expected = c(1L, 2L) - ) - testthat::expect_length(object = result14, n = 2L) - testthat::expect_setequal( - object = result14[[is_in_which(value = 1L, lst = result14)]], - expected = c(1L, 2L) - ) - testthat::expect_length(object = result15, n = 2L) - testthat::expect_setequal( - object = result15[[is_in_which(value = 1L, lst = result15)]], - expected = c(1L, 2L) - ) - testthat::expect_true(object = is.integer(result16[[1L]])) - testthat::expect_setequal(object = result17, expected = 1L:4L) - testthat::expect_equal(object = length(result17), expected = 4L) - } -) - - 
testthat::test_that( desc = "extract_leaf_twig_branch() works 1", code = { @@ -206,18 +101,16 @@ testthat::test_that( "type" ) ) - testthat::expect_setequal( + testthat::expect_contains( object = xml2::xml_contents(x = content_3$result[[1L]]) |> extract_leaf_twig_branch() |> names() |> sort(), expected = c( "createdDateTime", - "docStatus.value", "mRID", "process.processType", "Reason.code", - "Reason.text", "receiver_MarketParticipant.marketRole.type", "receiver_MarketParticipant.mRID", "revisionNumber", @@ -596,8 +489,10 @@ testthat::test_that( desc = "get_eiccodes() works", code = { testthat::skip_if_not( - condition = there_is_provider(), - message = "The Entso-e API cannot be reached" + condition = curl::nslookup(host = .pd_domain, error = FALSE) |> + is.null() |> + isFALSE(), + message = "The Entso-e download site cannot be reached" ) testthat::expect_setequal( object = get_eiccodes( @@ -621,11 +516,11 @@ testthat::test_that( object = get_eiccodes(f = "Y_eicCodes.csv") |> nrow(), expected = 300 ) - testthat::expect_error( + testthat::expect_null( object = get_eiccodes(f = NULL), - info = "The argument 'f' is missing!" + info = "Cannot open the connection!" ) - testthat::expect_error( + testthat::expect_null( object = get_eiccodes(f = "ABC"), info = "Cannot open the connection!" 
) @@ -808,8 +703,10 @@ testthat::test_that( desc = "add_eic_names() works", code = { testthat::skip_if_not( - condition = there_is_provider(), - message = "The Entso-e API cannot be reached" + condition = curl::nslookup(host = .pd_domain, error = FALSE) |> + is.null() |> + isFALSE(), + message = "The Entso-e download site cannot be reached" ) test_df_list <- readRDS( file = testthat::test_path("fixtures", "test_df_list.rds") @@ -1483,8 +1380,10 @@ testthat::test_that( desc = "add_eic_names() adds names for additional domain mrid columns", code = { testthat::skip_if_not( - condition = there_is_provider(), - message = "The Entso-e API cannot be reached" + condition = curl::nslookup(host = .pd_domain, error = FALSE) |> + is.null() |> + isFALSE(), + message = "The Entso-e download site cannot be reached" ) test_df_14 <- readRDS( file = testthat::test_path("fixtures", "test_df_list.rds") diff --git a/vignettes/architecture.Rmd b/vignettes/architecture.Rmd index e684f22a..8ee0fb81 100644 --- a/vignettes/architecture.Rmd +++ b/vignettes/architecture.Rmd @@ -2,27 +2,36 @@ title: "Architecture: API Pipeline, XML Engine & Caching" output: rmarkdown::html_vignette vignette: > + %\VignetteEncoding{UTF-8} %\VignetteIndexEntry{Architecture: API Pipeline, XML Engine & Caching} %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} +editor_options: + markdown: + wrap: 80 --- ```{r setup, include = FALSE} knitr::opts_chunk$set(echo = FALSE) ``` +```{r load-packages} +library(entsoeapi) +library(cli) +library(lubridate) +``` + ## Overview This document describes three interconnected internal systems that power every data-retrieval function in `entsoeapi`: -1. **API Request Pipeline** — validates user input, builds the ENTSO-E query URL, - sends the HTTP request, handles errors, and transparently paginates oversized - queries. -2. **XML-to-Table Engine** — converts raw XML (or lists of XML documents from - paginated responses) into clean, typed, enriched R tibbles. -3. 
(loop) 
Cache miss - | | - [return cached] [download CSV / XML, - lookup table] cache result] - | | - +----------+----------+ - | - v - [Whitelist columns, sort rows] - | - v + | | + +-- HTTP 200 / zip --> [read_zipped_xml] ---+--> [extract_response] + | | | + +-- HTTP 200 / xml --> [resp_body_xml] -----+ v + | [xml_to_table, + | per document] + +-- error 503 -------> [req_retry, 3x / 10s] | + | | v + | v [extract_leaf/twig/branch] + | [cli_abort if all fail] | + | v + +-- error HTML ------> [cli_abort] [type conversions] + | | + +-- exceeds max -----> [calc_offset_urls, v + pagination] [my_snakecase + | column rename] + v | + [api_req] (loop)] v + [tidy_or_not: + A01 / A03 curve] + | + v + [add_type_names / + add_eic_names / + add_definitions] + | + +----------+----------+ + | | + type/eic/def type/eic/def + cache hit? cache miss? + | | + v v + [return cached [download CSV/XML, + lookup table] cache result] + | | + +--------->+<---------+ + | + v + [Whitelist columns, + sort rows] + | + v Tibble returned to user ``` ---- +-------------------------------------------------------------------------------- ## 1. API Request Pipeline @@ -100,23 +117,23 @@ Every public function follows the same four-step structure: **Step 1 — Argument validation.** Each parameter is checked with `checkmate` before any network call is made. Common checks: -- EIC codes: two-stage check — `checkmate::assert_string()` first enforces - exactly 16 characters matching `^[A-Z0-9-]*$`, then `assert_eic()` verifies - the 16th character is the correct weighted-modulo-37 checksum of the first 15 - characters (see below). 
-- Security token: non-empty string (sourced from `Sys.getenv("ENTSOE_PAT")`) -- Time ranges: difference between `period_end` and `period_start` must not - exceed 365 days -- Categorical parameters (e.g., `business_type`, `process_type`): validated - against allowed values via `checkmate::assert_choice()` +- EIC codes: two-stage check — `checkmate::assert_string()` first enforces + exactly 16 characters matching `^[A-Z0-9-]*$`, then `assert_eic()` verifies + the 16th character is the correct weighted-modulo-37 checksum of the first + 15 characters (see below). +- Security token: non-empty string (sourced from `Sys.getenv("ENTSOE_PAT")`) +- Time ranges: difference between `period_end` and `period_start` must not + exceed 365 days +- Categorical parameters (e.g., `business_type`, `process_type`): validated + against allowed values via `checkmate::assert_choice()` **`there_is_provider()`** (`R/utils.R`, exported): A lightweight connectivity check that sends a dummy request to the ENTSO-E API and returns `TRUE` when the server responds with HTTP 401 (meaning the endpoint is reachable but the token was rejected as expected). Returns `FALSE` when no internet connection is -available or the server is unreachable. Its primary role is as an -`@examplesIf` guard in package documentation, ensuring examples are only -executed when the API is accessible. +available or the server is unreachable. Its primary role is as an `@examplesIf` +guard in package documentation, ensuring examples are only executed when the API +is accessible. **EIC checksum validation — `assert_eic()` and `possible_eic_chars`** (`R/utils.R`): The ENTSO-E EIC standard defines a check character at position @@ -129,11 +146,11 @@ message if it does not match the actual 16th character. An optional that accept an optional EIC parameter). This function is not exported; it is called internally immediately after each `checkmate::assert_string()` EIC check. 
-**Step 2 — Timestamp conversion.** `url_posixct_format()` (`R/utils.R`) -converts the user-supplied `period_start` / `period_end` to the format required -by the API: `YYYYMMDDHHMM` in UTC. Accepts POSIXct objects or character strings -in nine common formats. Aborts with a clear message if the input cannot be -parsed. Warns when character input is interpreted as UTC. +**Step 2 — Timestamp conversion.** `url_posixct_format()` (`R/utils.R`) converts +the user-supplied `period_start` / `period_end` to the format required by the +API: `YYYYMMDDHHMM` in UTC. Accepts POSIXct objects or character strings in nine +common formats. Aborts with a clear message if the input cannot be parsed. Warns +when character input is interpreted as UTC. **Step 3 — Query string assembly.** Each function hard-codes the ENTSO-E document type and process type codes for its endpoint, then appends the @@ -142,7 +159,7 @@ user-supplied EIC code(s) and converted timestamps. Optional parameters (e.g., **Step 4 — Pipeline invocation.** -```r +``` r en_cont_list <- api_req_safe(query_string, security_token) extract_response(content = en_cont_list, tidy_output = tidy_output) ``` @@ -151,14 +168,14 @@ extract_response(content = en_cont_list, tidy_output = tidy_output) **Location:** `R/utils.R` -```r +``` r api_req_safe <- safely(api_req) ``` -A one-liner that wraps `api_req()` with the package-local `safely()` helper -(a lightweight `tryCatch` wrapper). All R-level exceptions are caught and -returned as `list(result = NULL, error = )` rather than halting -execution. This standardised return shape is what `extract_response()` expects. +A one-liner that wraps `api_req()` with the package-local `safely()` helper (a +lightweight `tryCatch` wrapper). All R-level exceptions are caught and returned +as `list(result = NULL, error = )` rather than halting execution. +This standardised return shape is what `extract_response()` expects. ### 1.3 `api_req()` @@ -166,41 +183,49 @@ execution. 
This standardised return shape is what `extract_response()` expects. The core HTTP function. Steps: -1. **URL construction.** Assembles the full URL from package-level constants - defined in `R/constants.R`: - - Scheme: `.api_scheme` (`"https://"`) - - Domain: `.api_domain` (`"web-api.tp.entsoe.eu/"`) - - Path: `.api_name` (`"api?"`) - - Appends `query_string` and `&securityToken={token}` - - Logs the URL to the console with the token replaced by `<...>` to prevent - credential leakage. +1. **URL construction.** Assembles the full URL from package-level constants + defined in `R/constants.R`: + + - Scheme: `.api_scheme` (`"https://"`) + - Domain: `.api_domain` (`"web-api.tp.entsoe.eu/"`) + - Path: `.api_name` (`"api?"`) + - Appends `query_string` and `&securityToken={token}` + - Logs the URL to the console with the token replaced by `<...>` to + prevent credential leakage. + +2. **Request configuration.** Uses `httr2::request()` with: -2. **Request configuration.** Uses `httr2::request()` with: - - Method: GET - - Verbose: response headers only (`req_verbose(header_req=FALSE, header_resp=TRUE)`) - - Timeout: `.req_timeout` seconds (60, defined in `R/constants.R`) + - Method: GET + - Verbose: response headers only + (`req_verbose(header_req=FALSE, header_resp=TRUE)`) + - Timeout: `.req_timeout` seconds (60, defined in `R/constants.R`) + - Retry: up to 3 attempts with a 10-second backoff, triggered only by HTTP + 503 (Service Unavailable) responses. Other HTTP errors are not retried. + This guards against transient server-side overload on the ENTSO-E + platform. -3. **Execution.** Sent via `safely(httr2::req_perform)` (the same package-local - wrapper) so network errors are captured, not thrown. +3. **Execution.** Sent via `safely(httr2::req_perform)` (the same package-local + wrapper) so network errors are captured, not thrown. -4. 
**HTTP 200 — response routing.** - - `application/zip` or `application/octet-stream`: body saved to a temp - file, then decompressed by `read_zipped_xml()`. - - `text/xml` or `application/xml`: parsed directly with - `httr2::resp_body_xml(encoding = "UTF-8")`. - - Unknown content-type: aborts with an informative message. +4. **HTTP 200 — response routing.** -5. **HTTP errors — error handling.** See section 1.4. + - `application/zip` or `application/octet-stream`: body saved to a temp + file, then decompressed by `read_zipped_xml()`. + - `text/xml` or `application/xml`: parsed directly with + `httr2::resp_body_xml(encoding = "UTF-8")`. + - Unknown content-type: aborts with an informative message. -6. **Returns** either a single `xml_document`, a list of `xml_document` objects - (paginated or zipped responses), or calls `cli::cli_abort()`. +5. **HTTP errors — error handling.** See section 1.4. + +6. **Returns** either a single `xml_document`, a list of `xml_document` objects + (paginated or zipped responses), or calls `cli::cli_abort()`. 
### 1.4 Error handling

| Error type | Condition | Action |
-|---|---|---|
+|---------------------------|---------------------------|---------------------------|
| Network / R exception | `req_perform_safe()` returns `$error` | Propagated via `api_req_safe()` |
-| 503 Service Unavailable | HTTP status 503 | Immediate `cli_abort()` |
+| 503 Service Unavailable | HTTP status 503 | Retried up to 3 times (10 s backoff) via `req_retry()`; `cli_abort()` if all attempts fail |
| HTML error page | Response body is HTML | Extract status + body, `cli_abort()` |
| XML error — code 999, exceeds max | Body is XML, reason code 999, message contains "exceeds the allowed maximum" | Trigger pagination (see 1.5) |
| XML error — code 999, forbidden | Same as above but query matches a forbidden pattern | `cli_abort()` with reason text |
@@ -215,20 +240,20 @@

When the ENTSO-E API returns an XML error with reason code 999 and a message
indicating the result set exceeds the allowed maximum, `api_req()`
automatically splits the request into smaller chunks:

-1. The error message is parsed with regex to extract both the *requested* and
-   the *allowed* document counts.
-2. The number of offset requests needed is calculated:
-   `ceiling(docs_requested / docs_allowed)`.
-3. Each offset query is built by stripping any existing `&offset=` from the
-   original query string and appending `&offset=0`, `&offset=N`,
-   `&offset=2N`, …
-4. `api_req()` calls itself recursively for each offset query string.
-5. All responses are collected and returned as a list, which the XML-to-table
-   engine processes element by element.
+1. The error message is parsed with a regular expression to extract both the
+   *requested* and the *allowed* document counts.
+2. The number of offset requests needed is calculated:
+   `ceiling(docs_requested / docs_allowed)`.
+3. Each offset query is built by stripping any existing `&offset=` from the
+   original query string and appending `&offset=0`, `&offset=N`, `&offset=2N`,
+   …
+4. 
`api_req()` calls itself recursively for each offset query string. +5. All responses are collected and returned as a list, which the XML-to-table + engine processes element by element. Pagination is suppressed (and the request is aborted instead) for endpoints -known not to support offsets, identified by six hard-coded regex patterns -covering document types A63, A65, B09, A91, A92, and A94 with specific +known not to support offsets, identified by six hard-coded regular expression +patterns covering document types A63, A65, B09, A91, A92, and A94 with specific business or storage types. ### 1.6 `read_zipped_xml()` @@ -236,11 +261,12 @@ business or storage types. **Location:** `R/utils.R` Called when the API returns a zip archive. Decompresses the temp file with -`safely(utils::unzip)` (using the package-local wrapper), then reads each extracted XML file with -`xml2::read_xml()`. Returns a list of `xml_document` objects — the same shape -as a paginated response, so `extract_response()` handles both identically. +`safely(utils::unzip)` (using the package-local wrapper), then reads each +extracted XML file with `xml2::read_xml()`. Returns a list of `xml_document` +objects — the same shape as a paginated response, so `extract_response()` +handles both identically. ---- +-------------------------------------------------------------------------------- ## 2. XML-to-Table Engine @@ -251,12 +277,12 @@ as a paginated response, so `extract_response()` handles both identically. Entry point called by every user-facing function. Accepts the `list(result, error)` from `api_req_safe()`. -- If `$error` is not `NULL`: re-throws the error with `cli::cli_abort()`. -- If `$result` is a list (paginated or zipped): iterates with `purrr::imap()`, - calling `xml_to_table()` on each element, showing a progress bar, then - combines all results with `dplyr::bind_rows()` and converts to a tibble. -- If `$result` is a single `xml_document`: calls `xml_to_table()` directly. 
-- Returns a tibble, or `NULL` if the API returned no data. +- If `$error` is not `NULL`: re-throws the error with `cli::cli_abort()`. +- If `$result` is a list (paginated or zipped): iterates with `lapply()`, + calling `xml_to_table()` on each element, showing a progress bar, then + combines all results with `dplyr::bind_rows()` and converts to a tibble. +- If `$result` is a single `xml_document`: calls `xml_to_table()` directly. +- Returns a tibble, or `NULL` if the API returned no data. ### 2.2 `xml_to_table()` @@ -265,24 +291,23 @@ Entry point called by every user-facing function. Accepts the Core orchestrator. Receives a single `xml_document` and returns a tibble by running a fixed transformation sequence: -1. XML parsing → raw wide data frame -2. Date/time column merging -3. Type conversions (DateTime, numeric) -4. Column name normalization -5. Time series restructuring -6. Metadata enrichment (type names, EIC names, definitions) -7. Column whitelist filtering -8. Row ordering +1. XML parsing → raw wide data frame +2. Date/time column merging +3. Type conversions (DateTime, numeric) +4. Column name normalization +5. Time series restructuring +6. Metadata enrichment (type names, EIC names, definitions) +7. Column whitelist filtering +8. 
Row ordering

### 2.3 XML parsing

-**Location:** `extract_leaf_twig_branch()`, `extract_nodesets()`,
-`grouping_by_common_strings()` in `R/utils.R`
+**Location:** `extract_leaf_twig_branch()`, `extract_nodesets()` in `R/utils.R`

The ENTSO-E XML schema uses three nesting levels, which the engine labels:

| Level | Definition | Example element |
-|---|---|---|
+|---------------------------|---------------------------|---------------------------|
| Leaf | No children | `<quantity>100</quantity>` |
| Twig | Has direct children only | `<Point>` |
| Branch | Has grandchild nodes | `<TimeSeries>` |
@@ -291,30 +316,26 @@ The ENTSO-E XML schema uses three nesting levels, which the engine labels:

`xml2::as_list()`, constructing dotted column names from the element hierarchy
(e.g., `TimeSeries.mRID`). `NULL` values become `NA_character_`.

-`grouping_by_common_strings()` solves the connected-components problem: it
-groups sub-tables that share column names (using a union-find algorithm) so they
-can be bound horizontally with `dplyr::bind_cols()`. This handles the fact that
-different ENTSO-E endpoints nest related fields at different depths.
- ### 2.4 Column name normalization — `my_snakecase()` **Location:** `R/utils.R` Two-pass renaming: -**Pass 1 — domain-specific substitutions** (applied before snakecase conversion): +**Pass 1 — domain-specific substitutions** (applied before snakecase +conversion): -| Pattern | Replacement | -|---|---| -| `mRID` | `mrid` | -| `TimeSeries` | `ts` | -| `^process` | *(removed)* | -| `unavailability_Time_Period` | `unavailability` | -| XML namespace / attribute artifacts | *(removed)* | +| Pattern | Replacement | +|-------------------------------------|------------------| +| `mRID` | `mrid` | +| `TimeSeries` | `ts` | +| `^process` | *(removed)* | +| `unavailability_Time_Period` | `unavailability` | +| XML namespace / attribute artifacts | *(removed)* | **Pass 2 — standard snakecase** via `snakecase::to_snake_case()`, followed by -cleanup passes that collapse redundant fragments (e.g., -`psr_type_psr_type` → `psr_type`). +cleanup passes that collapse redundant fragments (e.g., `psr_type_psr_type` → +`psr_type`). ### 2.5 Time series handling — `tidy_or_not()` @@ -349,13 +370,13 @@ additional columns: **`add_type_names()`** (`R/utils.R`) — joins human-readable definitions from built-in package data tables (e.g., `business_types`, `asset_types`, -`process_types`) using `def_merge()`. Produces `_def` suffix columns alongside -each code column (e.g., `ts_business_type` → `ts_business_type_def`). +`process_types`) using `lookup_merge()`. Produces `_def` suffix columns +alongside each code column (e.g., `ts_business_type` → `ts_business_type_def`). **`add_eic_names()`** (`R/utils.R`) — joins EIC code long names from `area_eic()` and `resource_object_eic()` (both cached; see section 3) using -`eic_name_merge()`. Produces `_name` suffix columns alongside each `_mrid` -column (e.g., `ts_in_domain_mrid` → `ts_in_domain_name`). +`lookup_merge()`. Produces `_name` suffix columns alongside each `_mrid` column +(e.g., `ts_in_domain_mrid` → `ts_in_domain_name`). 
**`add_definitions()`** (`R/utils.R`) — joins further definitions: auction categories, flow directions, reason codes (with multi-code merging via `" - "` @@ -363,15 +384,15 @@ separator), and object aggregation types. ### 2.7 Column whitelist and row ordering -After enrichment, `xml_to_table()` applies a hard-coded whitelist of ~140 allowed -column names. Any column not on the list is silently dropped. This prevents -internal XML artefacts from leaking into user-visible output and keeps the API -stable across ENTSO-E schema changes. +After enrichment, `xml_to_table()` applies a hard-coded whitelist of \~140 +allowed column names. Any column not on the list is silently dropped. This +prevents internal XML artefacts from leaking into user-visible output and keeps +the API stable across ENTSO-E schema changes. Rows are then sorted by: `created_date_time`, `ts_mrid`, `ts_business_type`, `ts_mkt_psr_type`, `ts_time_interval_start`, `ts_point_dt_start` (when present). ---- +-------------------------------------------------------------------------------- ## 3. Caching System @@ -381,7 +402,7 @@ The package maintains two independent in-memory caches, both with a 1-hour maximum age: | Object | Initialised in | Caches | -|---|---|---| +|---------------------------|---------------------------|---------------------------| | `m` | `R/utils.R` (top of file) | EIC name lookup tables used during XML enrichment | | `mh` | `R/en_helpers.R` (top of file) | Full EIC code tibbles downloaded by `*_eic()` functions | @@ -393,21 +414,21 @@ not user-configurable. 
**Via `mh`** (one key per EIC function): -| Cache key | Source | Function | -|---|---|---| -| `party_eic_df_key` | CSV download | `party_eic()` | -| `area_eic_df_key` | CSV download | `area_eic()` | -| `accounting_point_eic_df_key` | CSV download | `accounting_point_eic()` | -| `tie_line_eic_df_key` | CSV download | `tie_line_eic()` | -| `location_eic_df_key` | CSV download | `location_eic()` | -| `resource_object_eic_df_key` | CSV download | `resource_object_eic()` | -| `substation_eic_df_key` | CSV download | `substation_eic()` | -| `all_allocated_eic_df_key` | XML download + parse | `all_allocated_eic()` | +| Cache key | Source | Function | +|-------------------------------|----------------------|--------------------------| +| `party_eic_df_key` | CSV download | `party_eic()` | +| `area_eic_df_key` | CSV download | `area_eic()` | +| `accounting_point_eic_df_key` | CSV download | `accounting_point_eic()` | +| `tie_line_eic_df_key` | CSV download | `tie_line_eic()` | +| `location_eic_df_key` | CSV download | `location_eic()` | +| `resource_object_eic_df_key` | CSV download | `resource_object_eic()` | +| `substation_eic_df_key` | CSV download | `substation_eic()` | +| `all_allocated_eic_df_key` | XML download + parse | `all_allocated_eic()` | **Via `m`** (used inside the XML-to-table engine): | Cache key | Content | -|---|---| +|----------------------------------------|----------------------------------------| | `area_eic_name_key` | Subset of `area_eic()`: EicCode + EicLongName columns | | `resource_object_eic_name_key` | Subset of `resource_object_eic()`: EicCode + EicLongName columns | @@ -419,14 +440,14 @@ reference data (EIC registries, type definitions) is cached. 
All EIC functions use the same template: -```r +``` r cache_key <- "unique_key_name" if (mh$exists(key = cache_key)) { res_df <- mh$get(key = cache_key, missing = fallback_expr) - cli::cli_alert_info("pulling {f} file from cache") + cli_alert_info("pulling {f} file from cache") } else { - cli::cli_alert_info("downloading {f} file ...") + cli_alert_info("downloading {f} file ...") res_df <- download_and_transform() mh$set(key = cache_key, value = res_df) } @@ -437,13 +458,13 @@ the console. ### 3.4 Double-caching during EIC name enrichment -`add_eic_names()` calls `get_area_eic_name()` and `get_resource_object_eic()`, -which use cache `m`. If `m` misses, those helpers call `area_eic()` / -`resource_object_eic()`, which use cache `mh`. This means the same underlying -data may be stored at two levels simultaneously: +`add_eic_names()` calls `get_resource_object_eic()` (cache `m`), and fetches +area EIC names inline (cache `m`, falling back to `area_eic()` on cache miss, +which uses cache `mh`). This means the same underlying data may be stored at two +levels simultaneously: -- `mh` holds the full EIC tibble (all columns). -- `m` holds a narrowed subset (EicCode + EicLongName only) ready for joining. +- `mh` holds the full EIC tibble (all columns). +- `m` holds a narrowed subset (EicCode + EicLongName only) ready for joining. After both caches are warm, subsequent API calls within the same session perform zero downloads for EIC enrichment. @@ -455,13 +476,13 @@ Invalidation is entirely automatic. `cachem` expires entries silently after to disable caching, and no cache versioning. Restarting the R session clears both caches. ---- +-------------------------------------------------------------------------------- ## 4. 
End-to-End Data Flow

The following traces a call to `load_actual_total()`:

-```
+``` 
load_actual_total(eic, period_start, period_end, tidy_output = TRUE)
│
├─ checkmate: assert EIC format, token presence, ≤365-day range
@@ -471,7 +492,7 @@ load_actual_total(eic, period_start, period_end, tidy_output = TRUE)
└─ api_req_safe(query_string, security_token)
   └─ api_req()
      ├─ Build URL: https://web-api.tp.entsoe.eu/api?{query}&securityToken=<...>
-      ├─ GET, 60s timeout, log masked URL
+      ├─ GET, 60s timeout, retry 3× on 503 (10s backoff), log masked URL
      ├─ HTTP 200 / text/xml → resp_body_xml() [or zip → read_zipped_xml()]
      └─ HTTP error → calc_offset_urls() + recurse [or cli_abort()]
│
@@ -485,26 +506,27 @@ load_actual_total(eic, period_start, period_end, tidy_output = TRUE)
├─ Convert DateTime → POSIXct(UTC), numeric columns → numeric
├─ my_snakecase() → normalised column names
├─ tidy_or_not() → one row per data point (A01/A03 handled)
-  ├─ add_type_names() → join built-in type tables (no network)
-  ├─ add_eic_names() → get_area_eic_name() [cache m / mh]
+  ├─ add_type_names() → join built-in type tables (no network)
+  ├─ add_eic_names() → get_resource_object_eic() [cache m]
├─ add_definitions() → join built-in definition tables (no network)
├─ Filter to whitelist columns
└─ Sort rows
│
-  └─ rbindlist() + as_tibble() if multiple XML docs
+  └─ dplyr::bind_rows() + as_tibble() if multiple XML docs
│
└─ tibble returned to user (or NULL)
```

----
+--------------------------------------------------------------------------------

## 5. 
Configuration Reference | Setting | Value | Location | -|---|---|---| +|---------------------------|---------------------------|---------------------------| | API base URL | `https://web-api.tp.entsoe.eu/api?` | `.api_scheme`, `.api_domain`, `.api_name` in `R/constants.R` | | HTTP method | GET | `api_req()` in `R/utils.R` | | HTTP timeout | 60 seconds (`.req_timeout`) | `R/constants.R`, applied in `api_req()` | +| Retry on 503 | Up to 3 attempts, 10-second backoff | `req_retry()` in `api_req()` | | Security token env var | `ENTSOE_PAT` | All user-facing functions | | Verbose logging | Response headers only | `api_req()` in `R/utils.R` | | Cache max age | 3600 seconds / 1 hour (`.max_age`) | `R/constants.R`, applied in `R/utils.R` and `R/en_helpers.R` | @@ -513,12 +535,12 @@ load_actual_total(eic, period_start, period_end, tidy_output = TRUE) | XML encoding | UTF-8 | `api_req()` and `xml_to_table()` | | ZIP content types | `application/zip`, `application/octet-stream` | `api_req()` in `R/utils.R` | ---- +-------------------------------------------------------------------------------- ## 6. 
Code References | Component | File | Key Symbols | -|---|---|---| +|---------------------------|---------------------------|---------------------------| | Package constants | `R/constants.R` | `.api_scheme`, `.api_domain`, `.api_name`, `.req_timeout`, `.max_age` | | EIC checksum validation | `R/utils.R` | `assert_eic()`, `possible_eic_chars` | | Provider check | `R/utils.R` | `there_is_provider()` | @@ -530,21 +552,21 @@ load_actual_total(eic, period_start, period_end, tidy_output = TRUE) | Pagination | `R/utils.R` | `calc_offset_urls()` | | XML engine entry | `R/utils.R` | `extract_response()` | | XML engine core | `R/utils.R` | `xml_to_table()` | -| XML parsing | `R/utils.R` | `extract_leaf_twig_branch()`, `extract_nodesets()`, `grouping_by_common_strings()` | +| XML parsing | `R/utils.R` | `extract_leaf_twig_branch()`, `extract_nodesets()` | | Column naming | `R/utils.R` | `my_snakecase()` | | Time series | `R/utils.R` | `tidy_or_not()` | -| Type enrichment | `R/utils.R` | `add_type_names()`, `def_merge()` | -| EIC enrichment | `R/utils.R` | `add_eic_names()`, `eic_name_merge()`, `get_area_eic_name()`, `get_resource_object_eic()` | +| Type enrichment | `R/utils.R` | `add_type_names()`, `lookup_merge()` | +| EIC enrichment | `R/utils.R` | `add_eic_names()`, `lookup_merge()`, `get_resource_object_eic()` | | Definition enrichment | `R/utils.R` | `add_definitions()` | | EIC download functions | `R/en_helpers.R` | `party_eic()`, `area_eic()`, `resource_object_eic()`, `all_allocated_eic()`, et al. | | Built-in type tables | `R/data.R` | `asset_types`, `business_types`, `process_types`, `message_types`, et al. | ---- +-------------------------------------------------------------------------------- ## 7. 
Glossary | Term | Definition | -|---|---| +|----------------------------------------|----------------------------------------| | EIC | Energy Identification Code — a 16-character alphanumeric code (digits, uppercase letters, `-`) identifying market participants, bidding zones, transmission lines, etc. on the ENTSO-E platform; the 16th character is a weighted-modulo-37 checksum of the first 15 | | Document type | A 3-character ENTSO-E code (e.g., A65) identifying the category of data being requested | | Process type | A 3-character ENTSO-E code (e.g., A16) qualifying the sub-type of a document type | diff --git a/vignettes/da-price-spread-vignette.Rmd b/vignettes/da-price-spread-vignette.Rmd index e8926e2d..e1f09142 100644 --- a/vignettes/da-price-spread-vignette.Rmd +++ b/vignettes/da-price-spread-vignette.Rmd @@ -5,11 +5,15 @@ vignette: > %\VignetteIndexEntry{Day-Ahead Price Spread Vignette} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} +editor_options: + markdown: + wrap: 80 --- ```{r setup, include = FALSE} run_chunks <- identical(Sys.getenv("NOT_CRAN"), "true") && - nchar(Sys.getenv("ENTSOE_PAT")) > 0L + nchar(Sys.getenv("ENTSOE_PAT")) > 0L && + entsoeapi::there_is_provider() knitr::opts_chunk$set( collapse = TRUE, @@ -22,22 +26,23 @@ knitr::opts_chunk$set( ) ``` -```{r load packages} +```{r load-packages} library(entsoeapi) suppressPackageStartupMessages(library(dplyr)) suppressPackageStartupMessages(library(lubridate)) +library(kableExtra) library(cli) library(ggplot2) ``` ### Look for the Polish market EIC and set the start and the end of scope dates -```{r preset constants} +```{r preset-constants} pl_eic <- all_approved_eic() |> filter(eic_long_name == "Poland") |> pull(eic_code) -from_ts <- force_tz(time = as.Date("2026-01-01"), tzone = "CET") +from_ts <- ymd(x = "2026-01-01", tz = "CET") till_ts <- from_ts + weeks(x = 1L) cli_inform("Polish EIC: '{pl_eic}'") @@ -47,7 +52,7 @@ cli_inform("till: {till_ts}") ### Query the Polish DA prices 
within the pre-set period -```{r query da prices} +```{r query-da-prices} da_prices <- energy_prices( eic = pl_eic, period_start = from_ts, @@ -60,25 +65,29 @@ glimpse(da_prices) ### Calculate the daily minimum and maximum prices and the spread -```{r calculate the daily spreads} +```{r calculate-the-daily-spreads} da_spreads <- da_prices |> - select(c(ts_point_dt_start, ts_point_price_amount)) |> mutate( ts_point_dt_start = with_tz(time = ts_point_dt_start, tzone = "CET") ) |> - mutate(ts_point_date = as.Date(x = ts_point_dt_start, tz = "CET")) |> - group_by(ts_point_date) |> + mutate( + ts_point_date = as.Date(x = ts_point_dt_start, tz = "CET") + ) |> summarise( min_price = min(ts_point_price_amount, na.rm = TRUE), max_price = max(ts_point_price_amount, na.rm = TRUE), - .groups = "drop" + .by = ts_point_date ) |> mutate(price_spread = max_price - min_price) + +da_spreads |> + kbl(format = "pipe") |> + cat(sep = "\n") ``` ### Plot the daily minimum and maximum prices and the spread -```{r plot the daily spreads} +```{r plot-the-daily-spreads} print( ggplot(data = da_spreads) + geom_segment( diff --git a/vignettes/generation-mix-analysis.Rmd b/vignettes/generation-mix-analysis.Rmd new file mode 100644 index 00000000..1852850d --- /dev/null +++ b/vignettes/generation-mix-analysis.Rmd @@ -0,0 +1,419 @@ +--- +title: "Generation Mix Analysis" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Generation Mix Analysis} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +editor_options: + markdown: + wrap: 80 +--- + +```{r setup, include = FALSE} +run_chunks <- identical(Sys.getenv("NOT_CRAN"), "true") && + nchar(Sys.getenv("ENTSOE_PAT")) > 0L && + entsoeapi::there_is_provider() + +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + eval = run_chunks, + fig.width = 10, + fig.height = 6, + out.width = "98%", + out.height = "98%" +) +``` + +```{r load-packages} +library(entsoeapi) +suppressPackageStartupMessages(library(dplyr)) 
+suppressPackageStartupMessages(library(lubridate)) +library(cli) +suppressPackageStartupMessages(library(kableExtra)) +library(ggplot2) +``` + +## Introduction + +This vignette demonstrates how to analyze electricity generation mix data using +the entsoeapi package. We'll fetch generation by production type for Germany and +create visualizations to understand the composition of the power supply. + +This vignette covers: + +- Fetching generation data by production type +- Understanding production type codes +- Visualizing the generation mix over time +- Calculating renewable penetration +- Comparing different time periods + +## Generation Data Overview + +The entsoeapi package provides several functions for generation data: + +| Function | Description | Typical Use | +|----|----|----| +| `gen_per_prod_type()` | Generation by production type | Main analysis function | +| `gen_installed_capacity_per_pt()` | Installed capacity by type | Capacity analysis | +| `gen_installed_capacity_per_pu()` | Capacity per production unit | Detailed analysis | +| `gen_per_gen_unit()` | Generation per unit | Unit-level data | +| `gen_day_ahead_forecast()` | Day-ahead forecasts | Forecasting | +| `gen_wind_solar_forecasts()` | Wind/solar forecasts | Renewable forecasting | +| `gen_storage_mean_filling_rate()` | Storage filling rates | Storage analysis | + +## Fetching Generation Data + +### Finding Germany's EIC Code + +First, let's get Germany's bidding zone EIC code: + +```{r find-german-eic} +# Germany's bidding zone EIC +de_zone <- area_eic() |> + filter(eic_long_name == "Germany") |> + pull(eic_code) + +cli_h1("Germany Bidding Zone") +cli_text("EIC: {de_zone}") +``` + +### Production Type Codes + +ENTSO-E uses standard production type codes: + +```{r list-production-types} +asset_types |> + filter(startsWith(x = code, prefix = "B")) |> + select(c(code, title)) |> + kbl(format = "pipe") |> + cat(sep = "\n") +``` + +### Fetching Generation by Production Type + +Use 
`gen_per_prod_type()` to get generation data: + +```{r fetch-generation, warning = FALSE} +cli_h1("Fetching Generation Data") + +# Define time range +from_ts <- ymd(x = "2024-06-01", tz = "CET") +till_ts <- from_ts + days(7) + +cli_inform("Period: {from_ts} to {till_ts}") + +# Fetch generation by production type +de_generation <- gen_per_prod_type( + eic = de_zone, + period_start = from_ts, + period_end = till_ts, + tidy_output = TRUE +) + +cli_alert_success("Retrieved {nrow(de_generation)} data points") +``` + +## Exploring the Data + +### Understanding Output Columns + +The output includes many columns with production type information: + +```{r explore-columns} +glimpse(de_generation) +``` + +Key columns for generation mix analysis: + +| Column | Description | +|-----------------------|-------------------------------------------| +| `ts_point_dt_start` | Timestamp | +| `ts_point_quantity` | Generation quantity | +| `ts_mkt_psr_type` | Power system resource type code (B01-B20) | +| `ts_mkt_psr_type_def` | Human-readable power system resource type | + +### Production Types in the Data + +Check which production types are present: + +```{r production-types} +cli_h1("Production Types in Dataset") + +de_generation |> + summarize( + n_points = n(), + total_mwh = sum(x = ts_point_quantity, na.rm = TRUE) |> round(), + .by = c(ts_mkt_psr_type, ts_mkt_psr_type_def) + ) |> + arrange(desc(total_mwh)) |> + kbl(format = "pipe") |> + cat(sep = "\n") +``` + +## Visualization + +### Stacked Area Chart of Generation Mix + +Visualize the composition of the generation mix over time: + +```{r stacked-area-chart} +# Prepare data for visualization +gen_plot_data <- de_generation |> + mutate( + ts_point_dt_start = with_tz(ts_point_dt_start, tzone = "CET"), + date = as_date(x = ts_point_dt_start, tz = "CET"), + hour = with_tz(time = ts_point_dt_start, tzone = "CET") |> hour() + ) |> + summarize( + total_mwh = sum(ts_point_quantity, na.rm = TRUE), + .by = c(date, hour, ts_mkt_psr_type_def) 
+ ) |> + mutate(datetime = ymd_h(paste(date, hour), tz = "CET")) + +# Define colors for production types +production_colors <- c( + "Biomass" = "#FF6347", + "Fossil Brown coal/Lignite" = "#8B4513", + "Fossil Coal-derived gas" = "#D93333", + "Fossil Gas" = "#4682B4", + "Fossil Hard coal" = "#2F4F4F", + "Fossil Oil" = "#256232", + "Geothermal" = "#00FFFF", + "Hydro Run-of-river head installation" = "#4169E1", + "Hydro-electric pure pumped storage head installation" = "#1E90FF", + "Hydro-electric storage head installation" = "#87CEEB", + "Other renewable" = "#90EE90", + "Other unspecified" = "#808080", + "Solar unspecified" = "#FFD700", + "Waste" = "#A9A9A9", + "Wind Offshore" = "#00CED1", + "Wind Onshore" = "#32CD32" +) + +# Create stacked area plot +ggplot( + data = gen_plot_data, + mapping = aes( + x = datetime, + y = total_mwh, + fill = ts_mkt_psr_type_def + ) +) + + geom_area(position = "stack") + + scale_fill_manual( + values = production_colors, + name = NULL + ) + + labs( + title = "Germany: Daily Generation Mix Over Time (Stacked Area)", + subtitle = "June 2024 - first 7 days", + x = "Date", + y = "Generation (MWh)" + ) + + theme_minimal() + + theme(legend.position = "bottom") + + scale_x_datetime(date_breaks = "1 day", date_labels = "%b %d") + + scale_y_continuous(labels = scales::comma) +``` + +### Daily Average Generation by Type + +Compare average daily generation by production type: + +```{r daily-average} +# Calculate daily averages +daily_avg <- de_generation |> + mutate(date = as.Date(x = ts_point_dt_start, tz = "CET")) |> + summarize( + avg_mw = mean(ts_point_quantity, na.rm = TRUE), + .by = c(date, ts_mkt_psr_type_def) + ) |> + summarize( + avg_daily_mwh = mean(avg_mw) * 24, + .by = ts_mkt_psr_type_def + ) |> + arrange(desc(avg_daily_mwh)) + +# Bar chart +ggplot( + data = daily_avg, + mapping = aes( + x = reorder(ts_mkt_psr_type_def, avg_daily_mwh), + y = avg_daily_mwh + ) +) + + geom_col(fill = "steelblue") + + labs( + title = "Germany: Average 
Daily Generation by Production Type", + subtitle = "June 2024", + x = "Production Type", + y = "Average Daily Generation (MWh)" + ) + + scale_y_continuous(labels = scales::comma) + + theme_minimal() + + coord_flip() +``` + +## Renewable Penetration Analysis + +Calculate the share of renewable generation: + +```{r renewable-penetration} +# Define renewable types +renewable_types <- c( + "Biomass", + "Solar unspecified", + "Wind Offshore", + "Wind Onshore", + "Hydro Run-of-river head installation", + "Hydro-electric pure pumped storage head installation", + "Hydro-electric storage head installation", + "Geothermal", + "Other renewable" +) + +# Calculate penetration +gen_summary <- de_generation |> + mutate( + is_renewable = ts_mkt_psr_type_def %in% renewable_types, + date = as.Date(x = ts_point_dt_start, tz = "CET") + ) |> + summarize( + total_gen = sum(ts_point_quantity, na.rm = TRUE), + renewable_gen = sum(ts_point_quantity[is_renewable], na.rm = TRUE), + renewable_pct = renewable_gen / total_gen * 100, + .by = date + ) + +print(gen_summary) + +# Daily renewable share +ggplot( + data = gen_summary, + mapping = aes(x = date, y = renewable_pct) +) + + geom_col(fill = "forestgreen", alpha = 0.7) + + labs( + title = "Germany: Daily Renewable Penetration", + subtitle = "June 2024 - first 7 days", + x = "Date", + y = "Renewable Share (%)" + ) + + ylim(0, 100) + + scale_x_date(date_breaks = "1 day", date_labels = "%b %d") + + theme_minimal() +``` + +## Capacity Factor Analysis + +Calculate capacity factors for renewable generation: + +```{r capacity-factor} +# Get installed capacity (simplified - uses example values) +cli_inform( + "Note: Full capacity analysis requires matching installed capacity data" +) + +# Estimate capacity factors for solar and wind +capacity_estimates <- de_generation |> + filter( + ts_mkt_psr_type_def %in% c( + "Solar unspecified", "Wind Onshore", "Wind Offshore" + ) + ) |> + mutate(date = as.Date(x = ts_point_dt_start, tz = "CET")) |> + 
summarize( + max_mw = max(ts_point_quantity, na.rm = TRUE), + avg_mw = mean(ts_point_quantity, na.rm = TRUE), + .by = c(date, ts_mkt_psr_type_def) + ) |> + mutate(capacity_factor = avg_mw / max_mw) + +ggplot( + data = capacity_estimates, + mapping = aes( + x = date, + y = capacity_factor, + color = ts_mkt_psr_type_def + ) +) + + geom_line() + + facet_wrap(~ts_mkt_psr_type_def, scales = "free_y") + + labs( + title = "Germany: Estimated Capacity Factors by Day", + subtitle = "Ratio of average to peak generation", + x = "Date", + y = "Capacity Factor" + ) + + theme_minimal() + + theme( + legend.position = "bottom", + legend.title = element_blank() + ) + + ylim(0, 1) +``` + +## Comparing Time Periods + +Compare generation mix between different time periods: + +```{r compare-periods, warning = FALSE} +# Fetch data for two weeks +week1 <- gen_per_prod_type( + eic = de_zone, + period_start = lubridate::ymd(x = "2024-06-01", tz = "CET"), + period_end = lubridate::ymd(x = "2024-06-08", tz = "CET"), + tidy_output = TRUE +) |> + mutate(period = "Week 1") + +week2 <- gen_per_prod_type( + eic = de_zone, + period_start = lubridate::ymd(x = "2024-06-15", tz = "CET"), + period_end = lubridate::ymd(x = "2024-06-22", tz = "CET"), + tidy_output = TRUE +) |> + mutate(period = "Week 3") + +# Combine and compare +combined <- bind_rows(week1, week2) |> + summarize( + total_gen = sum(ts_point_quantity, na.rm = TRUE), + .by = c(period, ts_mkt_psr_type_def) + ) |> + mutate( + pct = total_gen / sum(total_gen) * 100, + .by = period + ) + +# Plot comparison +ggplot( + data = combined, + mapping = aes(x = ts_mkt_psr_type_def, y = pct, fill = period) +) + + geom_col(position = "dodge") + + coord_flip() + + labs( + title = "Germany: Generation Mix Comparison", + subtitle = "Week 1st vs Week 3rd of June 2024", + x = "Production Type", + y = "Share of Generation (%)" + ) + + theme_minimal() +``` + +## Summary + +This vignette demonstrated: + +1. 
**Fetching generation data** with `gen_per_prod_type()`
+2. **Understanding production type codes** from B01 to B20
+3. **Visualizing the generation mix** with stacked area charts
+4. **Calculating renewable penetration** over time
+5. **Capacity factor estimation** for renewable sources
+6. **Comparing time periods** to identify trends
+
+The entsoeapi package makes it straightforward to analyze European electricity
+generation data for research, policy analysis, or business intelligence.
diff --git a/vignettes/getting-started.Rmd b/vignettes/getting-started.Rmd
new file mode 100644
index 00000000..e2e3267e
--- /dev/null
+++ b/vignettes/getting-started.Rmd
@@ -0,0 +1,284 @@
+---
+title: "Getting Started with entsoeapi"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Getting Started with entsoeapi}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+editor_options:
+  markdown:
+    wrap: 80
+---
+
+```{r setup, include = FALSE}
+run_chunks <- identical(Sys.getenv("NOT_CRAN"), "true") &&
+  nchar(Sys.getenv("ENTSOE_PAT")) > 0L &&
+  entsoeapi::there_is_provider()
+
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>",
+  eval = run_chunks,
+  fig.width = 10,
+  fig.height = 6,
+  out.width = "98%",
+  out.height = "98%"
+)
+```
+
+## Introduction
+
+The `entsoeapi` package provides a standardized R interface to the ENTSO-E
+(European Network of Transmission System Operators for Electricity) Transparency
+Platform API. This platform publishes electricity market, generation, load,
+transmission, outage, and balancing data for European countries.
+ +This vignette will guide you through: + +- Setting up the package and obtaining API access +- Checking connectivity and platform news +- Finding Energy Identification Codes (EICs) for areas of interest +- Making your first API call +- Understanding the output structure + +## Prerequisites + +### Obtaining an API Token + +To use the ENTSO-E Transparency Platform API, you need a security token: + +1. Register at +2. After registration, your token should be available in your profile +3. Set the `ENTSOE_PAT` environment variable in your .Renviron file: + +```{r set-env-token, eval = FALSE} +usethis::edit_r_environ() +``` + +For persistent access, add this line to your `.Renviron` file: + +```{bash, eval = FALSE} +# In your ~/.Renviron file: +ENTSOE_PAT=your_token_here +``` + +## Installation + +Install the development version from GitHub: + +```{r install-pkg, eval = FALSE} +# Install from GitHub +remotes::install_github("krose/entsoeapi") + +# Or install from CRAN (when available) +install.packages("entsoeapi") +``` + +Load the packages: + +```{r load-packages} +library(entsoeapi) +suppressPackageStartupMessages(library(dplyr)) +library(cli) +library(lubridate) +library(kableExtra) +``` + +### Testing Connectivity + +The `there_is_provider()` function checks if the ENTSO-E API is reachable: + +```{r test-connectivity} +# Check if the API is accessible +there_is_provider() +``` + +### Checking Platform News + +The `get_news()` function fetches the latest announcements from the ENTSO-E +Transparency Platform RSS feed. 
This is useful for checking planned maintenance +windows or data publication delays before running a batch of queries: + +```{r check-news} +# Show the latest 3 news items +get_news(n = 3L) +``` + +The result is returned invisibly as a tibble, so you can also capture and filter +it: + +```{r capture-news, eval = FALSE} +news <- get_news(n = 10L) +news |> + subset(subset = grepl(pattern = "maintenance", x = title, ignore.case = TRUE)) +``` + +## Finding EIC Codes + +Energy Identification Codes (EICs) are 16-character codes that uniquely identify +market participants, bidding zones, transmission lines, and other entities on +the ENTSO-E platform. + +The `all_approved_eic()` function returns a comprehensive list of approved EIC +codes: + +```{r find-germany-eic} +# Find Germany's bidding zone EIC +germany_eic <- all_approved_eic() |> + filter(eic_long_name == "Germany_Luxemburg") |> + pull(eic_code) + +cli_h1("Germany Bidding Zone EIC") +cli_text(germany_eic) +``` + +The `area_eic()` function provides a focused lookup for bidding zones: + +```{r area-eic} +# Get all bidding zones +bidding_zones <- area_eic() +cli_h1("Available Bidding Zones") +cli_text("Total zones: {nrow(bidding_zones)}") +bidding_zones |> + select(eic_code, eic_long_name) |> + head(10) |> + kbl(format = "pipe") |> + cat(sep = "\n") +``` + +## Your First API Call + +Let's fetch day-ahead energy prices for Germany. 
The `energy_prices()` function +queries the market data endpoint: + +```{r first-api-call} +# Define time range (one week of data) +from_ts <- ymd("2026-01-01", tz = "CET") +till_ts <- from_ts + weeks(1L) + +cli_h1("Querying Day-Ahead Energy Prices") +cli_inform("Period: {from_ts} to {till_ts}") +cli_inform("Area: {germany_eic}") + +# Fetch the data +da_prices <- energy_prices( + eic = germany_eic, + period_start = from_ts, + period_end = till_ts, + contract_type = "A01", # Day-ahead + tidy_output = TRUE, + security_token = Sys.getenv("ENTSOE_PAT") +) + +cli_alert_success("Retrieved {nrow(da_prices)} rows") +``` + +### Understanding the Output + +The `tidy_output = TRUE` format returns one row per data point: + +```{r explore-output} +# Examine the structure +glimpse(da_prices) +``` + +Key columns in the output: + +| Column | Description | +|----------------------------|------------------------------------------| +| `ts_point_dt_start` | Timestamp for each data point | +| `ts_point_price_amount` | Price value | +| `ts_resolution` | Time resolution (e.g., PT60M for hourly) | +| `ts_business_type` | Type of price (Spot price = A62) | +| `ts_market_agreement_type` | Contract type (A01 = Daily contract) | + +### The Nested Alternative + +With `tidy_output = FALSE`, the data is structured differently: + +```{r nested-output} +# Same query with nested output +da_prices_nested <- energy_prices( + eic = germany_eic, + period_start = from_ts, + period_end = till_ts, + contract_type = "A01", + tidy_output = FALSE +) + +glimpse(da_prices_nested) +``` + +With nested output, each row represents a time period, and all data points are +stored in the `ts_point` list-column. This format preserves the original API +structure and can be more efficient for certain operations. 
+ +## Common Patterns + +### Setting Date Ranges + +Use lubridate for robust date handling: + +```{r date-ranges} +# Various date range examples +last_week <- ymd(Sys.Date()) - days(7L) +today <- ymd(Sys.Date()) + +# Month of data +month_start <- floor_date(Sys.Date(), "month") - months(1L) +month_end <- floor_date(Sys.Date(), "month") - days(1L) +``` + +### The One-Year Limit + +Most ENTSO-E endpoints enforce a maximum query range of one year. The package +handles this transparently through automatic pagination: + +```{r one-year-query, warning = FALSE} +# Query close to the one-year limit +year_start <- ymd("2024-01-01", tz = "CET") +year_end <- ymd("2024-12-31", tz = "CET") + +# The package will handle pagination automatically if needed +# This may take longer for large datasets +cli_warn("Querying a full year - this may take a moment") + +da_prices_year <- energy_prices( + eic = germany_eic, + period_start = year_start, + period_end = year_end, + contract_type = "A01", + tidy_output = TRUE +) + +cli_alert_success("Retrieved {nrow(da_prices_year)} rows for the year") +``` + +### Timezone Considerations + +All timestamps are returned in UTC. Use lubridate to convert: + +```{r timezones} +# Convert timestamps to CET/CEST +da_prices |> + mutate( + ts_point_dt_start_cet = with_tz(time = ts_point_dt_start, tzone = "CET") + ) |> + select(ts_point_dt_start, ts_point_dt_start_cet, ts_point_price_amount) |> + head(n = 5L) |> + kbl(format = "pipe") |> + cat(sep = "\n") +``` + +## What's Next? 
+ +Now that you've made your first API call, explore these topics: + +- **EIC Codes**: The `area_eic()`, `tie_line_eic()`, and other EIC functions + help you find codes for specific regions and entities +- **Time Series Restructuring**: Understanding the `tidy_output` parameter + helps you work with the data effectively +- **Generation Mix**: Analyze generation by production type with + `gen_per_prod_type()` +- **Architecture**: See the package architecture vignette for details on how + the package works internally diff --git a/vignettes/time-series-restructuring.Rmd b/vignettes/time-series-restructuring.Rmd new file mode 100644 index 00000000..09a9c6d9 --- /dev/null +++ b/vignettes/time-series-restructuring.Rmd @@ -0,0 +1,445 @@ +--- +title: "Time Series Restructuring Explained" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Time Series Restructuring Explained} + %\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + markdown: + wrap: 80 +--- + +```{r setup, include = FALSE} +run_chunks <- identical(Sys.getenv("NOT_CRAN"), "true") && + nchar(Sys.getenv("ENTSOE_PAT")) > 0L && + entsoeapi::there_is_provider() + +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + eval = run_chunks, + fig.width = 10, + fig.height = 6, + out.width = "98%", + out.height = "98%" +) +``` + +```{r load-packages} +library(entsoeapi) +suppressPackageStartupMessages(library(dplyr)) +library(cli) +library(lubridate) +library(kableExtra) +library(tidyr) +``` + +## Introduction + +The ENTSO-E API encodes time series data in a compact XML format that differs +from the typical "one row per observation" structure used in most R analysis +workflows. This vignette explains how the entsoeapi package transforms this +compact representation into analysis-ready tibbles. 
+ +This vignette covers: + +- The compact time series encoding problem +- The `tidy_output` parameter and its two formats +- Curve types A01 (fix sized blocks) and A03 (variable sized blocks) +- Supported time resolutions +- Practical examples for both output formats + +## The Compact Encoding Problem + +ENTSO-E XML responses encode time series efficiently: + +``` xml + + PT60M + + 2024010100000 + 2024010200000 + + + 1 + 150.25 + + + 2 + 152.10 + + + +``` + +Instead of storing 24 timestamps, the API stores: + +1. A start time (`2024-01-01 00:00:00`) +2. A resolution (`PT60M` = 60 minutes = 1 hour) +3. Positions (1, 2, 3, ...) mapped to actual times +4. Values at each position + +This compact format is efficient for data transmission but requires +reconstruction for analysis in R. + +## The tidy_output Parameter + +Most functions in entsoeapi accept a `tidy_output` parameter: + +| Setting | Output Format | Best For | +|----------------------|----------------------------------|------------------------| +| `tidy_output = TRUE` (default) | One row per data point | Analysis, plotting, aggregation | +| `tidy_output = FALSE` | One row per period, nested points | Large datasets, preserving structure | + +### tidy_output = TRUE: One Row Per Point + +With `tidy_output = TRUE`, each row represents a single data point: + +```{r tidy-output-true, warning = FALSE} +# Define parameters +es_zone <- "10YES-REE------0" +from_ts <- ymd(x = "2024-01-01", tz = "CET") +till_ts <- from_ts + days(1L) + +cli_h1("tidy_output = TRUE (Default)") + +# Fetch with tidy output +da_prices_tidy <- energy_prices( + eic = es_zone, + period_start = from_ts, + period_end = till_ts, + contract_type = "A01", + tidy_output = TRUE +) + +cli_text("Rows: {nrow(da_prices_tidy)}") +cli_text("Columns: {ncol(da_prices_tidy)}") + +# Examine structure +da_prices_tidy |> + mutate( + ts_point_dt_start = with_tz(time = ts_point_dt_start, tzone = "CET") + ) |> + select( + ts_point_dt_start, + ts_point_price_amount, + 
ts_resolution + ) |> + kbl(format = "pipe") |> + cat(sep = "\n") +``` + +Key columns in tidy output: + +| Column | Description | +|-------------------------|--------------------------------------------| +| `ts_point_dt_start` | Reconstructed timestamp for each point | +| `ts_point_price_amount` | The actual value | +| `ts_point_position` | Original position in the series (internal) | + +### tidy_output = FALSE: Nested Output + +With `tidy_output = FALSE`, each row represents a time period with all data +points nested in a list-column: + +```{r tidy-output-false, warning = FALSE} +cli_h1("tidy_output = FALSE (Nested)") + +# Fetch with nested output +da_prices_nested <- energy_prices( + eic = es_zone, + period_start = from_ts, + period_end = till_ts, + contract_type = "A01", + tidy_output = FALSE +) + +cli_text("Rows: {nrow(da_prices_nested)}") +cli_text("Columns: {ncol(da_prices_nested)}") + +# Examine structure +da_prices_nested |> + mutate( + ts_time_interval_start = with_tz( + time = ts_time_interval_start, + tzone = "CET" + ) + ) |> + select( + ts_time_interval_start, + ts_resolution, + ts_point + ) |> + kbl(format = "pipe") |> + cat(sep = "\n") +``` + +Each row contains a `ts_point` list-column with nested data: + +```{r nested-detail} +# Extract first period's points +da_prices_nested$ts_point[[1]] |> + kbl(format = "pipe") |> + cat(sep = "\n") +``` + +### When to Use Each Format + +Use **tidy output** when you need to: + +- Plot time series directly with ggplot2 +- Aggregate to different time grains (hourly, daily, weekly) +- Filter by specific timestamps +- Join with other tidy data +- Perform standard dplyr operations + +Use **nested output** when you need to: + +- Work with large datasets efficiently +- Preserve the original API structure +- Perform operations on entire periods at once +- Work with hierarchical/nested data structures + +## Curve Types + +ENTSO-E distinguishes between five curve types that affect how data is +structured. 
+
+- A01 – SEQUENTIAL FIXED SIZE BLOCKS
+- A02 – POINT
+- A03 – VARIABLE SIZED BLOCK
+- A04 – OVERLAPPING BREAKPOINT
+- A05 – NON-OVERLAPPING BREAKPOINT
+
+See details
+[here](https://eepublicdownloads.entsoe.eu/clean-documents/EDI/Library/cim_based/Introduction_of_different_Timeseries_possibilities__curvetypes__with_ENTSO-E_electronic_document_v1.4.pdf).
+
+So far we have implemented the processing of the A01 and A03 curve types. These
+two have proved to be sufficient up to this point.
+
+### A01: Sequential Fixed Size Blocks
+
+Curve type A01 represents regular, evenly-spaced data points:
+
+``` xml
+1
+A01
+A08
+10YFR-RTE------C
+MAW
+A01
+ B10
+ 
+ 2020-01-31T23:00Z 2020-02-01T23:00Z
+ PT60M
+ 1 1466
+ 2 2023
+ 3 2365
+ 4 3027
+ 5 3247
+ 6 3179
+ 7 2871
+ 8 2179
+ 9 1152
+ 10 395
+ 11 67
+ 12 75
+ 13 494
+ 14 1297
+ 15 2230
+ 16 2798
+ 17 2897
+ 18 1572
+ 19 1033
+ 20 1070
+ 21 1587
+ 22 2058
+ 23 1641
+ 24 547
+ 
+```
+
+A01 data has consistent intervals between points (e.g., every 60 minutes).
+
+### A03: Variable Sized Blocks
+
+Curve type A03 represents variable sized block data where some positions may be
+absent:
+
+``` xml
+1
+A01
+A08
+10YFR-RTE------C
+MAW
+A03
+ B10
+ 
+ 2020-01-31T23:00Z 2020-02-01T23:00Z
+ PT60M
+ 1 1466
+ 2 2023
+ 3 2365
+ 4 3027
+ 7 2871
+ 8 2179
+ 9 1152
+ 10 395
+ 11 67
+ 13 494
+ 14 1297
+ 15 2230
+ 16 2798
+ 17 2897
+ 18 1572
+ 19 1033
+ 21 1587
+ 22 2058
+ 23 1641
+ 24 547
+ 
+```
+
+A03 data may have gaps at certain positions.
+
+The package automatically handles A03 data by:
+
+1. Building a complete positional frame
+2. Performing a full join to identify gaps
+3.
Carrying forward the last observed value (LOCF) to fill gaps + +## Supported Time Resolutions + +The ENTSO-E API supports various time resolutions: + +| Resolution Code | Duration | Typical Use | +|-----------------|------------|------------------------------| +| `PT4S` | 4 seconds | Automatic generation control | +| `PT1M` | 1 minute | Fast frequency response | +| `PT15M` | 15 minutes | Intraday markets | +| `PT30M` | 30 minutes | Half-hourly markets | +| `PT60M` | 1 hour | Hourly day-ahead | +| `P1D` | 1 day | Daily data | +| `P7D` | 1 week | Weekly data | +| `P1M` | 1 month | Monthly data | +| `P1Y` | 1 year | Yearly data | + +The package automatically calculates timestamps based on: + +``` +timestamp = period_start + (position - 1) × resolution +``` + +### Resolution Examples + +```{r resolution-examples} +cli_h1("Time Resolution Examples") + +# Show data grouped by resolution +da_prices_tidy |> + summarize( + points = n(), + start = min(ts_point_dt_start), + end = max(ts_point_dt_start), + .by = ts_resolution + ) |> + kbl(format = "pipe") |> + cat(sep = "\n") +``` + +## Practical Examples + +### Aggregating Hourly to Daily + +With tidy output, aggregation is straightforward: + +```{r aggregate-daily} +cli_h1("Aggregating to Daily Values") + +da_prices_tidy |> + mutate(date = as.Date(x = ts_point_dt_start, tz = "CET")) |> + summarize( + min_price = min(ts_point_price_amount), + max_price = max(ts_point_price_amount), + mean_price = mean(ts_point_price_amount), + n_points = n(), + .by = date + ) |> + kbl(format = "pipe") |> + cat(sep = "\n") +``` + +### Working with Nested Output + +Extract and process nested points: + +```{r process-nested} +cli_h1("Processing Nested Points") + +# Get first period's points +first_period_points <- da_prices_nested |> + mutate(n_points = lengths(ts_point)) |> + select(ts_time_interval_start, n_points, ts_point) |> + slice(1) + +# Unnest the points +first_period_points |> + unnest(ts_point) |> + kbl(format = "pipe") |> + cat(sep = 
"\n") +``` + +### Timezone Conversions + +All timestamps are returned in UTC. Convert to your timezone: + +```{r timezone-conversion} +library(lubridate) + +cli_h1("Timezone Conversions") + +da_prices_tidy |> + mutate( + utc = ts_point_dt_start, + cet = with_tz(time = ts_point_dt_start, tzone = "CET"), + est = with_tz(time = ts_point_dt_start, tzone = "America/New_York") + ) |> + select(utc, cet, est, ts_point_price_amount) |> + kbl(format = "pipe") |> + cat(sep = "\n") +``` + +### Handling Missing Points + +With tidy output, missing points are already handled: + +```{r check-missings} +cli_h1("Checking for Missing Data") + +# Check for NA values +na_count <- is.na(da_prices_tidy$ts_point_price_amount) |> + sum() +cli_text("NA values in price column: {na_count}") + +# For A03 data, the package fills gaps with LOCF +# Check if any positions had gaps filled +if ("ts_curve_type" %in% names(da_prices_tidy)) { + a03_count <- sum(da_prices_tidy$ts_curve_type == "A03", na.rm = TRUE) + if (a03_count > 0) { + cli_inform("A03 data: gaps filled using last observation carried forward") + } +} +``` + +## Summary + +The `tidy_output` parameter controls how time series data is structured: + +| Format | Rows | Best For | +|---------|----------------|----------------------------------| +| `TRUE` | One per point | Analysis, plotting, aggregation | +| `FALSE` | One per period | Large data, preserving structure | + +The package handles curve types (A01 fix sized blocks, A03 variable sized +blocks) and all common time resolutions automatically. 
diff --git a/vignettes/working-with-eic-codes.Rmd b/vignettes/working-with-eic-codes.Rmd new file mode 100644 index 00000000..9f5e524d --- /dev/null +++ b/vignettes/working-with-eic-codes.Rmd @@ -0,0 +1,489 @@ +--- +title: "Working with EIC Codes" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Working with EIC Codes} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +editor_options: + markdown: + wrap: 80 +--- + +```{r setup, include = FALSE} +run_chunks <- identical(Sys.getenv("NOT_CRAN"), "true") && + nchar(Sys.getenv("ENTSOE_PAT")) > 0L && + entsoeapi::there_is_provider() + +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + eval = run_chunks, + fig.width = 10, + fig.height = 6, + out.width = "98%", + out.height = "98%" +) +``` + +```{r load-packages} +library(entsoeapi) +suppressPackageStartupMessages(library(dplyr)) +library(cli) +library(lubridate) +library(kableExtra) +``` + +## Introduction + +Energy Identification Codes (EICs) are 16-character alphanumeric codes that +uniquely identify market participants, bidding zones, transmission lines, and +other entities on the ENTSO-E platform. Understanding EICs is essential for +querying data through the entsoeapi package. 
+ +This vignette covers: + +- EIC code structure and validation +- The eight EIC lookup functions +- Choosing between `all_approved_eic()` and `all_allocated_eic()` +- Practical examples for finding and using EIC codes + +## EIC Code Structure + +### The 16-Character Format + +EIC codes follow a specific format: + +``` +10YDE-VE-------2 +^^^^ ^^ ^^^^^^^ + || || || + || || ++-- Checksum (weighted-modulo-37) + || ++-------- Country/area code + ++------------- Type code +``` + +Example: `10YDE-VE-------2` + +- `10Y` - Type code for bidding zone (Y = area) +- `DE` - Country code (Germany) +- `VE` - Area code +- `-------` - Filler characters +- `2` - Checksum character + +### EIC Type Codes + +The first three characters indicate the EIC type: + +| Type Code | Description | Example | +|-----------|-------------------------------|-------------------------| +| `10Y` | Bidding zones / control areas | `10YDE-VE-------2` | +| `10X` | Market participants | `10X1001A1001A42F` | +| `10Z` | Interconnectors / tie lines | `10Z-DE-AT---------W` | +| `10W` | Power system resources | `10WGRAR-10ZDE-ENBW--Q` | +| `10V` | Locations | `10V1001A1001A48H` | +| `10T` | Substations | `10T-DE-VE-------Q` | +| `11Y` | Accounting points | `11YDE-1001A0001K` | + +### Checksum Validation + +The package automatically validates EIC codes using the ENTSO-E +weighted-modulo-37 algorithm. 
Invalid codes will produce an error:
+
+```{r checksum-validation}
+# EIC with an invalid checksum - will fail
+try(
+  expr = energy_prices(
+    eic = "ABCDEF1234567890",
+    period_start = ymd("2026-01-01", tz = "CET"),
+    period_end = ymd("2026-01-02", tz = "CET"),
+    contract_type = "A07",
+    tidy_output = TRUE
+  )
+)
+```
+
+## The Eight EIC Lookup Functions
+
+The entsoeapi package provides eight functions to look up EIC codes, organized
+by entity type:
+
+### area_eic() - Bidding Zones
+
+Returns all bidding zones and control areas:
+
+```{r area-eic}
+# Get all bidding zones
+zones <- area_eic()
+
+cli_h1("Bidding Zones")
+cli_text("Total zones: {nrow(zones)}")
+
+# Find specific countries
+zones |>
+  filter(
+    grepl(
+      pattern = "Switzerland|Schweiz|Swiss",
+      x = eic_long_name,
+      ignore.case = TRUE
+    )
+  ) |>
+  select(eic_code, eic_long_name, type) |>
+  kbl(format = "pipe") |>
+  cat(sep = "\n")
+```
+
+### party_eic() - Market Participants
+
+Returns market participants (generators, traders, etc.):
+
+```{r party-eic}
+# Get market participants
+parties <- party_eic()
+
+cli_h1("Market Participants")
+cli_text("Total parties: {nrow(parties)}")
+
+# Find Swiss market participants
+parties |>
+  filter(
+    grepl(
+      pattern = "Switzerland|Schweiz|Swiss",
+      x = eic_long_name,
+      ignore.case = TRUE
+    )
+  ) |>
+  select(eic_code, eic_long_name, type) |>
+  kbl(format = "pipe") |>
+  cat(sep = "\n")
+```
+
+### accounting_point_eic() - Accounting Points
+
+Returns accounting point EICs:
+
+```{r accounting-point-eic}
+acc_points <- accounting_point_eic()
+
+cli_h1("Accounting Points")
+cli_text("Total accounting points: {nrow(acc_points)}")
+
+# Sample entries
+acc_points |>
+  filter(
+    grepl(
+      pattern = "Switzerland|Schweiz|Swiss",
+      x = eic_long_name,
+      ignore.case = TRUE
+    )
+  ) |>
+  select(eic_code, eic_long_name, type) |>
+  kbl(format = "pipe") |>
+  cat(sep = "\n")
+```
+
+### tie_line_eic() - Interconnectors
+
+Returns transmission lines between bidding zones:
+
+```{r tie-line-eic}
+tie_lines <- tie_line_eic() + +cli_h1("Tie Lines (Interconnectors)") +cli_text("Total interconnectors: {nrow(tie_lines)}") + +# Find German interconnectors +tie_lines |> + filter( + grepl( + pattern = "Switzerland|Schweiz|Swiss", + x = eic_long_name, + ignore.case = TRUE + ) + ) |> + select(eic_code, eic_long_name, type) |> + kbl(format = "pipe") |> + cat(sep = "\n") +``` + +### location_eic() - Locations + +Returns location EICs (V codes): + +```{r location-eic} +locations <- location_eic() + +cli_h1("Locations") +cli_text("Total locations: {nrow(locations)}") + +locations |> + filter( + grepl( + pattern = "Switzerland|Schweiz|Swiss", + x = eic_long_name, + ignore.case = TRUE + ) + ) |> + select(eic_code, eic_long_name, type) |> + kbl(format = "pipe") |> + cat(sep = "\n") +``` + +### resource_object_eic() - Power Resources + +Returns power system resources (generating units, loads): + +```{r resource-object-eic} +resources <- resource_object_eic() + +cli_h1("Power Resources") +cli_text("Total resources: {nrow(resources)}") + +# Find German power plants +resources |> + filter( + grepl( + pattern = "KW-DE|DE-TU", + x = eic_long_name, + ignore.case = TRUE + ) + ) |> + select(eic_code, eic_long_name, type) |> + kbl(format = "pipe") |> + cat(sep = "\n") +``` + +### substation_eic() - Substations + +Returns substation EICs (A codes): + +```{r substation-eic} +substations <- substation_eic() + +cli_h1("Substations") +cli_text("Total substations: {nrow(substations)}") + +# Sample entries +substations |> + head(12L) |> + select(eic_code, eic_long_name, type) |> + kbl(format = "pipe") |> + cat(sep = "\n") +``` + +### all_approved_eic() - All Approved EICs + +Combines all approved EICs into a single tibble: + +```{r all-approved-eic} +all_eic <- all_approved_eic() + +cli_h1("All Approved EICs") +cli_text("Total EICs: {nrow(all_eic)}") + +# Count by type +all_eic |> + count(type, sort = TRUE) |> + mutate(pct = round(x = n / sum(n) * 100, digits = 2L)) |> + kbl(format = 
"pipe") |> + cat(sep = "\n") +``` + +## all_approved_eic() vs all_allocated_eic() + +The package provides two comprehensive EIC functions with different +characteristics: + +### Comparison Table + +| Feature | all_approved_eic() | all_allocated_eic() | +|------------------|--------------------|---------------------| +| Data source | CSV downloads | XML download | +| Speed | Fast (seconds) | Slow (minutes) | +| Update frequency | Less frequent | More current | +| Columns | Standardized | Extended details | +| Columns returned | 11 columns | 18 columns | + +### all_approved_eic() + +Fast and efficient for most use cases: + +```{r all-approved-detail} +cli_h1("all_approved_eic() - Column Details") +approved_eic <- all_approved_eic() +glimpse(approved_eic) +``` + +Use this function when you need: + +- Quick lookups +- Basic EIC information +- Standard column structure + +### all_allocated_eic() + +Provides more detailed information but is slower: + +```{r all-allocated-detail, warning = FALSE} +cli_h1("all_allocated_eic() - Column Details") +cli_inform("Note: This function is slow (downloads ~70MB XML)") + +allocated_eic <- all_allocated_eic() +glimpse(allocated_eic) +``` + +Column comparison between `all_allocated_eic()` and `all_approced_eic`: + +```{r} +cli_h1("all_allocated_eic()") +allocated_eic |> + filter(eic_code == "50WG00000001997X") |> + t() |> + kbl(format = "pipe") |> + cat(sep = "\n") + +cli_h1("all_approved_eic()") + +approved_eic |> + filter(eic_code == "50WG00000001997X") |> + t() |> + kbl(format = "pipe") |> + cat(sep = "\n") +``` + +Use `all_allocated_eic()` function when you need: + +- More detailed participant information +- Current allocation status +- Extended metadata + +## Practical Examples + +### Finding Germany's EIC Codes + +```{r find-germany-codes} +cli_h1("German bidding zones") +all_eic |> + filter( + grepl( + pattern = "Germany.*bidding|area.*germany", + x = eic_long_name, + ignore.case = TRUE + ) + ) |> + select(eic_code, 
eic_long_name, type) |> + kbl(format = "pipe") |> + cat(sep = "\n") + +cli_h1("German TSOs") +party_eic() |> + filter( + grepl( + pattern = "50Hertz|Amprion|TenneT|TransnetBW", + x = eic_long_name, + ignore.case = TRUE + ) + ) |> + filter( + grepl( + pattern = "System Operator", + x = eic_type_function_list, + ignore.case = TRUE + ) + ) |> + filter( + grepl( + pattern = "GmbH", + x = eic_long_name, + ignore.case = TRUE + ) + ) |> + select(eic_code, eic_long_name, type) |> + kbl(format = "pipe") |> + cat(sep = "\n") +``` + +### Finding Nordic Bidding Zones + +```{r nordic-zones} +cli_h1("Nordic Bidding Zones") + +nordic_countries <- c("Sweden", "Norway", "Denmark", "Finland") +zones |> + filter(eic_long_name %in% nordic_countries) |> + select(eic_code, eic_long_name, type) |> + kbl(format = "pipe") |> + cat(sep = "\n") +``` + +### Finding Transmission Lines Between Countries + +```{r cross-border} +cli_h1("Cross-Border Transmission Lines") + +# Find Germany-France interconnector +tie_line_eic() |> + filter( + grepl( + pattern = "DE.*FR|FR.*DE|Germany.*France", + x = eic_long_name, + ignore.case = TRUE + ) + ) |> + select(eic_code, eic_long_name, type) |> + kbl(format = "pipe") |> + cat(sep = "\n") +``` + +### Combining EIC Data with API Queries + +Use EIC lookups to construct queries: + +```{r combine-with-query, warning = FALSE} +cli_h1("Using EICs in Queries") +# Query intraday prices for Spain +from_ts <- ymd("2026-01-01", tz = "CET") +till_ts <- from_ts + days(3L) + +es_prices <- energy_prices( + eic = "10YES-REE------0", + period_start = from_ts, + period_end = till_ts, + contract_type = "A07", + tidy_output = TRUE +) + +cli_alert_success("Retrieved {nrow(es_prices)} intraday price points for Spain") + +# Show sample data +es_prices |> + filter(ts_classification_sequence_position == 1L) |> + select( + market_agreement_type_def, + ts_point_dt_start, + ts_point_price_amount, + ts_currency_unit_name + ) |> + head(12) |> + kbl(format = "pipe") |> + cat(sep = "\n") 
+``` + +## Summary + +The entsoeapi package provides eight EIC lookup functions to help you find the +codes needed for your queries: + +| Function | Returns | EIC Type | +|--------------------------|---------------------|-----------| +| `area_eic()` | Bidding zones | Y codes | +| `party_eic()` | Market participants | X codes | +| `accounting_point_eic()` | Accounting points | Y codes | +| `tie_line_eic()` | Interconnectors | Z codes | +| `location_eic()` | Locations | V codes | +| `resource_object_eic()` | Power resources | W codes | +| `substation_eic()` | Substations | A codes | +| `all_approved_eic()` | All approved EICs | All types | + +For comprehensive lookups, use `all_approved_eic()` for speed or +`all_allocated_eic()` for extended details.