From 69d14e0dce2fe9e134a7c72e81566708aca5653b Mon Sep 17 00:00:00 2001 From: Justin Date: Sat, 21 Feb 2026 17:02:44 -0700 Subject: [PATCH 01/45] refactor: initial rust rewrite from python --- dragon-runner-rs/Cargo.lock | 761 +++++++++++++++++++++++++ dragon-runner-rs/Cargo.toml | 16 + dragon-runner-rs/src/cli.rs | 191 +++++++ dragon-runner-rs/src/config.rs | 430 ++++++++++++++ dragon-runner-rs/src/error.rs | 67 +++ dragon-runner-rs/src/harness.rs | 464 +++++++++++++++ dragon-runner-rs/src/lib.rs | 9 + dragon-runner-rs/src/log.rs | 44 ++ dragon-runner-rs/src/main.rs | 85 +++ dragon-runner-rs/src/runner.rs | 437 ++++++++++++++ dragon-runner-rs/src/testfile.rs | 244 ++++++++ dragon-runner-rs/src/toolchain.rs | 116 ++++ dragon-runner-rs/src/util.rs | 71 +++ dragon-runner-rs/tests/test_config.rs | 107 ++++ dragon-runner-rs/tests/test_grader.rs | 42 ++ dragon-runner-rs/tests/test_runner.rs | 64 +++ dragon-runner-rs/tests/test_runtime.rs | 76 +++ 17 files changed, 3224 insertions(+) create mode 100644 dragon-runner-rs/Cargo.lock create mode 100644 dragon-runner-rs/Cargo.toml create mode 100644 dragon-runner-rs/src/cli.rs create mode 100644 dragon-runner-rs/src/config.rs create mode 100644 dragon-runner-rs/src/error.rs create mode 100644 dragon-runner-rs/src/harness.rs create mode 100644 dragon-runner-rs/src/lib.rs create mode 100644 dragon-runner-rs/src/log.rs create mode 100644 dragon-runner-rs/src/main.rs create mode 100644 dragon-runner-rs/src/runner.rs create mode 100644 dragon-runner-rs/src/testfile.rs create mode 100644 dragon-runner-rs/src/toolchain.rs create mode 100644 dragon-runner-rs/src/util.rs create mode 100644 dragon-runner-rs/tests/test_config.rs create mode 100644 dragon-runner-rs/tests/test_grader.rs create mode 100644 dragon-runner-rs/tests/test_runner.rs create mode 100644 dragon-runner-rs/tests/test_runtime.rs diff --git a/dragon-runner-rs/Cargo.lock b/dragon-runner-rs/Cargo.lock new file mode 100644 index 0000000..9d821f6 --- /dev/null +++ 
b/dragon-runner-rs/Cargo.lock @@ -0,0 +1,761 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "bitflags" +version = "2.11.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "clap" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "colored" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" +dependencies = [ + "lazy_static", + "windows-sys 0.59.0", +] + +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + 
"csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + +[[package]] +name = "dragon-runner-rs" +version = "0.1.0" +dependencies = [ + "clap", + "colored", + "csv", + "glob", + "regex", + "serde", + "serde_json", + "tempfile", + "thiserror", + "wait-timeout", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "getrandom" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", + "wasip3", +] + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", + "serde", + "serde_core", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.182" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" + +[[package]] 
+name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "regex" +version = "1.12.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" + +[[package]] +name = "rustix" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "wait-timeout" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" +dependencies = [ + "libc", +] + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] 
+name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/dragon-runner-rs/Cargo.toml b/dragon-runner-rs/Cargo.toml new file mode 100644 index 0000000..65bcd8c --- /dev/null +++ b/dragon-runner-rs/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "dragon-runner-rs" +version = "0.1.0" +edition = "2021" + +[dependencies] +serde = { version = "1", features = ["derive"] } +serde_json = "1" +clap = { version = "4", features = ["derive"] } +thiserror = "2" +glob = "0.3" +tempfile = "3" +colored = "2" +csv = "1" +regex = "1" +wait-timeout = "0.2" diff --git a/dragon-runner-rs/src/cli.rs b/dragon-runner-rs/src/cli.rs new file mode 100644 index 0000000..29ce0e7 --- /dev/null +++ b/dragon-runner-rs/src/cli.rs @@ -0,0 +1,191 @@ +use std::fmt; + +use clap::{Args, Parser, Subcommand}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Mode { + Regular, + Tournament, + Perf, + Memcheck, +} + +impl fmt::Display for Mode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Mode::Regular => write!(f, "regular"), + Mode::Tournament => write!(f, "tournament"), + Mode::Perf => 
write!(f, "perf"), + Mode::Memcheck => write!(f, "memcheck"), + } + } +} + +/// Shared flags available in all modes. +#[derive(Args, Debug, Clone)] +pub struct CommonFlags { + /// Path to the JSON configuration file + pub config_file: String, + + /// Path to write failure log + #[arg(long = "fail-log", default_value = "")] + pub failure_log: String, + + /// Timeout in seconds for each step + #[arg(long, default_value_t = 2.0)] + pub timeout: f64, + + /// Verify CCID in packages + #[arg(long)] + pub verify: bool, + + /// Debug a specific package path + #[arg(long = "debug-package", default_value = "")] + pub debug_package: String, + + /// Filter packages by glob pattern (case insensitive) + #[arg(short = 'p', long = "package", default_value = "")] + pub package_filter: String, + + /// Show timing information + #[arg(short = 't', long = "time")] + pub time: bool, + + /// Increase verbosity (can be repeated: -v, -vv, -vvv) + #[arg(short = 'v', long = "verbosity", action = clap::ArgAction::Count)] + pub verbosity: u8, + + /// Show test case contents on failure + #[arg(short = 's', long = "show-testcase")] + pub show_testcase: bool, + + /// Output file path + #[arg(short = 'o', long = "output", default_value = "")] + pub output: String, + + /// Stop on first failure + #[arg(short = 'f', long = "fast-fail")] + pub fast_fail: bool, +} + +/// CMPUT 415 testing utility +#[derive(Parser, Debug)] +#[command(name = "dragon-runner", about = "CMPUT 415 testing utility")] +pub struct Cli { + #[command(subcommand)] + pub command: Commands, +} + +#[derive(Subcommand, Debug)] +pub enum Commands { + /// Run in regular mode (default) + Regular { + #[command(flatten)] + flags: CommonFlags, + }, + /// Run in tournament/grading mode + Tournament { + #[command(flatten)] + flags: CommonFlags, + }, + /// Run performance tests + Perf { + #[command(flatten)] + flags: CommonFlags, + }, + /// Run with memory checking (valgrind) + Memcheck { + #[command(flatten)] + flags: CommonFlags, + }, +} + 
+#[derive(Debug, Clone)] +pub struct RunnerArgs { + pub mode: Mode, + pub config_file: String, + pub output: String, + pub failure_log: String, + pub debug_package: String, + pub package_filter: String, + pub timeout: f64, + pub time: bool, + pub verbosity: u32, + pub verify: bool, + pub show_testcase: bool, + pub fast_fail: bool, +} + +impl Default for RunnerArgs { + fn default() -> Self { + Self { + mode: Mode::Regular, + config_file: String::new(), + output: String::new(), + failure_log: String::new(), + debug_package: String::new(), + package_filter: String::new(), + timeout: 2.0, + time: false, + verbosity: 0, + verify: false, + show_testcase: false, + fast_fail: false, + } + } +} + +impl RunnerArgs { + fn from_flags(mode: Mode, flags: CommonFlags) -> Self { + Self { + mode, + config_file: flags.config_file, + output: flags.output, + failure_log: flags.failure_log, + debug_package: flags.debug_package, + package_filter: flags.package_filter, + timeout: flags.timeout, + time: flags.time, + verbosity: flags.verbosity as u32, + verify: flags.verify, + show_testcase: flags.show_testcase, + fast_fail: flags.fast_fail, + } + } +} + +/// Parse CLI arguments into RunnerArgs. +/// +/// Supports: `dragon-runner config.json [flags...]` +/// If no recognized mode is given, inserts "regular" so clap can parse it. +pub fn parse_cli_args() -> RunnerArgs { + let raw_args: Vec = std::env::args().collect(); + + // If the user omits the mode subcommand, default to "regular". + // Detect this by checking whether the second arg is a known subcommand. 
+ let known_modes = ["regular", "tournament", "perf", "memcheck"]; + let args_to_parse = if raw_args.len() >= 2 && !known_modes.contains(&raw_args[1].as_str()) && !raw_args[1].starts_with('-') { + // Insert "regular" as the subcommand + let mut patched = vec![raw_args[0].clone(), "regular".to_string()]; + patched.extend_from_slice(&raw_args[1..]); + patched + } else { + raw_args + }; + + let cli = Cli::parse_from(args_to_parse); + + let (mode, flags) = match cli.command { + Commands::Regular { flags } => (Mode::Regular, flags), + Commands::Tournament { flags } => (Mode::Tournament, flags), + Commands::Perf { flags } => (Mode::Perf, flags), + Commands::Memcheck { flags } => (Mode::Memcheck, flags), + }; + + let args = RunnerArgs::from_flags(mode, flags); + + // Set debug environment variable + std::env::set_var("DRAGON_RUNNER_DEBUG", args.verbosity.to_string()); + + args +} diff --git a/dragon-runner-rs/src/config.rs b/dragon-runner-rs/src/config.rs new file mode 100644 index 0000000..a9b54d1 --- /dev/null +++ b/dragon-runner-rs/src/config.rs @@ -0,0 +1,430 @@ +use std::env; +use std::fs; +use std::path::{Path, PathBuf}; + +use crate::cli::RunnerArgs; +use crate::error::{Error, ErrorCollection, Verifiable}; +use crate::log::log; +use crate::testfile::TestFile; +use crate::toolchain::ToolChain; +use crate::util::resolve_relative; + +/// Represents a set of tests in a directory. 
+#[derive(Debug, Clone)] +pub struct SubPackage { + pub path: String, + pub name: String, + pub tests: Vec, +} + +impl SubPackage { + pub fn new(path: &str) -> Self { + let name = Path::new(path) + .file_name() + .unwrap_or_default() + .to_string_lossy() + .into_owned(); + + let tests = if Path::new(path).is_dir() { + Self::gather_tests(path) + } else { + vec![TestFile::new(path)] + }; + + Self { + path: path.to_string(), + name, + tests, + } + } + + fn gather_tests(dir: &str) -> Vec { + let mut tests = Vec::new(); + if let Ok(entries) = fs::read_dir(dir) { + for entry in entries.flatten() { + let entry_path = entry.path(); + if TestFile::is_test(&entry_path) { + tests.push(TestFile::new(&entry_path.to_string_lossy())); + } + } + } + tests.sort_by(|a, b| a.file.cmp(&b.file)); + tests + } +} + +impl Verifiable for SubPackage { + fn verify(&self) -> ErrorCollection { + let mut ec = ErrorCollection::new(); + for test in &self.tests { + ec.extend(&test.verify()); + } + ec + } +} + +/// Represents a single test package. 
+#[derive(Debug, Clone)] +pub struct Package { + pub path: String, + pub name: String, + pub n_tests: usize, + pub subpackages: Vec, +} + +impl Package { + pub fn new(path: &str) -> Self { + let name = Path::new(path) + .file_name() + .unwrap_or_default() + .to_string_lossy() + .into_owned(); + + let mut pkg = Self { + path: path.to_string(), + name, + n_tests: 0, + subpackages: Vec::new(), + }; + + if Path::new(path).is_dir() { + pkg.gather_subpackages(); + } else { + let spkg = SubPackage::new(path); + pkg.add_subpackage(spkg); + } + + pkg + } + + fn add_subpackage(&mut self, spkg: SubPackage) { + self.n_tests += spkg.tests.len(); + self.subpackages.push(spkg); + } + + fn gather_subpackages(&mut self) { + // Check for top-level tests in the package dir itself + let top_level = SubPackage::new(&self.path); + if !top_level.tests.is_empty() { + self.add_subpackage(top_level); + } + + // Collect all subdirectory subpackages first, then add them + let path = self.path.clone(); + let collected = Self::collect_subpackages(&path); + for spkg in collected { + self.add_subpackage(spkg); + } + } + + fn collect_subpackages(dir: &str) -> Vec { + let mut result = Vec::new(); + if let Ok(entries) = fs::read_dir(dir) { + for entry in entries.flatten() { + let entry_path = entry.path(); + if entry_path.is_dir() { + let spkg = SubPackage::new(&entry_path.to_string_lossy()); + if !spkg.tests.is_empty() { + result.push(spkg); + } + result.extend(Self::collect_subpackages(&entry_path.to_string_lossy())); + } + } + } + result + } +} + +impl Verifiable for Package { + fn verify(&self) -> ErrorCollection { + let mut ec = ErrorCollection::new(); + for spkg in &self.subpackages { + ec.extend(&spkg.verify()); + } + ec + } +} + +/// Represents a tested executable with an optional runtime. 
+#[derive(Debug, Clone)] +pub struct Executable { + pub id: String, + pub exe_path: String, + pub runtime: String, +} + +impl Executable { + pub fn new(id: &str, exe_path: &str, runtime: &str) -> Self { + Self { + id: id.to_string(), + exe_path: exe_path.to_string(), + runtime: runtime.to_string(), + } + } + + /// Set environment variables for runtime library injection. + pub fn source_env(&self) { + if self.runtime.is_empty() { + return; + } + let runtime_path = Path::new(&self.runtime); + let runtime_dir = runtime_path + .parent() + .unwrap_or(Path::new("")) + .to_string_lossy() + .into_owned(); + let rt_filename = runtime_path + .file_stem() + .unwrap_or_default() + .to_string_lossy() + .into_owned(); + + // Strip leading "lib" prefix for linker flag + let rt_lib = if rt_filename.starts_with("lib") { + rt_filename[3..].to_string() + } else { + rt_filename + }; + + if cfg!(target_os = "macos") { + env::set_var("DYLD_LIBRARY_PATH", &runtime_dir); + env::set_var("DYLD_INSERT_LIBRARIES", &self.runtime); + } else { + env::set_var("LD_LIBRARY_PATH", &runtime_dir); + env::set_var("LD_PRELOAD", &self.runtime); + } + + env::set_var("RT_PATH", &runtime_dir); + env::set_var("RT_LIB", &rt_lib); + } +} + +impl Verifiable for Executable { + fn verify(&self) -> ErrorCollection { + let mut errors = ErrorCollection::new(); + if !Path::new(&self.exe_path).exists() { + errors.add(Error::Config(format!( + "Cannot find binary file: {} in Executable: {}", + self.exe_path, self.id + ))); + } + if !self.runtime.is_empty() && !Path::new(&self.runtime).exists() { + errors.add(Error::Config(format!( + "Cannot find runtime file: {} in Executable: {}", + self.runtime, self.id + ))); + } + errors + } +} + +/// In-memory representation of a JSON configuration file. 
+#[derive(Debug, Clone)] +pub struct Config { + pub name: String, + pub config_path: String, + pub test_dir: String, + pub executables: Vec, + pub solution_exe: Option, + pub toolchains: Vec, + pub packages: Vec, + pub package_filter: String, + pub error_collection: ErrorCollection, +} + +impl Config { + pub fn new( + config_path: &str, + config_data: &serde_json::Value, + debug_package: Option<&str>, + package_filter: &str, + ) -> Self { + let abs_config = fs::canonicalize(config_path) + .unwrap_or_else(|_| PathBuf::from(config_path)); + let abs_config_str = abs_config.to_string_lossy().into_owned(); + + let name = Path::new(config_path) + .file_stem() + .unwrap_or_default() + .to_string_lossy() + .into_owned(); + + let test_dir_rel = config_data + .get("testDir") + .and_then(|v| v.as_str()) + .unwrap_or(""); + let test_dir = resolve_relative(test_dir_rel, &abs_config_str) + .to_string_lossy() + .into_owned(); + + let executables = Self::parse_executables( + config_data.get("testedExecutablePaths"), + config_data.get("runtimes"), + &abs_config_str, + ); + + let solution_exe = config_data + .get("solutionExecutable") + .and_then(|v| v.as_str()) + .map(String::from); + + let toolchains = Self::parse_toolchains(config_data.get("toolchains")); + + let packages = Self::gather_packages(&test_dir, debug_package); + + let mut cfg = Self { + name, + config_path: abs_config_str, + test_dir, + executables, + solution_exe, + toolchains, + packages, + package_filter: package_filter.to_string(), + error_collection: ErrorCollection::new(), + }; + cfg.error_collection = cfg.do_verify(); + cfg + } + + fn parse_executables( + exe_data: Option<&serde_json::Value>, + runtime_data: Option<&serde_json::Value>, + abs_config_path: &str, + ) -> Vec { + let exe_map = match exe_data.and_then(|v| v.as_object()) { + Some(m) => m, + None => return Vec::new(), + }; + let rt_map = runtime_data.and_then(|v| v.as_object()); + + exe_map + .iter() + .map(|(id, path_val)| { + let path_str = 
path_val.as_str().unwrap_or(""); + let exe_path = resolve_relative(path_str, abs_config_path) + .to_string_lossy() + .into_owned(); + + let runtime = rt_map + .and_then(|rts| rts.get(id.as_str())) + .and_then(|v| v.as_str()) + .map(|rt_path| { + let resolved = resolve_relative(rt_path, abs_config_path); + fs::canonicalize(&resolved) + .unwrap_or(resolved) + .to_string_lossy() + .into_owned() + }) + .unwrap_or_default(); + + Executable::new(id, &exe_path, &runtime) + }) + .collect() + } + + fn parse_toolchains(tc_data: Option<&serde_json::Value>) -> Vec { + let tc_map = match tc_data.and_then(|v| v.as_object()) { + Some(m) => m, + None => return Vec::new(), + }; + tc_map + .iter() + .map(|(name, steps_val)| { + let steps = steps_val + .as_array() + .map(|arr| arr.as_slice()) + .unwrap_or(&[]); + ToolChain::new(name, steps) + }) + .collect() + } + + fn gather_packages(test_dir: &str, debug_package: Option<&str>) -> Vec { + if let Some(debug_pkg) = debug_package { + if !debug_pkg.is_empty() { + return vec![Package::new(debug_pkg)]; + } + } + + let mut packages = Vec::new(); + if let Ok(entries) = fs::read_dir(test_dir) { + for entry in entries.flatten() { + let entry_path = entry.path(); + if entry_path.is_dir() { + packages.push(Package::new(&entry_path.to_string_lossy())); + } + } + } + packages + } + + fn do_verify(&self) -> ErrorCollection { + let mut ec = ErrorCollection::new(); + if !Path::new(&self.test_dir).exists() { + // Use the raw testDir value from config for the error message + ec.add(Error::Config(format!( + "Cannot find test directory: {}", + self.test_dir + ))); + } + for exe in &self.executables { + ec.extend(&exe.verify()); + } + for tc in &self.toolchains { + ec.extend(&tc.verify()); + } + for pkg in &self.packages { + ec.extend(&pkg.verify()); + } + ec + } + + pub fn log_test_info(&self) { + log(1, 0, "\nPackages:"); + for pkg in &self.packages { + log(1, 2, &format!("-- ({})", pkg.name)); + for spkg in &pkg.subpackages { + log(2, 4, &format!("-- 
({})", spkg.name)); + for test in &spkg.tests { + log(3, 6, &format!("-- ({})", test.file)); + } + } + } + } +} + +/// Load and parse a JSON configuration file. +pub fn load_config(config_path: &str, args: Option<&RunnerArgs>) -> Option { + if !Path::new(config_path).exists() { + return None; + } + + let content = match fs::read_to_string(config_path) { + Ok(c) => c, + Err(_) => { + log(0, 0, &format!("Config Error: Failed to parse config: {config_path}")); + return None; + } + }; + + let config_data: serde_json::Value = match serde_json::from_str(&content) { + Ok(v) => v, + Err(_) => { + log(0, 0, &format!("Config Error: Failed to parse config: {config_path}")); + return None; + } + }; + + let debug_package = args + .and_then(|a| { + if a.debug_package.is_empty() { + None + } else { + Some(a.debug_package.as_str()) + } + }); + let package_filter = args.map(|a| a.package_filter.as_str()).unwrap_or(""); + + Some(Config::new(config_path, &config_data, debug_package, package_filter)) +} diff --git a/dragon-runner-rs/src/error.rs b/dragon-runner-rs/src/error.rs new file mode 100644 index 0000000..d600851 --- /dev/null +++ b/dragon-runner-rs/src/error.rs @@ -0,0 +1,67 @@ +use std::fmt; + +#[derive(Debug, Clone)] +pub enum Error { + Config(String), + TestFile(String), +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Error::Config(msg) => write!(f, "Config Error: {msg}"), + Error::TestFile(msg) => write!(f, "Testfile Error: {msg}"), + } + } +} + +#[derive(Debug, Clone, Default)] +pub struct ErrorCollection { + pub errors: Vec, +} + +impl ErrorCollection { + pub fn new() -> Self { + Self { errors: Vec::new() } + } + + pub fn has_errors(&self) -> bool { + !self.errors.is_empty() + } + + pub fn add(&mut self, error: Error) { + self.errors.push(error); + } + + pub fn extend(&mut self, other: &ErrorCollection) { + self.errors.extend(other.errors.iter().cloned()); + } + + pub fn extend_errors(&mut self, errors: 
&[Error]) { + self.errors.extend(errors.iter().cloned()); + } + + pub fn len(&self) -> usize { + self.errors.len() + } + + pub fn is_empty(&self) -> bool { + self.errors.is_empty() + } +} + +impl fmt::Display for ErrorCollection { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for (i, err) in self.errors.iter().enumerate() { + if i > 0 { + writeln!(f)?; + } + write!(f, "{err}")?; + } + Ok(()) + } +} + +pub trait Verifiable { + fn verify(&self) -> ErrorCollection; +} diff --git a/dragon-runner-rs/src/harness.rs b/dragon-runner-rs/src/harness.rs new file mode 100644 index 0000000..c40092a --- /dev/null +++ b/dragon-runner-rs/src/harness.rs @@ -0,0 +1,464 @@ +use std::fs::{self, OpenOptions}; +use std::io::Write; + +use colored::Colorize; + +use crate::cli::RunnerArgs; +use crate::config::{Config, Executable, Package}; +use crate::log::log; +use crate::runner::{TestResult, ToolChainRunner}; + +/// Counters passed through hooks during iteration. +pub struct SubPackageCounters { + pub pass_count: usize, + pub test_count: usize, +} + +/// Base harness logic — iterate over executables, toolchains, packages, subpackages, tests. +/// Concrete harnesses implement the hooks. 
pub trait TestHarness {
    fn config(&self) -> &Config;
    fn cli_args(&self) -> &RunnerArgs;
    fn run_passed(&self) -> bool;
    fn set_run_passed(&mut self, val: bool);

    /// Record one finished test; implementations must update `counters`.
    fn process_test_result(&mut self, result: TestResult, counters: &mut SubPackageCounters);

    // Lifecycle hooks — default to no-ops so harnesses override only
    // what they need.
    fn pre_run_hook(&mut self) {}
    fn post_run_hook(&mut self) {}
    fn pre_executable_hook(&mut self, _exe_id: &str) {}
    fn post_executable_hook(&mut self) {}
    fn pre_subpackage_hook(&mut self, _spkg: &crate::config::SubPackage) {}
    fn post_subpackage_hook(&mut self, _counters: &SubPackageCounters) {}

    /// Drive the nested iteration: executables -> toolchains -> packages ->
    /// subpackages -> tests, invoking hooks at each level.
    fn iterate(&mut self) {
        self.pre_run_hook();

        // Clone up front so hooks may borrow `self` mutably during the walk.
        let config = self.config().clone();
        let cli_args = self.cli_args().clone();

        for exe in &config.executables {
            self.pre_executable_hook(&exe.id);
            log(0, 0, &format!("Running executable: {}", exe.id));
            exe.source_env();
            let mut exe_pass = 0;
            let mut exe_total = 0;

            for tc in &config.toolchains {
                let runner = ToolChainRunner::new(tc.clone(), cli_args.timeout);
                log(0, 1, &format!("Running Toolchain: {}", tc.name));
                let mut tc_pass = 0;
                let mut tc_total = 0;

                for pkg in &config.packages {
                    let mut pkg_pass = 0;
                    let mut pkg_total = 0;
                    log(0, 2, &format!("Entering package {}", pkg.name));

                    for spkg in &pkg.subpackages {
                        // Case-insensitive glob filter on the subpackage path;
                        // an invalid pattern disables filtering.
                        if !config.package_filter.is_empty() {
                            let pat = glob::Pattern::new(&config.package_filter.to_lowercase());
                            if let Ok(pat) = pat {
                                if !pat.matches(&spkg.path.to_lowercase()) {
                                    continue;
                                }
                            }
                        }

                        log(0, 3, &format!("Entering subpackage {}", spkg.name));
                        let mut counters = SubPackageCounters {
                            pass_count: 0,
                            test_count: 0,
                        };
                        self.pre_subpackage_hook(spkg);

                        for test in &spkg.tests {
                            let result = runner.run(test, exe);
                            // Capture before `result` moves into the hook.
                            let fast_fail = cli_args.fast_fail && !result.did_pass;
                            self.process_test_result(result, &mut counters);
                            if fast_fail {
                                // Unwind tear-down hooks and bail out early.
                                self.post_subpackage_hook(&counters);
                                self.post_executable_hook();
                                self.post_run_hook();
                                return;
                            }
                        }

                        self.post_subpackage_hook(&counters);
                        log(
                            0,
                            3,
                            &format!(
                                "Subpackage Passed: {} / {}",
                                counters.pass_count, counters.test_count
                            ),
                        );
                        pkg_pass += counters.pass_count;
                        pkg_total += counters.test_count;
                    }

                    log(0, 2, &format!("Package Passed: {} / {}", pkg_pass, pkg_total));
                    tc_pass += pkg_pass;
                    tc_total += pkg_total;
                }

                log(0, 1, &format!("Toolchain Passed: {} / {}", tc_pass, tc_total));
                exe_pass += tc_pass;
                exe_total += tc_total;
            }

            log(0, 0, &format!("Executable Passed: {} / {}", exe_pass, exe_total));
            self.post_executable_hook();
        }

        self.post_run_hook();
    }

    /// Run the full iteration and report overall success.
    fn run(&mut self) -> bool {
        self.iterate();
        self.run_passed()
    }
}

// ---------------------------------------------------------------------------
// RegularHarness
// ---------------------------------------------------------------------------

pub struct RegularHarness {
    pub config: Config,
    pub cli_args: RunnerArgs,
    pub failures: Vec<TestResult>,
    pub passed: bool,
}

impl RegularHarness {
    pub fn new(config: Config, cli_args: RunnerArgs) -> Self {
        Self {
            config,
            cli_args,
            failures: Vec::new(),
            passed: true,
        }
    }
}

impl TestHarness for RegularHarness {
    fn config(&self) -> &Config { &self.config }
    fn cli_args(&self) -> &RunnerArgs { &self.cli_args }
    fn run_passed(&self) -> bool { self.passed }
    fn set_run_passed(&mut self, val: bool) { self.passed = val; }

    /// Log one result and update the counters.
    ///
    /// Fix: passes are counted directly in the `did_pass` branch. The old
    /// code inferred a pass from `failures.last()` having a different file
    /// name, which undercounted whenever a passing test shared its file name
    /// with the most recent failure (common across subpackages), and left a
    /// dead empty `if` block behind.
    fn process_test_result(&mut self, result: TestResult, counters: &mut SubPackageCounters) {
        let test_name = result.test.file.clone();
        counters.test_count += 1;
        if result.did_pass {
            counters.pass_count += 1;
            let tag = if result.error_test { "[E-PASS] " } else { "[PASS] " };
            log(0, 4, &format!("{}{}", tag.green(), test_name));
        } else {
            let tag = if result.error_test { "[E-FAIL] " } else { "[FAIL] " };
            log(0, 4, &format!("{}{}", tag.red(), test_name));
            self.passed = false;
            self.failures.push(result);
        }
    }

    fn post_executable_hook(&mut self) {
        self.failures.clear();
    }
}

// ---------------------------------------------------------------------------
// TournamentHarness
// ---------------------------------------------------------------------------

pub struct TournamentHarness {
    pub config: Config,
    pub cli_args: RunnerArgs,
    pub passed: bool,
}

impl TournamentHarness {
    pub fn new(config: Config, cli_args: RunnerArgs) -> Self {
        Self {
            config,
            cli_args,
            passed: true,
        }
    }

    /// Append the expected/generated output of a failed test to `file`.
    /// Panics if the feedback file cannot be opened (fatal for a tournament).
    fn log_failure_to_file(file: &str, result: &TestResult) {
        if result.did_pass {
            return;
        }
        let mut f = OpenOptions::new()
            .create(true)
            .append(true)
            .open(file)
            .unwrap_or_else(|_| panic!("Cannot open feedback file: {}", file));

        let exp_out = result.test.get_expected_out();
        let gen_out = result.gen_output.as_deref().unwrap_or(b"");

        let _ = writeln!(f, "{}", "=".repeat(80));
        let _ = writeln!(f, "Test: {}", result.test.file);
        let _ = writeln!(f, "\nExpected Output: {:?}", String::from_utf8_lossy(exp_out));
        let _ = writeln!(f, "Generated Output: {:?}", String::from_utf8_lossy(gen_out));
    }
}

impl TestHarness for TournamentHarness {
    fn config(&self) -> &Config { &self.config }
    fn cli_args(&self) -> &RunnerArgs { &self.cli_args }
    fn run_passed(&self) -> bool { self.passed }
    fn set_run_passed(&mut self, val: bool) { self.passed = val; }

    fn process_test_result(&mut self, _result: TestResult, _counters: &mut SubPackageCounters) {
        // Tournament overrides `iterate` wholesale, so this hook is unused.
    }

    /// All-pairs run: every attacking package's tests against every
    /// defending executable, with one CSV grid per toolchain.
    fn iterate(&mut self) {
        let config = self.config.clone();
        let cli_args = self.cli_args.clone();

        let mut attacking_pkgs: Vec<&Package> = config.packages.iter().collect();
attacking_pkgs.sort_by(|a, b| a.name.to_lowercase().cmp(&b.name.to_lowercase())); + + let mut defending_exes: Vec<&Executable> = config.executables.iter().collect(); + defending_exes.sort_by(|a, b| a.id.to_lowercase().cmp(&b.id.to_lowercase())); + + let solution_exe = config.solution_exe.as_deref(); + let failure_log = &cli_args.failure_log; + + for tc in &config.toolchains { + let runner = ToolChainRunner::new(tc.clone(), cli_args.timeout); + + let csv_filename = format!("toolchain_{}.csv", tc.name); + let mut csv_file = fs::File::create(&csv_filename).expect("cannot create CSV"); + + // Header row + let header: Vec<&str> = std::iter::once(tc.name.as_str()) + .chain(attacking_pkgs.iter().map(|p| p.name.as_str())) + .collect(); + let _ = writeln!(csv_file, "{}", header.join(",")); + + println!("\nToolchain: {}", tc.name); + + for def_exe in &defending_exes { + def_exe.source_env(); + let feedback_file = format!("{}-{}feedback.txt", def_exe.id, tc.name); + let mut row_cells: Vec = vec![def_exe.id.clone()]; + + for a_pkg in &attacking_pkgs { + print!("\n {:<12} --> {:<12}", a_pkg.name, def_exe.id); + let mut pass_count = 0; + let mut test_count = 0; + + for a_spkg in &a_pkg.subpackages { + for test in &a_spkg.tests { + let result = runner.run(test, def_exe); + if result.did_pass { + print!("{}", ".".green()); + pass_count += 1; + if solution_exe == Some(&def_exe.id) && !failure_log.is_empty() { + let mut f = OpenOptions::new() + .create(true) + .append(true) + .open("pass_log.txt") + .ok(); + if let Some(ref mut f) = f { + let _ = writeln!( + f, + "{} {} {}", + tc.name, a_pkg.name, result.test.path + ); + } + } + } else { + print!("{}", ".".red()); + Self::log_failure_to_file(&feedback_file, &result); + if solution_exe == Some(&def_exe.id) && !failure_log.is_empty() { + let mut f = OpenOptions::new() + .create(true) + .append(true) + .open(failure_log) + .ok(); + if let Some(ref mut f) = f { + let _ = writeln!( + f, + "{} {} {}", + tc.name, a_pkg.name, 
result.test.path + ); + } + } + } + test_count += 1; + } + } + + row_cells.push(format!("{}/{}", pass_count, test_count)); + } + + let _ = writeln!(csv_file, "{}", row_cells.join(",")); + } + } + } +} + +// --------------------------------------------------------------------------- +// MemoryCheckHarness +// --------------------------------------------------------------------------- + +pub struct MemoryCheckHarness { + pub config: Config, + pub cli_args: RunnerArgs, + pub passed: bool, + pub leak_tests: Vec, + pub test_count: usize, +} + +impl MemoryCheckHarness { + pub fn new(config: Config, cli_args: RunnerArgs) -> Self { + Self { + config, + cli_args, + passed: true, + leak_tests: Vec::new(), + test_count: 0, + } + } +} + +impl TestHarness for MemoryCheckHarness { + fn config(&self) -> &Config { &self.config } + fn cli_args(&self) -> &RunnerArgs { &self.cli_args } + fn run_passed(&self) -> bool { self.passed } + fn set_run_passed(&mut self, val: bool) { self.passed = val; } + + fn process_test_result(&mut self, result: TestResult, counters: &mut SubPackageCounters) { + self.test_count += 1; + counters.test_count += 1; + + let test_name = result.test.file.clone(); + if result.did_pass { + let tag = "[PASS] "; + log(0, 4, &format!("{}{}", tag.green(), test_name)); + counters.pass_count += 1; + } else { + let tag = "[FAIL] "; + log(0, 4, &format!("{}{}", tag.red(), test_name)); + } + + if result.memory_leak { + self.leak_tests.push(result); + } + } + + fn post_executable_hook(&mut self) { + log(0, 0, &format!("Leak Summary: ({} tests)", self.leak_tests.len())); + for result in &self.leak_tests { + log( + 0, + 4, + &format!("{}{}", "[LEAK] ".yellow(), result.test.file), + ); + } + self.leak_tests.clear(); + self.test_count = 0; + } +} + +// --------------------------------------------------------------------------- +// PerformanceTestingHarness +// --------------------------------------------------------------------------- + +pub struct PerformanceTestingHarness { + 
pub config: Config, + pub cli_args: RunnerArgs, + pub passed: bool, + pub csv_cols: Vec>, + pub cur_col: Vec, + pub testfile_col: Vec, + pub first_exec: bool, + pub failures: Vec, +} + +impl PerformanceTestingHarness { + pub fn new(config: Config, cli_args: RunnerArgs) -> Self { + Self { + config, + cli_args, + passed: true, + csv_cols: Vec::new(), + cur_col: Vec::new(), + testfile_col: vec!["Test".to_string()], + first_exec: true, + failures: Vec::new(), + } + } +} + +impl TestHarness for PerformanceTestingHarness { + fn config(&self) -> &Config { &self.config } + fn cli_args(&self) -> &RunnerArgs { &self.cli_args } + fn run_passed(&self) -> bool { self.passed } + fn set_run_passed(&mut self, val: bool) { self.passed = val; } + + fn process_test_result(&mut self, result: TestResult, counters: &mut SubPackageCounters) { + if self.first_exec { + self.testfile_col.push(result.test.file.clone()); + } + + let test_name = result.test.file.clone(); + if result.did_pass { + counters.pass_count += 1; + log(0, 4, &format!("{}{}", "[PASS] ".green(), test_name)); + self.cur_col + .push(result.time.map(|t| format!("{:.4}", t)).unwrap_or_default()); + } else { + self.cur_col + .push(format!("{:.4}", self.cli_args.timeout)); + self.failures.push(result); + } + counters.test_count += 1; + } + + fn pre_executable_hook(&mut self, exe_id: &str) { + self.cur_col.push(exe_id.to_string()); + } + + fn post_executable_hook(&mut self) { + if self.first_exec { + self.csv_cols.push(self.testfile_col.clone()); + self.first_exec = false; + } + self.csv_cols.push(self.cur_col.clone()); + self.cur_col.clear(); + } + + fn post_run_hook(&mut self) { + // Transpose columns into rows + let max_len = self.csv_cols.iter().map(|c| c.len()).max().unwrap_or(0); + let mut f = fs::File::create("perf.csv").expect("cannot create perf.csv"); + for row_idx in 0..max_len { + let row: Vec<&str> = self + .csv_cols + .iter() + .map(|col| { + col.get(row_idx).map(|s| s.as_str()).unwrap_or("") + }) + .collect(); + 
let _ = writeln!(f, "{}", row.join(",")); + } + } +} diff --git a/dragon-runner-rs/src/lib.rs b/dragon-runner-rs/src/lib.rs new file mode 100644 index 0000000..e23f7a6 --- /dev/null +++ b/dragon-runner-rs/src/lib.rs @@ -0,0 +1,9 @@ +pub mod cli; +pub mod config; +pub mod error; +pub mod harness; +pub mod log; +pub mod runner; +pub mod testfile; +pub mod toolchain; +pub mod util; diff --git a/dragon-runner-rs/src/log.rs b/dragon-runner-rs/src/log.rs new file mode 100644 index 0000000..b403475 --- /dev/null +++ b/dragon-runner-rs/src/log.rs @@ -0,0 +1,44 @@ +use std::env; +use std::sync::OnceLock; + +static LOGGER: OnceLock = OnceLock::new(); + +struct Logger { + debug_level: u32, +} + +impl Logger { + fn new() -> Self { + let debug_level = env::var("DRAGON_RUNNER_DEBUG") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(0); + Self { debug_level } + } +} + +fn get_logger() -> &'static Logger { + LOGGER.get_or_init(Logger::new) +} + +/// Log a message at a given verbosity level with indentation. +pub fn log(level: u32, indent: usize, msg: &str) { + let logger = get_logger(); + if logger.debug_level >= level { + let prefix = " ".repeat(indent); + println!("{prefix}{msg}"); + } +} + +/// Log multiline content with indentation. +pub fn log_multiline(content: &str, level: u32, indent: usize) { + for line in content.lines() { + log(level, indent, line.trim_end()); + } +} + +/// Log a delimiter line. 
+pub fn log_delimiter(title: &str, level: u32, indent: usize) { + let delimiter = "-".repeat(20); + log(level, indent, &format!("{delimiter} {title} {delimiter}")); +} diff --git a/dragon-runner-rs/src/main.rs b/dragon-runner-rs/src/main.rs new file mode 100644 index 0000000..8ada199 --- /dev/null +++ b/dragon-runner-rs/src/main.rs @@ -0,0 +1,85 @@ +use colored::Colorize; + +use dragon_runner_rs::cli::{parse_cli_args, Mode}; +use dragon_runner_rs::config::load_config; +use dragon_runner_rs::harness::*; +use dragon_runner_rs::log::log; + +fn main() { + let cli_args = parse_cli_args(); + log(1, 0, &format!("{:?}", cli_args)); + + let config = match load_config(&cli_args.config_file, Some(&cli_args)) { + Some(c) => c, + None => { + log(0, 0, &format!("Could not open config file: {}", cli_args.config_file)); + std::process::exit(1); + } + }; + + if config.error_collection.has_errors() { + log( + 0, + 0, + &format!( + "Found Config {} error(s):", + config.error_collection.len() + ), + ); + log( + 0, + 0, + &format!("Parsed {} below:", cli_args.config_file), + ); + // TODO: config Display impl for pretty printing + log( + 0, + 0, + &format!("{}", config.error_collection).red().to_string(), + ); + std::process::exit(1); + } + + if cli_args.verify { + // CCID verification + let mut input = String::new(); + println!("Enter your CCID/Github Team Name: "); + std::io::stdin() + .read_line(&mut input) + .expect("Failed to read input"); + let ccid = input.trim(); + + let found = config.packages.iter().any(|pkg| { + log(0, 2, &format!("Searching.. 
{}", pkg.name)); + pkg.name == ccid + }); + + if !found { + println!("Could not find package named after CCID: {}", ccid); + std::process::exit(1); + } + } + + config.log_test_info(); + + let success = match cli_args.mode { + Mode::Regular => { + let mut harness = RegularHarness::new(config, cli_args); + harness.run() + } + Mode::Tournament => { + let mut harness = TournamentHarness::new(config, cli_args); + harness.run() + } + Mode::Memcheck => { + let mut harness = MemoryCheckHarness::new(config, cli_args); + harness.run() + } + Mode::Perf => { + let mut harness = PerformanceTestingHarness::new(config, cli_args); + harness.run() + } + }; + + std::process::exit(if success { 0 } else { 1 }); +} diff --git a/dragon-runner-rs/src/runner.rs b/dragon-runner-rs/src/runner.rs new file mode 100644 index 0000000..95f1722 --- /dev/null +++ b/dragon-runner-rs/src/runner.rs @@ -0,0 +1,437 @@ +use std::env; +use std::fs; +use std::path::Path; +use std::process; +use std::time::{Duration, Instant}; + +use regex::Regex; +use wait_timeout::ChildExt; + +use crate::config::Executable; +use crate::testfile::TestFile; +use crate::toolchain::{Step, ToolChain}; +use crate::util::{file_to_bytes, make_tmp_file}; + +/// Reserved exit code for valgrind leak detection. +pub const VALGRIND_EXIT_CODE: i32 = 111; + +/// Magic parameter values substituted into toolchain step arguments. +pub struct MagicParams { + pub exe_path: String, + pub input_file: String, + pub output_file: Option, +} + +/// A resolved command ready to execute. +pub struct Command { + pub args: Vec, + pub cmd: String, +} + +impl Command { + pub fn new(args: Vec) -> Self { + let cmd = args.first().cloned().unwrap_or_default(); + Self { args, cmd } + } +} + +/// Result of executing a single subprocess. 
+pub struct CommandResult { + pub cmd: String, + pub exit_status: i32, + pub stdout: Vec, + pub stderr: Vec, + pub time: f64, + pub timed_out: bool, +} + +impl CommandResult { + pub fn new(cmd: &str) -> Self { + Self { + cmd: cmd.to_string(), + exit_status: 0, + stdout: Vec::new(), + stderr: Vec::new(), + time: 0.0, + timed_out: false, + } + } +} + +/// Result of running a complete test case through a toolchain. +pub struct TestResult { + pub test: TestFile, + pub did_pass: bool, + pub did_timeout: bool, + pub error_test: bool, + pub memory_leak: bool, + pub command_history: Vec, + pub gen_output: Option>, + pub time: Option, + pub failing_step: Option, +} + +impl TestResult { + pub fn new(test: TestFile) -> Self { + Self { + test, + did_pass: false, + did_timeout: false, + error_test: false, + memory_leak: false, + command_history: Vec::new(), + gen_output: None, + time: None, + failing_step: None, + } + } +} + +/// Runs a toolchain against a test file and executable. +pub struct ToolChainRunner { + pub tc: ToolChain, + pub timeout: f64, + reserved_exit_codes: Vec, + runtime_errors: Vec<&'static str>, +} + +impl ToolChainRunner { + pub fn new(tc: ToolChain, timeout: f64) -> Self { + Self { + tc, + timeout, + reserved_exit_codes: vec![VALGRIND_EXIT_CODE], + runtime_errors: vec!["SizeError", "IndexError", "MathError", "StrideError"], + } + } + + /// Run each step of the toolchain for a given test and executable. 
+ pub fn run(&self, test: &TestFile, exe: &Executable) -> TestResult { + let mut input_file = test.path.clone(); + let expected = test.get_expected_out().to_vec(); + let mut tr = TestResult::new(test.clone()); + let tc_len = self.tc.len(); + + for (index, step) in self.tc.iter().enumerate() { + let last_step = index == tc_len - 1; + let input_stream = if step.uses_ins { + test.get_input_stream().to_vec() + } else { + Vec::new() + }; + + let output_file = self.resolve_output_file(step); + let magic = MagicParams { + exe_path: exe.exe_path.clone(), + input_file: input_file.clone(), + output_file: output_file.clone(), + }; + + let command = self.resolve_command(step, &magic); + let cr = self.run_command(&command, &input_stream); + + // Check timeout + if cr.timed_out { + tr.did_pass = false; + tr.did_timeout = true; + tr.failing_step = Some(step.name.clone()); + tr.time = Some(self.timeout); + tr.command_history.push(cr); + return tr; + } + + // Check if OS failed to exec + if cr.exit_status == -1 { + tr.did_pass = false; + tr.command_history.push(cr); + return tr; + } + + let stdout = cr.stdout.clone(); + let stderr = cr.stderr.clone(); + let step_time = (cr.time * 10000.0).round() / 10000.0; + + // Check reserved exit codes (e.g., valgrind) + if self.reserved_exit_codes.contains(&cr.exit_status) { + if cr.exit_status == VALGRIND_EXIT_CODE { + tr.memory_leak = true; + } + } + + if cr.exit_status != 0 + && !self.reserved_exit_codes.contains(&cr.exit_status) + { + tr.gen_output = Some(stderr.clone()); + tr.failing_step = Some(step.name.clone()); + tr.error_test = true; + + if step.allow_error { + self.handle_error_test(&mut tr, &stderr, &expected); + tr.command_history.push(cr); + return tr; + } else { + tr.did_pass = false; + tr.command_history.push(cr); + return tr; + } + } else if last_step { + let final_stdout = if let Some(ref out_path) = output_file { + if !Path::new(out_path).exists() { + tr.command_history.push(cr); + tr.did_pass = false; + return tr; + } + 
file_to_bytes(out_path).unwrap_or_default() + } else { + stdout + }; + + tr.time = Some(step_time); + tr.gen_output = Some(final_stdout.clone()); + tr.did_pass = precise_diff(&final_stdout, &expected).is_empty(); + tr.command_history.push(cr); + return tr; + } else { + // Set up next step's input + input_file = output_file.unwrap_or_else(|| { + make_tmp_file(&stdout).unwrap_or_default() + }); + tr.command_history.push(cr); + } + } + + // Unreachable for well-defined toolchains + panic!("Toolchain reached undefined conditions during execution."); + } + + fn run_command(&self, command: &Command, stdin: &[u8]) -> CommandResult { + let mut cr = CommandResult::new(&command.cmd); + let start = Instant::now(); + + // Use subprocess::run with timeout, mirroring the Python approach + let result = process::Command::new(&command.args[0]) + .args(&command.args[1..]) + .stdin(process::Stdio::piped()) + .stdout(process::Stdio::piped()) + .stderr(process::Stdio::piped()) + .spawn(); + + match result { + Ok(mut child) => { + // Write stdin then close it + if let Some(mut child_stdin) = child.stdin.take() { + use std::io::Write; + let _ = child_stdin.write_all(stdin); + } + + let timeout_dur = Duration::from_secs_f64(self.timeout); + match child.wait_timeout(timeout_dur) { + Ok(Some(status)) => { + // Process exited within timeout — read remaining output + cr.time = start.elapsed().as_secs_f64(); + cr.exit_status = status.code().unwrap_or(1); + + // Read stdout and stderr from the pipes + use std::io::Read; + if let Some(mut out) = child.stdout.take() { + let _ = out.read_to_end(&mut cr.stdout); + } + if let Some(mut err) = child.stderr.take() { + let _ = err.read_to_end(&mut cr.stderr); + } + } + Ok(None) => { + // Still running — timeout + let _ = child.kill(); + let _ = child.wait(); + cr.timed_out = true; + cr.time = self.timeout; + cr.exit_status = 255; + } + Err(_) => { + cr.exit_status = 1; + cr.time = start.elapsed().as_secs_f64(); + } + } + } + Err(_) => { + cr.exit_status 
= -1; + cr.time = start.elapsed().as_secs_f64(); + } + } + + cr + } + + fn resolve_output_file(&self, step: &Step) -> Option { + step.output.as_ref().map(|output| { + let cwd = env::current_dir() + .unwrap_or_default() + .to_string_lossy() + .into_owned(); + if Path::new(output).is_absolute() { + output.clone() + } else { + Path::new(&cwd).join(output).to_string_lossy().into_owned() + } + }) + } + + fn resolve_command(&self, step: &Step, params: &MagicParams) -> Command { + let mut args = vec![step.exe_path.clone()]; + args.extend(step.arguments.iter().cloned()); + let mut command = Command::new(args); + self.replace_magic_args(&mut command, params); + self.replace_env_vars(&mut command); + // Make exe path absolute if relative + if !command.args.is_empty() && !Path::new(&command.args[0]).is_absolute() { + if let Ok(abs) = fs::canonicalize(&command.args[0]) { + command.args[0] = abs.to_string_lossy().into_owned(); + } else if let Ok(cwd) = env::current_dir() { + let abs = cwd.join(&command.args[0]); + command.args[0] = abs.to_string_lossy().into_owned(); + } + } + command.cmd = command.args[0].clone(); + command + } + + fn replace_magic_args(&self, command: &mut Command, params: &MagicParams) { + for arg in command.args.iter_mut() { + if arg.contains("$EXE") { + *arg = arg.replace("$EXE", ¶ms.exe_path); + } else if arg.contains("$INPUT") && !params.input_file.is_empty() { + *arg = arg.replace("$INPUT", ¶ms.input_file); + } else if arg.contains("$OUTPUT") { + if let Some(ref out) = params.output_file { + *arg = arg.replace("$OUTPUT", out); + } + } + } + if let Some(first) = command.args.first() { + command.cmd = first.clone(); + } + } + + fn replace_env_vars(&self, command: &mut Command) { + let re = Regex::new(r"\$(\w+)|\$\{(\w+)\}").unwrap(); + for arg in command.args.iter_mut() { + let original = arg.clone(); + for caps in re.captures_iter(&original) { + let var_name = caps + .get(1) + .or_else(|| caps.get(2)) + .map(|m| m.as_str()) + .unwrap_or(""); + if let 
Ok(val) = env::var(var_name) { + *arg = arg + .replace(&format!("${var_name}"), &val) + .replace(&format!("${{{var_name}}}"), &val); + } + } + } + } + + fn handle_error_test(&self, tr: &mut TestResult, produced: &[u8], expected: &[u8]) { + let produced_str = match std::str::from_utf8(produced) { + Ok(s) => s.trim().to_string(), + Err(_) => { + tr.did_pass = false; + return; + } + }; + let expected_str = match std::str::from_utf8(expected) { + Ok(s) => s.trim().to_string(), + Err(_) => { + tr.did_pass = false; + return; + } + }; + + if produced_str.is_empty() || expected_str.is_empty() { + tr.did_pass = false; + return; + } + + let rt_error = self + .runtime_errors + .iter() + .find(|e| expected_str.contains(**e)) + .copied(); + let did_raise_rt = self + .runtime_errors + .iter() + .any(|e| produced_str.contains(e)); + + if did_raise_rt { + if let Some(rt_err) = rt_error { + let pattern = format!(r"{}(\s+on\s+Line\s+\d+)?(:.+)?", rt_err); + let re = Regex::new(&pattern).unwrap(); + tr.did_pass = re.is_match(&produced_str) && re.is_match(&expected_str); + } else { + tr.did_pass = false; + } + } else { + let error_re = Regex::new(r"(?i)(\w+Error)").unwrap(); + let line_re = Regex::new(r"(?i)on\s+Line\s+(\d+)").unwrap(); + + let prod_error = error_re.captures(&produced_str); + let exp_error = error_re.captures(&expected_str); + let prod_line = line_re.captures(&produced_str); + let exp_line = line_re.captures(&expected_str); + + // MainError hack + if let (Some(ref pe), Some(ref ee)) = (&prod_error, &exp_error) { + if pe.get(1).map(|m| m.as_str()) == Some("MainError") + && ee.get(1).map(|m| m.as_str()) == Some("MainError") + { + tr.did_pass = true; + return; + } + } + + if prod_error.is_some() && exp_error.is_some() && prod_line.is_some() && exp_line.is_some() + { + tr.did_pass = prod_line.unwrap().get(1).map(|m| m.as_str()) + == exp_line.unwrap().get(1).map(|m| m.as_str()); + } else { + tr.did_pass = false; + } + } + } +} + +/// Byte-level diff between two byte 
/// Byte-level diff between two byte slices.
///
/// Positions where the inputs agree render as " <byte>", a mismatch renders as
/// "-<old>+<new>", and any tail beyond the shorter slice renders as "-<byte>"
/// (only in s1) or "+<byte>" (only in s2). Byte values are printed in decimal.
pub fn diff_bytes(s1: &[u8], s2: &[u8]) -> String {
    use std::fmt::Write;

    let mut out = String::new();
    let common = s1.len().min(s2.len());

    // Walk the overlapping prefix pairwise.
    for (a, b) in s1[..common].iter().zip(&s2[..common]) {
        if a == b {
            let _ = write!(out, " {a}");
        } else {
            let _ = write!(out, "-{a}+{b}");
        }
    }
    // Whatever remains exists in only one of the two inputs.
    for a in &s1[common..] {
        let _ = write!(out, "-{a}");
    }
    for b in &s2[common..] {
        let _ = write!(out, "+{b}");
    }
    out
}

/// Return a diff string if produced != expected, empty string if equal.
pub fn precise_diff(produced: &[u8], expected: &[u8]) -> String {
    match produced == expected {
        true => String::new(),
        false => diff_bytes(produced, expected),
    }
}
/// Result of parsing a directive — either successfully read bytes, or an error message.
#[derive(Debug, Clone)]
pub enum DirectiveResult {
    Ok(Vec<u8>),
    Err(String),
}

impl DirectiveResult {
    /// Borrow the parsed directive bytes; an error variant yields an empty slice.
    pub fn as_bytes(&self) -> &[u8] {
        if let DirectiveResult::Ok(bytes) = self {
            bytes
        } else {
            b""
        }
    }

    /// True when directive parsing failed.
    pub fn is_err(&self) -> bool {
        match self {
            DirectiveResult::Ok(_) => false,
            DirectiveResult::Err(_) => true,
        }
    }
}
    /// Generic method to get content based on inline and file directives.
    ///
    /// Resolves the content for one directive pair (e.g. an inline directive
    /// like `CHECK:` and its file variant `CHECK_FILE:`): inline text wins
    /// when present, a file directive is read from disk relative to the
    /// testfile's directory, and supplying both at once is a conflict.
    fn get_content_static(
        test_path: &str,
        comment_syntax: &str,
        inline_directive: &str,
        file_directive: &str,
    ) -> DirectiveResult {
        let inline_contents = Self::get_directive_contents(test_path, comment_syntax, inline_directive);
        let file_contents = Self::get_directive_contents(test_path, comment_syntax, file_directive);

        match (&inline_contents, &file_contents) {
            // Both directives present — conflict
            (Some(Ok(_)), Some(Ok(_))) => DirectiveResult::Err(format!(
                "Directive Conflict for test {}: Supplied both {} and {}",
                Path::new(test_path)
                    .file_name()
                    .unwrap_or_default()
                    .to_string_lossy(),
                inline_directive,
                file_directive,
            )),

            // Only inline directive
            // (also matches inline-Ok + file-Err: the inline content wins)
            (Some(Ok(bytes)), _) => DirectiveResult::Ok(bytes.clone()),
            (Some(Err(e)), _) => DirectiveResult::Err(e.clone()),

            // Only file directive — read referenced file
            (None, Some(Ok(file_ref_bytes))) => {
                // The directive payload is a path relative to the testfile's parent dir.
                let file_str = String::from_utf8_lossy(file_ref_bytes).trim().to_string();
                let parent = Path::new(test_path).parent().unwrap_or(Path::new(""));
                let full_path = parent.join(&file_str);

                if !full_path.exists() {
                    return DirectiveResult::Err(format!(
                        "Failed to locate path supplied to {}\n\tTest:{}\n\tPath:{}\n",
                        file_directive,
                        test_path,
                        full_path.display(),
                    ));
                }

                match file_to_bytes(&full_path.to_string_lossy()) {
                    Some(bytes) => DirectiveResult::Ok(bytes),
                    None => DirectiveResult::Err(format!(
                        "Failed to convert file {} to bytes",
                        full_path.display()
                    )),
                }
            }
            (None, Some(Err(e))) => DirectiveResult::Err(e.clone()),

            // Neither directive — empty
            (None, None) => DirectiveResult::Ok(Vec::new()),
        }
    }
+ fn get_directive_contents( + test_path: &str, + comment_syntax: &str, + directive_prefix: &str, + ) -> Option, String>> { + let file = match fs::File::open(test_path) { + Ok(f) => f, + Err(_) => { + return Some(Err(format!( + "Unkown error occured while parsing testfile: {}", + test_path + ))); + } + }; + + let reader = io::BufReader::new(file); + let mut contents: Vec = Vec::new(); + let mut first_match = true; + + for line_result in reader.lines() { + let line = match line_result { + Ok(l) => l, + Err(_) => { + return Some(Err(format!( + "Unkown error occured while parsing testfile: {}", + test_path + ))); + } + }; + + let comment_index = match line.find(comment_syntax) { + Some(i) => i, + None => continue, + }; + let directive_index = match line.find(directive_prefix) { + Some(i) => i, + None => continue, + }; + + // Comment must appear before directive + if comment_index > directive_index { + continue; + } + + // Extract the right-hand side after the directive + let rhs = match line.split_once(directive_prefix) { + Some((_, rhs)) => rhs, + None => continue, + }; + + let rhs_bytes = str_to_bytes(rhs, true); + + if !first_match { + contents.push(b'\n'); + } + contents.extend_from_slice(&rhs_bytes); + first_match = false; + } + + if first_match { + // No matches found + None + } else { + Some(Ok(contents)) + } + } + + /// Check if a path is a valid test file (not hidden, not .out/.ins extension). 
+ pub fn is_test(test_path: &Path) -> bool { + if !test_path.is_file() { + return false; + } + let name = test_path + .file_name() + .unwrap_or_default() + .to_string_lossy(); + if name.starts_with('.') { + return false; + } + let ext = test_path + .extension() + .unwrap_or_default() + .to_string_lossy(); + ext != "out" && ext != "ins" + } +} + +impl Verifiable for TestFile { + fn verify(&self) -> ErrorCollection { + let mut ec = ErrorCollection::new(); + if let DirectiveResult::Err(msg) = &self.expected_out { + ec.add(Error::TestFile(msg.clone())); + } + if let DirectiveResult::Err(msg) = &self.input_stream { + ec.add(Error::TestFile(msg.clone())); + } + ec + } +} diff --git a/dragon-runner-rs/src/toolchain.rs b/dragon-runner-rs/src/toolchain.rs new file mode 100644 index 0000000..ea088f0 --- /dev/null +++ b/dragon-runner-rs/src/toolchain.rs @@ -0,0 +1,116 @@ +use std::path::Path; + +use crate::error::{Error, ErrorCollection, Verifiable}; + +/// A single step in a toolchain (e.g., compile, link, run). 
+#[derive(Debug, Clone)] +pub struct Step { + pub name: String, + pub exe_path: String, + pub arguments: Vec, + pub output: Option, + pub allow_error: bool, + pub uses_ins: bool, + pub uses_runtime: bool, +} + +impl Step { + pub fn from_json(data: &serde_json::Value) -> Self { + Self { + name: data + .get("stepName") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(), + exe_path: data + .get("executablePath") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(), + arguments: data + .get("arguments") + .and_then(|v| v.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(String::from)) + .collect() + }) + .unwrap_or_default(), + output: data + .get("output") + .and_then(|v| v.as_str()) + .map(String::from), + allow_error: data + .get("allowError") + .and_then(|v| v.as_bool()) + .unwrap_or(false), + uses_ins: data + .get("usesInStr") + .and_then(|v| v.as_bool()) + .unwrap_or(false), + uses_runtime: data + .get("usesRuntime") + .and_then(|v| v.as_bool()) + .unwrap_or(false), + } + } +} + +impl Verifiable for Step { + fn verify(&self) -> ErrorCollection { + let mut errors = ErrorCollection::new(); + if self.name.is_empty() { + errors.add(Error::Config(format!( + "Missing required filed 'stepName' in Step {}", + self.name + ))); + } + if self.exe_path.is_empty() { + errors.add(Error::Config(format!( + "Missing required field 'exe_path' in Step: {}", + self.name + ))); + } else if !self.exe_path.starts_with('$') && !Path::new(&self.exe_path).exists() { + errors.add(Error::Config(format!( + "Cannot find exe_path '{}' in Step: {}", + self.exe_path, self.name + ))); + } + errors + } +} + +/// An ordered sequence of Steps that form a compilation/execution pipeline. 
/// Resolve a relative path against an absolute path.
/// If abs_path points to a file, resolve relative to its parent directory.
pub fn resolve_relative(relative_dir: &str, abs_path: &str) -> PathBuf {
    let anchor = Path::new(abs_path);
    // Files anchor to their containing directory; directories anchor to themselves.
    let base = match anchor.is_file() {
        true => anchor.parent().unwrap_or(anchor),
        false => anchor,
    };
    base.join(relative_dir)
}

/// Convert a string to bytes, optionally chopping a single trailing newline.
pub fn str_to_bytes(s: &str, chop_newline: bool) -> Vec<u8> {
    let trimmed = match chop_newline {
        true => s.strip_suffix('\n').unwrap_or(s),
        false => s,
    };
    trimmed.as_bytes().to_vec()
}

/// Read a file as bytes, returning None on error.
pub fn file_to_bytes(path: &str) -> Option<Vec<u8>> {
    fs::read(path).ok()
}

/// Read a file as a UTF-8 string, returning None on error.
pub fn file_to_str(path: &str) -> Option<String> {
    fs::read_to_string(path).ok()
}
/// Truncate bytes in the middle if they exceed max_bytes.
///
/// Short data is returned unchanged; long data keeps an equal-sized head and
/// tail with an "omitted for brevity" marker spliced in between.
pub fn truncated_bytes(data: &[u8], max_bytes: usize) -> Vec<u8> {
    const OMISSION: &[u8] = b"\n{{ omitted for brevity }}\n";

    if data.len() <= max_bytes {
        return data.to_vec();
    }

    // Budget left for real content once the marker is accounted for,
    // split evenly between the head and the tail.
    let keep = max_bytes.saturating_sub(OMISSION.len()) / 2;

    let mut out = Vec::with_capacity(max_bytes);
    out.extend_from_slice(&data[..keep]);
    out.extend_from_slice(OMISSION);
    out.extend_from_slice(&data[data.len() - keep..]);
    out
}
/// Convert bytes to string with lossy UTF-8 fallback.
pub fn bytes_to_str(data: &[u8]) -> String {
    // Invalid UTF-8 sequences become U+FFFD rather than failing.
    String::from_utf8_lossy(data).to_string()
}
glob::Pattern::new(&filter_pattern.to_lowercase()) + .map(|pat| pat.matches(&path.to_lowercase())) + .unwrap_or(false) + }) + .collect(); + + assert!(!filtered.is_empty(), "filter should match some subpackages"); + + for path in &filtered { + assert!( + path.to_lowercase().contains("errorpass"), + "filtered path should contain 'errorpass': {}", + path + ); + } +} + +#[test] +fn test_invalid_dir_config() { + let path = config_path("invalidDirConfig.json"); + let config = load_config(&path, None).expect("config should load"); + + assert!( + config.error_collection.has_errors(), + "should have errors for invalid dir" + ); + assert!( + !Path::new(&config.test_dir).exists(), + "test_dir should not exist" + ); +} + +#[test] +fn test_invalid_exe_config() { + let path = config_path("invalidExeConfig.json"); + let config = load_config(&path, None).expect("config should load"); + + assert!( + config.error_collection.has_errors(), + "should have errors for invalid exe" + ); + assert_eq!(config.executables.len(), 1); + assert!( + !Path::new(&config.executables[0].exe_path).exists(), + "exe_path should not exist" + ); +} diff --git a/dragon-runner-rs/tests/test_grader.rs b/dragon-runner-rs/tests/test_grader.rs new file mode 100644 index 0000000..64de1cd --- /dev/null +++ b/dragon-runner-rs/tests/test_grader.rs @@ -0,0 +1,42 @@ +use std::path::Path; + +use dragon_runner_rs::cli::{Mode, RunnerArgs}; +use dragon_runner_rs::config::load_config; +use dragon_runner_rs::harness::{TestHarness, TournamentHarness}; + +fn configs_dir() -> std::path::PathBuf { + let manifest = Path::new(env!("CARGO_MANIFEST_DIR")); + manifest.parent().unwrap().join("tests").join("configs") +} + +fn config_path(name: &str) -> String { + configs_dir().join(name).to_string_lossy().into_owned() +} + +#[test] +fn test_grader_config() { + let path = config_path("ConfigGrade.json"); + let config = load_config(&path, None).expect("config should load"); + + let failure_log = "Failures_rs.txt"; + // Clean up from 
previous runs + let _ = std::fs::remove_file(failure_log); + + let args = RunnerArgs { + mode: Mode::Tournament, + failure_log: failure_log.to_string(), + timeout: 2.0, + ..Default::default() + }; + + let mut harness = TournamentHarness::new(config, args); + harness.run(); + + assert!( + Path::new(failure_log).exists(), + "failure log should have been created" + ); + + // Clean up + let _ = std::fs::remove_file(failure_log); +} diff --git a/dragon-runner-rs/tests/test_runner.rs b/dragon-runner-rs/tests/test_runner.rs new file mode 100644 index 0000000..92256c4 --- /dev/null +++ b/dragon-runner-rs/tests/test_runner.rs @@ -0,0 +1,64 @@ +use std::path::Path; + +use dragon_runner_rs::config::{load_config, Config}; +use dragon_runner_rs::runner::ToolChainRunner; + +fn configs_dir() -> std::path::PathBuf { + let manifest = Path::new(env!("CARGO_MANIFEST_DIR")); + manifest.parent().unwrap().join("tests").join("configs") +} + +fn config_path(name: &str) -> String { + configs_dir().join(name).to_string_lossy().into_owned() +} + +fn create_config(name: &str) -> Config { + let path = config_path(name); + load_config(&path, None).expect("config should load") +} + +/// Run all tests for a config and assert they match expected_result. 
+fn run_tests_for_config(config: &Config, expected_result: bool) { + for exe in &config.executables { + exe.source_env(); + for tc in &config.toolchains { + let runner = ToolChainRunner::new(tc.clone(), 10.0); + for pkg in &config.packages { + for spkg in &pkg.subpackages { + for test in &spkg.tests { + let result = runner.run(test, exe); + assert_eq!( + result.did_pass, expected_result, + "Test {} expected {} but got {}", + test.file, + if expected_result { "PASS" } else { "FAIL" }, + if result.did_pass { "PASS" } else { "FAIL" }, + ); + } + } + } + } + } +} + +#[test] +fn test_gcc_pass() { + let config = create_config("gccPassConfig.json"); + assert!( + !config.error_collection.has_errors(), + "config errors: {}", + config.error_collection + ); + run_tests_for_config(&config, true); +} + +#[test] +fn test_gcc_fail() { + let config = create_config("gccFailConfig.json"); + assert!( + !config.error_collection.has_errors(), + "config errors: {}", + config.error_collection + ); + run_tests_for_config(&config, false); +} diff --git a/dragon-runner-rs/tests/test_runtime.rs b/dragon-runner-rs/tests/test_runtime.rs new file mode 100644 index 0000000..89b8f12 --- /dev/null +++ b/dragon-runner-rs/tests/test_runtime.rs @@ -0,0 +1,76 @@ +use std::path::Path; +use std::process; + +use dragon_runner_rs::config::{load_config, Config}; +use dragon_runner_rs::runner::ToolChainRunner; + +fn configs_dir() -> std::path::PathBuf { + let manifest = Path::new(env!("CARGO_MANIFEST_DIR")); + manifest.parent().unwrap().join("tests").join("configs") +} + +fn config_path(name: &str) -> String { + configs_dir().join(name).to_string_lossy().into_owned() +} + +fn tests_dir() -> std::path::PathBuf { + let manifest = Path::new(env!("CARGO_MANIFEST_DIR")); + manifest.parent().unwrap().join("tests") +} + +fn run_tests_for_config(config: &Config, expected_result: bool) { + for exe in &config.executables { + exe.source_env(); + for tc in &config.toolchains { + let runner = 
ToolChainRunner::new(tc.clone(), 3.0); + for pkg in &config.packages { + for spkg in &pkg.subpackages { + for test in &spkg.tests { + let result = runner.run(test, exe); + assert_eq!( + result.did_pass, expected_result, + "Test {} expected {} but got {}", + test.file, + if expected_result { "PASS" } else { "FAIL" }, + if result.did_pass { "PASS" } else { "FAIL" }, + ); + } + } + } + } + } +} + +#[test] +fn test_gcc_toolchain_success() { + let test_dir = tests_dir(); + let compile_script = test_dir.join("scripts/test-scripts/compile_lib.py"); + let lib_src_dir = test_dir.join("lib/src"); + let lib_out_dir = test_dir.join("lib"); + + assert!(compile_script.exists(), "missing compile_lib.py"); + + let expected_lib = test_dir.join("lib/libfib.so"); + if !expected_lib.exists() { + let status = process::Command::new("python3") + .args([ + compile_script.to_str().unwrap(), + lib_src_dir.to_str().unwrap(), + lib_out_dir.to_str().unwrap(), + ]) + .status() + .expect("failed to run compile_lib.py"); + assert!(status.success(), "shared object compilation failed"); + assert!(expected_lib.exists(), "failed to create shared object"); + } + + let path = config_path("runtimeConfigLinux.json"); + let config = load_config(&path, None).expect("config should load"); + assert!( + !config.error_collection.has_errors(), + "config errors: {}", + config.error_collection + ); + + run_tests_for_config(&config, true); +} From fa6f5a3e1f366a101fa25840ab2d5686327c9902 Mon Sep 17 00:00:00 2001 From: Justin Date: Sat, 21 Feb 2026 17:24:12 -0700 Subject: [PATCH 02/45] refactor: apply rust idioms --- dragon-runner-rs/src/cli.rs | 3 +- dragon-runner-rs/src/config.rs | 331 +++++++++++-------------- dragon-runner-rs/src/error.rs | 50 ++-- dragon-runner-rs/src/harness.rs | 276 +++++++-------------- dragon-runner-rs/src/log.rs | 45 ++-- dragon-runner-rs/src/main.rs | 21 +- dragon-runner-rs/src/runner.rs | 31 ++- dragon-runner-rs/src/testfile.rs | 212 ++++++---------- dragon-runner-rs/src/toolchain.rs | 
79 ++---- dragon-runner-rs/tests/test_grader.rs | 6 +- dragon-runner-rs/tests/test_runner.rs | 4 +- dragon-runner-rs/tests/test_runtime.rs | 4 +- 12 files changed, 422 insertions(+), 640 deletions(-) diff --git a/dragon-runner-rs/src/cli.rs b/dragon-runner-rs/src/cli.rs index 29ce0e7..4be409f 100644 --- a/dragon-runner-rs/src/cli.rs +++ b/dragon-runner-rs/src/cli.rs @@ -184,8 +184,9 @@ pub fn parse_cli_args() -> RunnerArgs { let args = RunnerArgs::from_flags(mode, flags); - // Set debug environment variable + // Set debug environment variable and refresh the logger std::env::set_var("DRAGON_RUNNER_DEBUG", args.verbosity.to_string()); + crate::log::refresh_debug_level(); args } diff --git a/dragon-runner-rs/src/config.rs b/dragon-runner-rs/src/config.rs index a9b54d1..c6c12da 100644 --- a/dragon-runner-rs/src/config.rs +++ b/dragon-runner-rs/src/config.rs @@ -1,20 +1,26 @@ -use std::env; +use std::collections::HashMap; +use std::fmt; use std::fs; use std::path::{Path, PathBuf}; +use std::sync::Arc; use crate::cli::RunnerArgs; -use crate::error::{Error, ErrorCollection, Verifiable}; +use crate::error::{DragonError, Errors, Verifiable}; use crate::log::log; use crate::testfile::TestFile; use crate::toolchain::ToolChain; use crate::util::resolve_relative; +// --------------------------------------------------------------------------- +// SubPackage +// --------------------------------------------------------------------------- + /// Represents a set of tests in a directory. 
#[derive(Debug, Clone)] pub struct SubPackage { pub path: String, pub name: String, - pub tests: Vec, + pub tests: Vec>, } impl SubPackage { @@ -28,41 +34,38 @@ impl SubPackage { let tests = if Path::new(path).is_dir() { Self::gather_tests(path) } else { - vec![TestFile::new(path)] + vec![Arc::new(TestFile::new(path))] }; - Self { - path: path.to_string(), - name, - tests, - } + Self { path: path.into(), name, tests } } - fn gather_tests(dir: &str) -> Vec { - let mut tests = Vec::new(); - if let Ok(entries) = fs::read_dir(dir) { - for entry in entries.flatten() { - let entry_path = entry.path(); - if TestFile::is_test(&entry_path) { - tests.push(TestFile::new(&entry_path.to_string_lossy())); - } - } - } + fn gather_tests(dir: &str) -> Vec> { + let mut tests: Vec> = fs::read_dir(dir) + .into_iter() + .flatten() + .filter_map(|e| e.ok()) + .filter(|e| TestFile::is_test(&e.path())) + .map(|e| Arc::new(TestFile::new(&e.path().to_string_lossy()))) + .collect(); tests.sort_by(|a, b| a.file.cmp(&b.file)); tests } } impl Verifiable for SubPackage { - fn verify(&self) -> ErrorCollection { - let mut ec = ErrorCollection::new(); - for test in &self.tests { - ec.extend(&test.verify()); - } - ec + fn verify(&self) -> Errors { + self.tests.iter().fold(Errors::new(), |mut acc, t| { + acc.extend(&t.verify()); + acc + }) } } +// --------------------------------------------------------------------------- +// Package +// --------------------------------------------------------------------------- + /// Represents a single test package. 
#[derive(Debug, Clone)] pub struct Package { @@ -81,7 +84,7 @@ impl Package { .into_owned(); let mut pkg = Self { - path: path.to_string(), + path: path.into(), name, n_tests: 0, subpackages: Vec::new(), @@ -90,61 +93,58 @@ impl Package { if Path::new(path).is_dir() { pkg.gather_subpackages(); } else { - let spkg = SubPackage::new(path); - pkg.add_subpackage(spkg); + pkg.push_subpackage(SubPackage::new(path)); } pkg } - fn add_subpackage(&mut self, spkg: SubPackage) { + fn push_subpackage(&mut self, spkg: SubPackage) { self.n_tests += spkg.tests.len(); self.subpackages.push(spkg); } fn gather_subpackages(&mut self) { - // Check for top-level tests in the package dir itself let top_level = SubPackage::new(&self.path); if !top_level.tests.is_empty() { - self.add_subpackage(top_level); + self.push_subpackage(top_level); } - - // Collect all subdirectory subpackages first, then add them let path = self.path.clone(); - let collected = Self::collect_subpackages(&path); - for spkg in collected { - self.add_subpackage(spkg); + for spkg in Self::collect_subpackages_recursive(&path) { + self.push_subpackage(spkg); } } - fn collect_subpackages(dir: &str) -> Vec { - let mut result = Vec::new(); - if let Ok(entries) = fs::read_dir(dir) { - for entry in entries.flatten() { - let entry_path = entry.path(); - if entry_path.is_dir() { - let spkg = SubPackage::new(&entry_path.to_string_lossy()); - if !spkg.tests.is_empty() { - result.push(spkg); - } - result.extend(Self::collect_subpackages(&entry_path.to_string_lossy())); - } - } - } - result + fn collect_subpackages_recursive(dir: &str) -> Vec { + fs::read_dir(dir) + .into_iter() + .flatten() + .filter_map(|e| e.ok()) + .filter(|e| e.path().is_dir()) + .flat_map(|e| { + let path_str = e.path().to_string_lossy().into_owned(); + let spkg = SubPackage::new(&path_str); + let children = Self::collect_subpackages_recursive(&path_str); + let head = if spkg.tests.is_empty() { None } else { Some(spkg) }; + head.into_iter().chain(children) 
+ }) + .collect() } } impl Verifiable for Package { - fn verify(&self) -> ErrorCollection { - let mut ec = ErrorCollection::new(); - for spkg in &self.subpackages { - ec.extend(&spkg.verify()); - } - ec + fn verify(&self) -> Errors { + self.subpackages.iter().fold(Errors::new(), |mut acc, spkg| { + acc.extend(&spkg.verify()); + acc + }) } } +// --------------------------------------------------------------------------- +// Executable +// --------------------------------------------------------------------------- + /// Represents a tested executable with an optional runtime. #[derive(Debug, Clone)] pub struct Executable { @@ -155,69 +155,55 @@ pub struct Executable { impl Executable { pub fn new(id: &str, exe_path: &str, runtime: &str) -> Self { - Self { - id: id.to_string(), - exe_path: exe_path.to_string(), - runtime: runtime.to_string(), - } + Self { id: id.into(), exe_path: exe_path.into(), runtime: runtime.into() } } - /// Set environment variables for runtime library injection. - pub fn source_env(&self) { + /// Build environment variables needed for runtime library injection. + /// Returns an empty map if no runtime is configured. 
+ pub fn runtime_env(&self) -> HashMap { + let mut env = HashMap::new(); if self.runtime.is_empty() { - return; + return env; } - let runtime_path = Path::new(&self.runtime); - let runtime_dir = runtime_path - .parent() - .unwrap_or(Path::new("")) - .to_string_lossy() - .into_owned(); - let rt_filename = runtime_path - .file_stem() - .unwrap_or_default() - .to_string_lossy() - .into_owned(); - - // Strip leading "lib" prefix for linker flag - let rt_lib = if rt_filename.starts_with("lib") { - rt_filename[3..].to_string() - } else { - rt_filename - }; + let rt = Path::new(&self.runtime); + let rt_dir = rt.parent().unwrap_or(Path::new("")).to_string_lossy().into_owned(); + let rt_stem = rt.file_stem().unwrap_or_default().to_string_lossy(); + let rt_lib = rt_stem.strip_prefix("lib").unwrap_or(&rt_stem).to_string(); if cfg!(target_os = "macos") { - env::set_var("DYLD_LIBRARY_PATH", &runtime_dir); - env::set_var("DYLD_INSERT_LIBRARIES", &self.runtime); + env.insert("DYLD_LIBRARY_PATH".into(), rt_dir.clone()); + env.insert("DYLD_INSERT_LIBRARIES".into(), self.runtime.clone()); } else { - env::set_var("LD_LIBRARY_PATH", &runtime_dir); - env::set_var("LD_PRELOAD", &self.runtime); + env.insert("LD_LIBRARY_PATH".into(), rt_dir.clone()); + env.insert("LD_PRELOAD".into(), self.runtime.clone()); } - - env::set_var("RT_PATH", &runtime_dir); - env::set_var("RT_LIB", &rt_lib); + env.insert("RT_PATH".into(), rt_dir); + env.insert("RT_LIB".into(), rt_lib); + env } } impl Verifiable for Executable { - fn verify(&self) -> ErrorCollection { - let mut errors = ErrorCollection::new(); + fn verify(&self) -> Errors { + let mut errors = Errors::new(); if !Path::new(&self.exe_path).exists() { - errors.add(Error::Config(format!( - "Cannot find binary file: {} in Executable: {}", - self.exe_path, self.id + errors.push(DragonError::Config(format!( + "Cannot find binary file: {} in Executable: {}", self.exe_path, self.id ))); } if !self.runtime.is_empty() && !Path::new(&self.runtime).exists() { 
- errors.add(Error::Config(format!( - "Cannot find runtime file: {} in Executable: {}", - self.runtime, self.id + errors.push(DragonError::Config(format!( + "Cannot find runtime file: {} in Executable: {}", self.runtime, self.id ))); } errors } } +// --------------------------------------------------------------------------- +// Config +// --------------------------------------------------------------------------- + /// In-memory representation of a JSON configuration file. #[derive(Debug, Clone)] pub struct Config { @@ -229,7 +215,7 @@ pub struct Config { pub toolchains: Vec, pub packages: Vec, pub package_filter: String, - pub error_collection: ErrorCollection, + pub error_collection: Errors, } impl Config { @@ -249,10 +235,7 @@ impl Config { .to_string_lossy() .into_owned(); - let test_dir_rel = config_data - .get("testDir") - .and_then(|v| v.as_str()) - .unwrap_or(""); + let test_dir_rel = config_data["testDir"].as_str().unwrap_or(""); let test_dir = resolve_relative(test_dir_rel, &abs_config_str) .to_string_lossy() .into_owned(); @@ -262,14 +245,8 @@ impl Config { config_data.get("runtimes"), &abs_config_str, ); - - let solution_exe = config_data - .get("solutionExecutable") - .and_then(|v| v.as_str()) - .map(String::from); - + let solution_exe = config_data["solutionExecutable"].as_str().map(Into::into); let toolchains = Self::parse_toolchains(config_data.get("toolchains")); - let packages = Self::gather_packages(&test_dir, debug_package); let mut cfg = Self { @@ -280,10 +257,10 @@ impl Config { solution_exe, toolchains, packages, - package_filter: package_filter.to_string(), - error_collection: ErrorCollection::new(), + package_filter: package_filter.into(), + error_collection: Errors::new(), }; - cfg.error_collection = cfg.do_verify(); + cfg.error_collection = cfg.collect_errors(); cfg } @@ -301,10 +278,10 @@ impl Config { exe_map .iter() .map(|(id, path_val)| { - let path_str = path_val.as_str().unwrap_or(""); - let exe_path = resolve_relative(path_str, 
abs_config_path) - .to_string_lossy() - .into_owned(); + let exe_path = resolve_relative( + path_val.as_str().unwrap_or(""), + abs_config_path, + ).to_string_lossy().into_owned(); let runtime = rt_map .and_then(|rts| rts.get(id.as_str())) @@ -324,58 +301,43 @@ impl Config { } fn parse_toolchains(tc_data: Option<&serde_json::Value>) -> Vec { - let tc_map = match tc_data.and_then(|v| v.as_object()) { - Some(m) => m, - None => return Vec::new(), - }; - tc_map - .iter() - .map(|(name, steps_val)| { - let steps = steps_val - .as_array() - .map(|arr| arr.as_slice()) - .unwrap_or(&[]); - ToolChain::new(name, steps) + tc_data + .and_then(|v| v.as_object()) + .map(|map| { + map.iter() + .map(|(name, steps)| { + ToolChain::new(name, steps.as_array().map(|a| a.as_slice()).unwrap_or(&[])) + }) + .collect() }) - .collect() + .unwrap_or_default() } fn gather_packages(test_dir: &str, debug_package: Option<&str>) -> Vec { - if let Some(debug_pkg) = debug_package { - if !debug_pkg.is_empty() { - return vec![Package::new(debug_pkg)]; - } - } - - let mut packages = Vec::new(); - if let Ok(entries) = fs::read_dir(test_dir) { - for entry in entries.flatten() { - let entry_path = entry.path(); - if entry_path.is_dir() { - packages.push(Package::new(&entry_path.to_string_lossy())); - } - } + if let Some(pkg) = debug_package.filter(|p| !p.is_empty()) { + return vec![Package::new(pkg)]; } - packages + fs::read_dir(test_dir) + .into_iter() + .flatten() + .filter_map(|e| e.ok()) + .filter(|e| e.path().is_dir()) + .map(|e| Package::new(&e.path().to_string_lossy())) + .collect() } - fn do_verify(&self) -> ErrorCollection { - let mut ec = ErrorCollection::new(); + fn collect_errors(&self) -> Errors { + let mut ec = Errors::new(); if !Path::new(&self.test_dir).exists() { - // Use the raw testDir value from config for the error message - ec.add(Error::Config(format!( - "Cannot find test directory: {}", - self.test_dir + ec.push(DragonError::Config(format!( + "Cannot find test directory: {}", 
self.test_dir ))); } - for exe in &self.executables { - ec.extend(&exe.verify()); - } - for tc in &self.toolchains { - ec.extend(&tc.verify()); - } - for pkg in &self.packages { - ec.extend(&pkg.verify()); + for item in self.executables.iter().map(|e| e.verify()) + .chain(self.toolchains.iter().map(|t| t.verify())) + .chain(self.packages.iter().map(|p| p.verify())) + { + ec.extend(&item); } ec } @@ -394,36 +356,45 @@ impl Config { } } +impl fmt::Display for Config { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "Config: {}", self.name)?; + writeln!(f, " testDir: {}", self.test_dir)?; + writeln!(f, " executables:")?; + for exe in &self.executables { + writeln!(f, " - {} ({})", exe.id, exe.exe_path)?; + } + writeln!(f, " toolchains:")?; + for tc in &self.toolchains { + writeln!(f, " - {} ({} steps)", tc.name, tc.len())?; + } + writeln!(f, " packages:")?; + for pkg in &self.packages { + write!(f, " - {}", pkg.name)?; + } + Ok(()) + } +} + /// Load and parse a JSON configuration file. 
pub fn load_config(config_path: &str, args: Option<&RunnerArgs>) -> Option { if !Path::new(config_path).exists() { return None; } - let content = match fs::read_to_string(config_path) { - Ok(c) => c, - Err(_) => { - log(0, 0, &format!("Config Error: Failed to parse config: {config_path}")); - return None; - } - }; + let content = fs::read_to_string(config_path).ok().or_else(|| { + log(0, 0, &format!("Config Error: Failed to parse config: {config_path}")); + None + })?; - let config_data: serde_json::Value = match serde_json::from_str(&content) { - Ok(v) => v, - Err(_) => { - log(0, 0, &format!("Config Error: Failed to parse config: {config_path}")); - return None; - } - }; + let config_data: serde_json::Value = serde_json::from_str(&content).ok().or_else(|| { + log(0, 0, &format!("Config Error: Failed to parse config: {config_path}")); + None + })?; let debug_package = args - .and_then(|a| { - if a.debug_package.is_empty() { - None - } else { - Some(a.debug_package.as_str()) - } - }); + .map(|a| a.debug_package.as_str()) + .filter(|p| !p.is_empty()); let package_filter = args.map(|a| a.package_filter.as_str()).unwrap_or(""); Some(Config::new(config_path, &config_data, debug_package, package_filter)) diff --git a/dragon-runner-rs/src/error.rs b/dragon-runner-rs/src/error.rs index d600851..e46dd52 100644 --- a/dragon-runner-rs/src/error.rs +++ b/dragon-runner-rs/src/error.rs @@ -1,58 +1,48 @@ use std::fmt; +use thiserror::Error; -#[derive(Debug, Clone)] -pub enum Error { +#[derive(Debug, Clone, Error)] +pub enum DragonError { + #[error("Config Error: {0}")] Config(String), + #[error("Testfile Error: {0}")] TestFile(String), } -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Error::Config(msg) => write!(f, "Config Error: {msg}"), - Error::TestFile(msg) => write!(f, "Testfile Error: {msg}"), - } - } -} - +/// Collect validation errors from config, toolchains, test files, etc. 
+/// Just a thin newtype over Vec so we can impl Display. #[derive(Debug, Clone, Default)] -pub struct ErrorCollection { - pub errors: Vec, -} +pub struct Errors(pub Vec); -impl ErrorCollection { +impl Errors { pub fn new() -> Self { - Self { errors: Vec::new() } + Self(Vec::new()) } pub fn has_errors(&self) -> bool { - !self.errors.is_empty() - } - - pub fn add(&mut self, error: Error) { - self.errors.push(error); + !self.0.is_empty() } - pub fn extend(&mut self, other: &ErrorCollection) { - self.errors.extend(other.errors.iter().cloned()); + pub fn push(&mut self, error: DragonError) { + self.0.push(error); } - pub fn extend_errors(&mut self, errors: &[Error]) { - self.errors.extend(errors.iter().cloned()); + pub fn extend(&mut self, other: &Errors) { + self.0.extend_from_slice(&other.0); } pub fn len(&self) -> usize { - self.errors.len() + self.0.len() } pub fn is_empty(&self) -> bool { - self.errors.is_empty() + self.0.is_empty() } } -impl fmt::Display for ErrorCollection { +impl fmt::Display for Errors { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - for (i, err) in self.errors.iter().enumerate() { + for (i, err) in self.0.iter().enumerate() { if i > 0 { writeln!(f)?; } @@ -63,5 +53,5 @@ impl fmt::Display for ErrorCollection { } pub trait Verifiable { - fn verify(&self) -> ErrorCollection; + fn verify(&self) -> Errors; } diff --git a/dragon-runner-rs/src/harness.rs b/dragon-runner-rs/src/harness.rs index c40092a..9d91599 100644 --- a/dragon-runner-rs/src/harness.rs +++ b/dragon-runner-rs/src/harness.rs @@ -14,15 +14,11 @@ pub struct SubPackageCounters { pub test_count: usize, } -/// Base harness logic — iterate over executables, toolchains, packages, subpackages, tests. -/// Concrete harnesses implement the hooks. +/// Mutable hooks called during the default iteration. +/// Config and cli_args are passed separately to avoid cloning. 
pub trait TestHarness { - fn config(&self) -> &Config; - fn cli_args(&self) -> &RunnerArgs; fn run_passed(&self) -> bool; - fn set_run_passed(&mut self, val: bool); - - fn process_test_result(&mut self, result: TestResult, counters: &mut SubPackageCounters); + fn process_test_result(&mut self, result: TestResult, cli_args: &RunnerArgs, counters: &mut SubPackageCounters); fn pre_run_hook(&mut self) {} fn post_run_hook(&mut self) {} @@ -31,21 +27,20 @@ pub trait TestHarness { fn pre_subpackage_hook(&mut self, _spkg: &crate::config::SubPackage) {} fn post_subpackage_hook(&mut self, _counters: &SubPackageCounters) {} - fn iterate(&mut self) { + /// Default iteration: executables x toolchains x packages x subpackages x tests. + fn iterate(&mut self, config: &Config, cli_args: &RunnerArgs) { self.pre_run_hook(); - let config = self.config().clone(); - let cli_args = self.cli_args().clone(); - for exe in &config.executables { self.pre_executable_hook(&exe.id); log(0, 0, &format!("Running executable: {}", exe.id)); - exe.source_env(); + let exe_env = exe.runtime_env(); let mut exe_pass = 0; let mut exe_total = 0; for tc in &config.toolchains { - let runner = ToolChainRunner::new(tc.clone(), cli_args.timeout); + let runner = ToolChainRunner::new(tc.clone(), cli_args.timeout) + .with_env(exe_env.clone()); log(0, 1, &format!("Running Toolchain: {}", tc.name)); let mut tc_pass = 0; let mut tc_total = 0; @@ -56,10 +51,8 @@ pub trait TestHarness { log(0, 2, &format!("Entering package {}", pkg.name)); for spkg in &pkg.subpackages { - // Glob filter if !config.package_filter.is_empty() { - let pat = glob::Pattern::new(&config.package_filter.to_lowercase()); - if let Ok(pat) = pat { + if let Ok(pat) = glob::Pattern::new(&config.package_filter.to_lowercase()) { if !pat.matches(&spkg.path.to_lowercase()) { continue; } @@ -67,16 +60,13 @@ pub trait TestHarness { } log(0, 3, &format!("Entering subpackage {}", spkg.name)); - let mut counters = SubPackageCounters { - pass_count: 0, - 
test_count: 0, - }; + let mut counters = SubPackageCounters { pass_count: 0, test_count: 0 }; self.pre_subpackage_hook(spkg); for test in &spkg.tests { let result = runner.run(test, exe); let fast_fail = cli_args.fast_fail && !result.did_pass; - self.process_test_result(result, &mut counters); + self.process_test_result(result, cli_args, &mut counters); if fast_fail { self.post_subpackage_hook(&counters); self.post_executable_hook(); @@ -86,14 +76,7 @@ pub trait TestHarness { } self.post_subpackage_hook(&counters); - log( - 0, - 3, - &format!( - "Subpackage Passed: {} / {}", - counters.pass_count, counters.test_count - ), - ); + log(0, 3, &format!("Subpackage Passed: {} / {}", counters.pass_count, counters.test_count)); pkg_pass += counters.pass_count; pkg_total += counters.test_count; } @@ -115,8 +98,8 @@ pub trait TestHarness { self.post_run_hook(); } - fn run(&mut self) -> bool { - self.iterate(); + fn run(&mut self, config: &Config, cli_args: &RunnerArgs) -> bool { + self.iterate(config, cli_args); self.run_passed() } } @@ -126,34 +109,25 @@ pub trait TestHarness { // --------------------------------------------------------------------------- pub struct RegularHarness { - pub config: Config, - pub cli_args: RunnerArgs, pub failures: Vec, pub passed: bool, } impl RegularHarness { - pub fn new(config: Config, cli_args: RunnerArgs) -> Self { - Self { - config, - cli_args, - failures: Vec::new(), - passed: true, - } + pub fn new() -> Self { + Self { failures: Vec::new(), passed: true } } } impl TestHarness for RegularHarness { - fn config(&self) -> &Config { &self.config } - fn cli_args(&self) -> &RunnerArgs { &self.cli_args } fn run_passed(&self) -> bool { self.passed } - fn set_run_passed(&mut self, val: bool) { self.passed = val; } - fn process_test_result(&mut self, result: TestResult, counters: &mut SubPackageCounters) { - let test_name = result.test.file.clone(); + fn process_test_result(&mut self, result: TestResult, _cli_args: &RunnerArgs, counters: &mut 
SubPackageCounters) { + let test_name = &result.test.file; if result.did_pass { let tag = if result.error_test { "[E-PASS] " } else { "[PASS] " }; log(0, 4, &format!("{}{}", tag.green(), test_name)); + counters.pass_count += 1; } else { let tag = if result.error_test { "[E-FAIL] " } else { "[FAIL] " }; log(0, 4, &format!("{}{}", tag.red(), test_name)); @@ -161,16 +135,6 @@ impl TestHarness for RegularHarness { self.failures.push(result); } counters.test_count += 1; - if counters.test_count > 0 && counters.test_count > counters.pass_count { - // already counted pass below - } - // Re-check: pass counting - counters.pass_count += if self.failures.last().map(|f| f.test.file == test_name).unwrap_or(false) { - 0 - } else { - // the test we just processed passed - 1 - }; } fn post_executable_hook(&mut self) { @@ -183,54 +147,42 @@ impl TestHarness for RegularHarness { // --------------------------------------------------------------------------- pub struct TournamentHarness { - pub config: Config, - pub cli_args: RunnerArgs, pub passed: bool, } impl TournamentHarness { - pub fn new(config: Config, cli_args: RunnerArgs) -> Self { - Self { - config, - cli_args, - passed: true, - } + pub fn new() -> Self { + Self { passed: true } + } + + /// Tournament has its own iteration logic (cross-product of packages x executables). 
+ pub fn run(&mut self, config: &Config, cli_args: &RunnerArgs) -> bool { + self.tournament_iterate(config, cli_args); + self.passed } fn log_failure_to_file(file: &str, result: &TestResult) { if result.did_pass { return; } - let mut f = OpenOptions::new() - .create(true) - .append(true) - .open(file) - .unwrap_or_else(|_| panic!("Cannot open feedback file: {}", file)); - - let exp_out = result.test.get_expected_out(); - let gen_out = result.gen_output.as_deref().unwrap_or(b""); - - let _ = writeln!(f, "{}", "=".repeat(80)); - let _ = writeln!(f, "Test: {}", result.test.file); - let _ = writeln!(f, "\nExpected Output: {:?}", String::from_utf8_lossy(exp_out)); - let _ = writeln!(f, "Generated Output: {:?}", String::from_utf8_lossy(gen_out)); - } -} + let Ok(mut f) = OpenOptions::new().create(true).append(true).open(file) else { return }; -impl TestHarness for TournamentHarness { - fn config(&self) -> &Config { &self.config } - fn cli_args(&self) -> &RunnerArgs { &self.cli_args } - fn run_passed(&self) -> bool { self.passed } - fn set_run_passed(&mut self, val: bool) { self.passed = val; } + let exp = String::from_utf8_lossy(result.test.get_expected_out()); + let gen = result.gen_output.as_deref() + .map(|b| String::from_utf8_lossy(b).into_owned()) + .unwrap_or_default(); - fn process_test_result(&mut self, _result: TestResult, _counters: &mut SubPackageCounters) { - // Tournament uses its own iterate, this is unused + let _ = writeln!(f, "{}\nTest: {}\n\nExpected Output: {exp:?}\nGenerated Output: {gen:?}", + "=".repeat(80), result.test.file); } - fn iterate(&mut self) { - let config = self.config.clone(); - let cli_args = self.cli_args.clone(); + fn append_log(path: &str, line: &str) { + if let Ok(mut f) = OpenOptions::new().create(true).append(true).open(path) { + let _ = writeln!(f, "{line}"); + } + } + fn tournament_iterate(&mut self, config: &Config, cli_args: &RunnerArgs) { let mut attacking_pkgs: Vec<&Package> = config.packages.iter().collect(); 
attacking_pkgs.sort_by(|a, b| a.name.to_lowercase().cmp(&b.name.to_lowercase())); @@ -241,72 +193,52 @@ impl TestHarness for TournamentHarness { let failure_log = &cli_args.failure_log; for tc in &config.toolchains { - let runner = ToolChainRunner::new(tc.clone(), cli_args.timeout); - let csv_filename = format!("toolchain_{}.csv", tc.name); let mut csv_file = fs::File::create(&csv_filename).expect("cannot create CSV"); - // Header row let header: Vec<&str> = std::iter::once(tc.name.as_str()) .chain(attacking_pkgs.iter().map(|p| p.name.as_str())) .collect(); let _ = writeln!(csv_file, "{}", header.join(",")); - println!("\nToolchain: {}", tc.name); for def_exe in &defending_exes { - def_exe.source_env(); + let runner = ToolChainRunner::new(tc.clone(), cli_args.timeout) + .with_env(def_exe.runtime_env()); let feedback_file = format!("{}-{}feedback.txt", def_exe.id, tc.name); let mut row_cells: Vec = vec![def_exe.id.clone()]; for a_pkg in &attacking_pkgs { print!("\n {:<12} --> {:<12}", a_pkg.name, def_exe.id); - let mut pass_count = 0; - let mut test_count = 0; - - for a_spkg in &a_pkg.subpackages { - for test in &a_spkg.tests { - let result = runner.run(test, def_exe); - if result.did_pass { - print!("{}", ".".green()); - pass_count += 1; - if solution_exe == Some(&def_exe.id) && !failure_log.is_empty() { - let mut f = OpenOptions::new() - .create(true) - .append(true) - .open("pass_log.txt") - .ok(); - if let Some(ref mut f) = f { - let _ = writeln!( - f, - "{} {} {}", - tc.name, a_pkg.name, result.test.path - ); - } - } - } else { - print!("{}", ".".red()); - Self::log_failure_to_file(&feedback_file, &result); - if solution_exe == Some(&def_exe.id) && !failure_log.is_empty() { - let mut f = OpenOptions::new() - .create(true) - .append(true) - .open(failure_log) - .ok(); - if let Some(ref mut f) = f { - let _ = writeln!( - f, - "{} {} {}", - tc.name, a_pkg.name, result.test.path - ); - } - } + let mut pass_count = 0usize; + let mut test_count = 0usize; + + let 
tests = a_pkg.subpackages.iter().flat_map(|s| &s.tests); + for test in tests { + let result = runner.run(test, def_exe); + let is_solution = solution_exe == Some(&def_exe.id); + + if result.did_pass { + print!("{}", ".".green()); + pass_count += 1; + if is_solution && !failure_log.is_empty() { + Self::append_log("pass_log.txt", &format!( + "{} {} {}", tc.name, a_pkg.name, result.test.path + )); + } + } else { + print!("{}", ".".red()); + Self::log_failure_to_file(&feedback_file, &result); + if is_solution && !failure_log.is_empty() { + Self::append_log(failure_log, &format!( + "{} {} {}", tc.name, a_pkg.name, result.test.path + )); } - test_count += 1; } + test_count += 1; } - row_cells.push(format!("{}/{}", pass_count, test_count)); + row_cells.push(format!("{pass_count}/{test_count}")); } let _ = writeln!(csv_file, "{}", row_cells.join(",")); @@ -315,48 +247,43 @@ impl TestHarness for TournamentHarness { } } +impl TestHarness for TournamentHarness { + fn run_passed(&self) -> bool { self.passed } + + fn process_test_result(&mut self, _result: TestResult, _cli_args: &RunnerArgs, _counters: &mut SubPackageCounters) { + // Tournament uses its own tournament_iterate + } +} + // --------------------------------------------------------------------------- // MemoryCheckHarness // --------------------------------------------------------------------------- pub struct MemoryCheckHarness { - pub config: Config, - pub cli_args: RunnerArgs, pub passed: bool, pub leak_tests: Vec, pub test_count: usize, } impl MemoryCheckHarness { - pub fn new(config: Config, cli_args: RunnerArgs) -> Self { - Self { - config, - cli_args, - passed: true, - leak_tests: Vec::new(), - test_count: 0, - } + pub fn new() -> Self { + Self { passed: true, leak_tests: Vec::new(), test_count: 0 } } } impl TestHarness for MemoryCheckHarness { - fn config(&self) -> &Config { &self.config } - fn cli_args(&self) -> &RunnerArgs { &self.cli_args } fn run_passed(&self) -> bool { self.passed } - fn 
set_run_passed(&mut self, val: bool) { self.passed = val; } - fn process_test_result(&mut self, result: TestResult, counters: &mut SubPackageCounters) { + fn process_test_result(&mut self, result: TestResult, _cli_args: &RunnerArgs, counters: &mut SubPackageCounters) { self.test_count += 1; counters.test_count += 1; - let test_name = result.test.file.clone(); + let test_name = &result.test.file; if result.did_pass { - let tag = "[PASS] "; - log(0, 4, &format!("{}{}", tag.green(), test_name)); + log(0, 4, &format!("{}{}", "[PASS] ".green(), test_name)); counters.pass_count += 1; } else { - let tag = "[FAIL] "; - log(0, 4, &format!("{}{}", tag.red(), test_name)); + log(0, 4, &format!("{}{}", "[FAIL] ".red(), test_name)); } if result.memory_leak { @@ -367,11 +294,7 @@ impl TestHarness for MemoryCheckHarness { fn post_executable_hook(&mut self) { log(0, 0, &format!("Leak Summary: ({} tests)", self.leak_tests.len())); for result in &self.leak_tests { - log( - 0, - 4, - &format!("{}{}", "[LEAK] ".yellow(), result.test.file), - ); + log(0, 4, &format!("{}{}", "[LEAK] ".yellow(), result.test.file)); } self.leak_tests.clear(); self.test_count = 0; @@ -383,8 +306,6 @@ impl TestHarness for MemoryCheckHarness { // --------------------------------------------------------------------------- pub struct PerformanceTestingHarness { - pub config: Config, - pub cli_args: RunnerArgs, pub passed: bool, pub csv_cols: Vec>, pub cur_col: Vec, @@ -394,14 +315,12 @@ pub struct PerformanceTestingHarness { } impl PerformanceTestingHarness { - pub fn new(config: Config, cli_args: RunnerArgs) -> Self { + pub fn new() -> Self { Self { - config, - cli_args, passed: true, csv_cols: Vec::new(), cur_col: Vec::new(), - testfile_col: vec!["Test".to_string()], + testfile_col: vec!["Test".into()], first_exec: true, failures: Vec::new(), } @@ -409,32 +328,27 @@ impl PerformanceTestingHarness { } impl TestHarness for PerformanceTestingHarness { - fn config(&self) -> &Config { &self.config } - fn 
cli_args(&self) -> &RunnerArgs { &self.cli_args } fn run_passed(&self) -> bool { self.passed } - fn set_run_passed(&mut self, val: bool) { self.passed = val; } - fn process_test_result(&mut self, result: TestResult, counters: &mut SubPackageCounters) { + fn process_test_result(&mut self, result: TestResult, cli_args: &RunnerArgs, counters: &mut SubPackageCounters) { if self.first_exec { self.testfile_col.push(result.test.file.clone()); } - let test_name = result.test.file.clone(); + let test_name = &result.test.file; if result.did_pass { counters.pass_count += 1; log(0, 4, &format!("{}{}", "[PASS] ".green(), test_name)); - self.cur_col - .push(result.time.map(|t| format!("{:.4}", t)).unwrap_or_default()); + self.cur_col.push(result.time.map(|t| format!("{t:.4}")).unwrap_or_default()); } else { - self.cur_col - .push(format!("{:.4}", self.cli_args.timeout)); + self.cur_col.push(format!("{:.4}", cli_args.timeout)); self.failures.push(result); } counters.test_count += 1; } fn pre_executable_hook(&mut self, exe_id: &str) { - self.cur_col.push(exe_id.to_string()); + self.cur_col.push(exe_id.into()); } fn post_executable_hook(&mut self) { @@ -442,21 +356,15 @@ impl TestHarness for PerformanceTestingHarness { self.csv_cols.push(self.testfile_col.clone()); self.first_exec = false; } - self.csv_cols.push(self.cur_col.clone()); - self.cur_col.clear(); + self.csv_cols.push(std::mem::take(&mut self.cur_col)); } fn post_run_hook(&mut self) { - // Transpose columns into rows let max_len = self.csv_cols.iter().map(|c| c.len()).max().unwrap_or(0); let mut f = fs::File::create("perf.csv").expect("cannot create perf.csv"); for row_idx in 0..max_len { - let row: Vec<&str> = self - .csv_cols - .iter() - .map(|col| { - col.get(row_idx).map(|s| s.as_str()).unwrap_or("") - }) + let row: Vec<&str> = self.csv_cols.iter() + .map(|col| col.get(row_idx).map(|s| s.as_str()).unwrap_or("")) .collect(); let _ = writeln!(f, "{}", row.join(",")); } diff --git a/dragon-runner-rs/src/log.rs 
b/dragon-runner-rs/src/log.rs index b403475..3fc663a 100644 --- a/dragon-runner-rs/src/log.rs +++ b/dragon-runner-rs/src/log.rs @@ -1,32 +1,35 @@ use std::env; -use std::sync::OnceLock; +use std::sync::atomic::{AtomicU32, Ordering}; -static LOGGER: OnceLock = OnceLock::new(); +static DEBUG_LEVEL: AtomicU32 = AtomicU32::new(u32::MAX); -struct Logger { - debug_level: u32, -} - -impl Logger { - fn new() -> Self { - let debug_level = env::var("DRAGON_RUNNER_DEBUG") - .ok() - .and_then(|v| v.parse().ok()) - .unwrap_or(0); - Self { debug_level } +fn debug_level() -> u32 { + let cached = DEBUG_LEVEL.load(Ordering::Relaxed); + if cached != u32::MAX { + return cached; } + let level = env::var("DRAGON_RUNNER_DEBUG") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(0); + DEBUG_LEVEL.store(level, Ordering::Relaxed); + level } -fn get_logger() -> &'static Logger { - LOGGER.get_or_init(Logger::new) +/// Re-read DRAGON_RUNNER_DEBUG from the environment. +/// Call after setting the env var (e.g. from CLI parsing). +pub fn refresh_debug_level() { + let level = env::var("DRAGON_RUNNER_DEBUG") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(0); + DEBUG_LEVEL.store(level, Ordering::Relaxed); } /// Log a message at a given verbosity level with indentation. pub fn log(level: u32, indent: usize, msg: &str) { - let logger = get_logger(); - if logger.debug_level >= level { - let prefix = " ".repeat(indent); - println!("{prefix}{msg}"); + if debug_level() >= level { + println!("{:indent$}{msg}", "", indent = indent); } } @@ -39,6 +42,6 @@ pub fn log_multiline(content: &str, level: u32, indent: usize) { /// Log a delimiter line. 
pub fn log_delimiter(title: &str, level: u32, indent: usize) { - let delimiter = "-".repeat(20); - log(level, indent, &format!("{delimiter} {title} {delimiter}")); + let delim = "-".repeat(20); + log(level, indent, &format!("{delim} {title} {delim}")); } diff --git a/dragon-runner-rs/src/main.rs b/dragon-runner-rs/src/main.rs index 8ada199..34af9e5 100644 --- a/dragon-runner-rs/src/main.rs +++ b/dragon-runner-rs/src/main.rs @@ -31,7 +31,6 @@ fn main() { 0, &format!("Parsed {} below:", cli_args.config_file), ); - // TODO: config Display impl for pretty printing log( 0, 0, @@ -63,22 +62,10 @@ fn main() { config.log_test_info(); let success = match cli_args.mode { - Mode::Regular => { - let mut harness = RegularHarness::new(config, cli_args); - harness.run() - } - Mode::Tournament => { - let mut harness = TournamentHarness::new(config, cli_args); - harness.run() - } - Mode::Memcheck => { - let mut harness = MemoryCheckHarness::new(config, cli_args); - harness.run() - } - Mode::Perf => { - let mut harness = PerformanceTestingHarness::new(config, cli_args); - harness.run() - } + Mode::Regular => RegularHarness::new().run(&config, &cli_args), + Mode::Tournament => TournamentHarness::new().run(&config, &cli_args), + Mode::Memcheck => MemoryCheckHarness::new().run(&config, &cli_args), + Mode::Perf => PerformanceTestingHarness::new().run(&config, &cli_args), }; std::process::exit(if success { 0 } else { 1 }); diff --git a/dragon-runner-rs/src/runner.rs b/dragon-runner-rs/src/runner.rs index 95f1722..c57d5c3 100644 --- a/dragon-runner-rs/src/runner.rs +++ b/dragon-runner-rs/src/runner.rs @@ -1,7 +1,9 @@ +use std::collections::HashMap; use std::env; use std::fs; use std::path::Path; use std::process; +use std::sync::Arc; use std::time::{Duration, Instant}; use regex::Regex; @@ -60,7 +62,7 @@ impl CommandResult { /// Result of running a complete test case through a toolchain. 
pub struct TestResult { - pub test: TestFile, + pub test: Arc, pub did_pass: bool, pub did_timeout: bool, pub error_test: bool, @@ -72,7 +74,7 @@ pub struct TestResult { } impl TestResult { - pub fn new(test: TestFile) -> Self { + pub fn new(test: Arc) -> Self { Self { test, did_pass: false, @@ -91,6 +93,8 @@ impl TestResult { pub struct ToolChainRunner { pub tc: ToolChain, pub timeout: f64, + /// Extra environment variables to inject into spawned subprocesses (e.g. runtime lib paths). + pub extra_env: HashMap, reserved_exit_codes: Vec, runtime_errors: Vec<&'static str>, } @@ -100,16 +104,22 @@ impl ToolChainRunner { Self { tc, timeout, + extra_env: HashMap::new(), reserved_exit_codes: vec![VALGRIND_EXIT_CODE], runtime_errors: vec!["SizeError", "IndexError", "MathError", "StrideError"], } } + pub fn with_env(mut self, env: HashMap) -> Self { + self.extra_env = env; + self + } + /// Run each step of the toolchain for a given test and executable. - pub fn run(&self, test: &TestFile, exe: &Executable) -> TestResult { + pub fn run(&self, test: &Arc, exe: &Executable) -> TestResult { let mut input_file = test.path.clone(); let expected = test.get_expected_out().to_vec(); - let mut tr = TestResult::new(test.clone()); + let mut tr = TestResult::new(Arc::clone(test)); let tc_len = self.tc.len(); for (index, step) in self.tc.iter().enumerate() { @@ -208,13 +218,13 @@ impl ToolChainRunner { let mut cr = CommandResult::new(&command.cmd); let start = Instant::now(); - // Use subprocess::run with timeout, mirroring the Python approach - let result = process::Command::new(&command.args[0]) - .args(&command.args[1..]) + let mut cmd = process::Command::new(&command.args[0]); + cmd.args(&command.args[1..]) .stdin(process::Stdio::piped()) .stdout(process::Stdio::piped()) .stderr(process::Stdio::piped()) - .spawn(); + .envs(&self.extra_env); + let result = cmd.spawn(); match result { Ok(mut child) => { @@ -323,7 +333,10 @@ impl ToolChainRunner { .or_else(|| caps.get(2)) .map(|m| 
m.as_str()) .unwrap_or(""); - if let Ok(val) = env::var(var_name) { + // Check runner's extra_env first, then fall back to process env + let val = self.extra_env.get(var_name).cloned() + .or_else(|| env::var(var_name).ok()); + if let Some(val) = val { *arg = arg .replace(&format!("${var_name}"), &val) .replace(&format!("${{{var_name}}}"), &val); diff --git a/dragon-runner-rs/src/testfile.rs b/dragon-runner-rs/src/testfile.rs index decf8a5..06c60e5 100644 --- a/dragon-runner-rs/src/testfile.rs +++ b/dragon-runner-rs/src/testfile.rs @@ -2,7 +2,7 @@ use std::fs; use std::io::{self, BufRead}; use std::path::Path; -use crate::error::{Error, ErrorCollection, Verifiable}; +use crate::error::{DragonError, Errors, Verifiable}; use crate::util::{file_to_bytes, str_to_bytes}; /// Represents a single test case file with parsed directives. @@ -40,40 +40,18 @@ impl DirectiveResult { impl TestFile { pub fn new(test_path: &str) -> Self { let path_obj = Path::new(test_path); - let stem = path_obj - .file_stem() - .unwrap_or_default() - .to_string_lossy() - .into_owned(); + let stem = path_obj.file_stem().unwrap_or_default().to_string_lossy().into_owned(); let extension = path_obj .extension() .map(|e| format!(".{}", e.to_string_lossy())) .unwrap_or_default(); - let file = format!("{}{}", stem, extension); + let file = format!("{stem}{extension}"); let comment_syntax = "//".to_string(); - let expected_out = Self::get_content_static( - test_path, - &comment_syntax, - "CHECK:", - "CHECK_FILE:", - ); - let input_stream = Self::get_content_static( - test_path, - &comment_syntax, - "INPUT:", - "INPUT_FILE:", - ); - - Self { - path: test_path.to_string(), - stem, - extension, - file, - comment_syntax, - expected_out, - input_stream, - } + let expected_out = Self::resolve_directive(test_path, &comment_syntax, "CHECK:", "CHECK_FILE:"); + let input_stream = Self::resolve_directive(test_path, &comment_syntax, "INPUT:", "INPUT_FILE:"); + + Self { path: test_path.into(), stem, extension, file, 
comment_syntax, expected_out, input_stream } } pub fn get_expected_out(&self) -> &[u8] { @@ -84,160 +62,116 @@ impl TestFile { self.input_stream.as_bytes() } - /// Generic method to get content based on inline and file directives. - fn get_content_static( + /// Resolve inline vs file directives into final byte content. + fn resolve_directive( test_path: &str, comment_syntax: &str, - inline_directive: &str, - file_directive: &str, + inline_dir: &str, + file_dir: &str, ) -> DirectiveResult { - let inline_contents = Self::get_directive_contents(test_path, comment_syntax, inline_directive); - let file_contents = Self::get_directive_contents(test_path, comment_syntax, file_directive); + let inline = Self::parse_directive(test_path, comment_syntax, inline_dir); + let file_ref = Self::parse_directive(test_path, comment_syntax, file_dir); - match (&inline_contents, &file_contents) { - // Both directives present — conflict + match (inline, file_ref) { (Some(Ok(_)), Some(Ok(_))) => DirectiveResult::Err(format!( - "Directive Conflict for test {}: Supplied both {} and {}", - Path::new(test_path) - .file_name() - .unwrap_or_default() - .to_string_lossy(), - inline_directive, - file_directive, + "Directive Conflict for test {}: Supplied both {inline_dir} and {file_dir}", + Path::new(test_path).file_name().unwrap_or_default().to_string_lossy(), )), - // Only inline directive - (Some(Ok(bytes)), _) => DirectiveResult::Ok(bytes.clone()), - (Some(Err(e)), _) => DirectiveResult::Err(e.clone()), - - // Only file directive — read referenced file - (None, Some(Ok(file_ref_bytes))) => { - let file_str = String::from_utf8_lossy(file_ref_bytes).trim().to_string(); - let parent = Path::new(test_path).parent().unwrap_or(Path::new("")); - let full_path = parent.join(&file_str); - - if !full_path.exists() { - return DirectiveResult::Err(format!( - "Failed to locate path supplied to {}\n\tTest:{}\n\tPath:{}\n", - file_directive, - test_path, - full_path.display(), - )); - } - - match 
file_to_bytes(&full_path.to_string_lossy()) { - Some(bytes) => DirectiveResult::Ok(bytes), - None => DirectiveResult::Err(format!( - "Failed to convert file {} to bytes", - full_path.display() - )), - } - } - (None, Some(Err(e))) => DirectiveResult::Err(e.clone()), + (Some(Ok(bytes)), _) => DirectiveResult::Ok(bytes), + (Some(Err(e)), _) => DirectiveResult::Err(e), + + (None, Some(Ok(ref_bytes))) => Self::read_referenced_file(test_path, file_dir, &ref_bytes), + (None, Some(Err(e))) => DirectiveResult::Err(e), - // Neither directive — empty (None, None) => DirectiveResult::Ok(Vec::new()), } } - /// Parse directive contents from the test file. - /// Returns None if no directive found, Some(Ok(bytes)) for content, - /// or Some(Err(msg)) on parse error. - fn get_directive_contents( + /// Given file-reference bytes from a FILE directive, resolve and read the target file. + fn read_referenced_file(test_path: &str, directive: &str, ref_bytes: &[u8]) -> DirectiveResult { + let file_str = String::from_utf8_lossy(ref_bytes).trim().to_string(); + let parent = Path::new(test_path).parent().unwrap_or(Path::new("")); + let full_path = parent.join(&file_str); + + if !full_path.exists() { + return DirectiveResult::Err(format!( + "Failed to locate path supplied to {directive}\n\tTest:{test_path}\n\tPath:{}\n", + full_path.display(), + )); + } + + file_to_bytes(&full_path.to_string_lossy()) + .map(DirectiveResult::Ok) + .unwrap_or_else(|| DirectiveResult::Err(format!( + "Failed to convert file {} to bytes", full_path.display() + ))) + } + + /// Scan a test file for lines matching `// DIRECTIVE:value` and collect the values. + /// Returns None if no matches found. 
+ fn parse_directive( test_path: &str, comment_syntax: &str, - directive_prefix: &str, + directive: &str, ) -> Option, String>> { let file = match fs::File::open(test_path) { Ok(f) => f, - Err(_) => { - return Some(Err(format!( - "Unkown error occured while parsing testfile: {}", - test_path - ))); - } + Err(_) => return Some(Err(format!( + "Unknown error occurred while parsing testfile: {test_path}" + ))), }; - let reader = io::BufReader::new(file); let mut contents: Vec = Vec::new(); - let mut first_match = true; + let mut found_any = false; - for line_result in reader.lines() { - let line = match line_result { + for line in io::BufReader::new(file).lines() { + let line = match line { Ok(l) => l, - Err(_) => { - return Some(Err(format!( - "Unkown error occured while parsing testfile: {}", - test_path - ))); - } + Err(_) => return Some(Err(format!( + "Unknown error occurred while parsing testfile: {test_path}" + ))), }; - let comment_index = match line.find(comment_syntax) { - Some(i) => i, - None => continue, - }; - let directive_index = match line.find(directive_prefix) { - Some(i) => i, - None => continue, - }; - - // Comment must appear before directive - if comment_index > directive_index { - continue; + match (line.find(comment_syntax), line.find(directive)) { + (Some(c), Some(d)) if c <= d => {} + _ => continue, } - // Extract the right-hand side after the directive - let rhs = match line.split_once(directive_prefix) { + let rhs = match line.split_once(directive) { Some((_, rhs)) => rhs, None => continue, }; - let rhs_bytes = str_to_bytes(rhs, true); - - if !first_match { + if found_any { contents.push(b'\n'); } - contents.extend_from_slice(&rhs_bytes); - first_match = false; + contents.extend_from_slice(&str_to_bytes(rhs, true)); + found_any = true; } - if first_match { - // No matches found - None - } else { - Some(Ok(contents)) - } + found_any.then(|| Ok(contents)) } /// Check if a path is a valid test file (not hidden, not .out/.ins extension). 
- pub fn is_test(test_path: &Path) -> bool { - if !test_path.is_file() { - return false; - } - let name = test_path - .file_name() - .unwrap_or_default() - .to_string_lossy(); - if name.starts_with('.') { - return false; - } - let ext = test_path - .extension() - .unwrap_or_default() - .to_string_lossy(); - ext != "out" && ext != "ins" + pub fn is_test(path: &Path) -> bool { + path.is_file() + && !path.file_name().unwrap_or_default().to_string_lossy().starts_with('.') + && !matches!( + path.extension().and_then(|e| e.to_str()), + Some("out" | "ins") + ) } } impl Verifiable for TestFile { - fn verify(&self) -> ErrorCollection { - let mut ec = ErrorCollection::new(); + fn verify(&self) -> Errors { + let mut ec = Errors::new(); if let DirectiveResult::Err(msg) = &self.expected_out { - ec.add(Error::TestFile(msg.clone())); + ec.push(DragonError::TestFile(msg.clone())); } if let DirectiveResult::Err(msg) = &self.input_stream { - ec.add(Error::TestFile(msg.clone())); + ec.push(DragonError::TestFile(msg.clone())); } ec } diff --git a/dragon-runner-rs/src/toolchain.rs b/dragon-runner-rs/src/toolchain.rs index ea088f0..d175b04 100644 --- a/dragon-runner-rs/src/toolchain.rs +++ b/dragon-runner-rs/src/toolchain.rs @@ -1,6 +1,6 @@ use std::path::Path; -use crate::error::{Error, ErrorCollection, Verifiable}; +use crate::error::{DragonError, Errors, Verifiable}; /// A single step in a toolchain (e.g., compile, link, run). 
#[derive(Debug, Clone)] @@ -17,63 +17,36 @@ pub struct Step { impl Step { pub fn from_json(data: &serde_json::Value) -> Self { Self { - name: data - .get("stepName") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(), - exe_path: data - .get("executablePath") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(), + name: data["stepName"].as_str().unwrap_or("").into(), + exe_path: data["executablePath"].as_str().unwrap_or("").into(), arguments: data .get("arguments") .and_then(|v| v.as_array()) - .map(|arr| { - arr.iter() - .filter_map(|v| v.as_str().map(String::from)) - .collect() - }) + .map(|arr| arr.iter().filter_map(|v| v.as_str().map(Into::into)).collect()) .unwrap_or_default(), - output: data - .get("output") - .and_then(|v| v.as_str()) - .map(String::from), - allow_error: data - .get("allowError") - .and_then(|v| v.as_bool()) - .unwrap_or(false), - uses_ins: data - .get("usesInStr") - .and_then(|v| v.as_bool()) - .unwrap_or(false), - uses_runtime: data - .get("usesRuntime") - .and_then(|v| v.as_bool()) - .unwrap_or(false), + output: data.get("output").and_then(|v| v.as_str()).map(Into::into), + allow_error: data["allowError"].as_bool().unwrap_or(false), + uses_ins: data["usesInStr"].as_bool().unwrap_or(false), + uses_runtime: data["usesRuntime"].as_bool().unwrap_or(false), } } } impl Verifiable for Step { - fn verify(&self) -> ErrorCollection { - let mut errors = ErrorCollection::new(); + fn verify(&self) -> Errors { + let mut errors = Errors::new(); if self.name.is_empty() { - errors.add(Error::Config(format!( - "Missing required filed 'stepName' in Step {}", - self.name + errors.push(DragonError::Config(format!( + "Missing required field 'stepName' in Step {}", self.name ))); } if self.exe_path.is_empty() { - errors.add(Error::Config(format!( - "Missing required field 'exe_path' in Step: {}", - self.name + errors.push(DragonError::Config(format!( + "Missing required field 'exe_path' in Step: {}", self.name ))); } else if 
!self.exe_path.starts_with('$') && !Path::new(&self.exe_path).exists() { - errors.add(Error::Config(format!( - "Cannot find exe_path '{}' in Step: {}", - self.exe_path, self.name + errors.push(DragonError::Config(format!( + "Cannot find exe_path '{}' in Step: {}", self.exe_path, self.name ))); } errors @@ -89,10 +62,9 @@ pub struct ToolChain { impl ToolChain { pub fn new(name: &str, steps_data: &[serde_json::Value]) -> Self { - let steps = steps_data.iter().map(Step::from_json).collect(); Self { - name: name.to_string(), - steps, + name: name.into(), + steps: steps_data.iter().map(Step::from_json).collect(), } } @@ -100,17 +72,20 @@ impl ToolChain { self.steps.len() } + pub fn is_empty(&self) -> bool { + self.steps.is_empty() + } + pub fn iter(&self) -> std::slice::Iter<'_, Step> { self.steps.iter() } } impl Verifiable for ToolChain { - fn verify(&self) -> ErrorCollection { - let mut errors = ErrorCollection::new(); - for step in &self.steps { - errors.extend(&step.verify()); - } - errors + fn verify(&self) -> Errors { + self.steps.iter().fold(Errors::new(), |mut acc, step| { + acc.extend(&step.verify()); + acc + }) } } diff --git a/dragon-runner-rs/tests/test_grader.rs b/dragon-runner-rs/tests/test_grader.rs index 64de1cd..b80786c 100644 --- a/dragon-runner-rs/tests/test_grader.rs +++ b/dragon-runner-rs/tests/test_grader.rs @@ -2,7 +2,7 @@ use std::path::Path; use dragon_runner_rs::cli::{Mode, RunnerArgs}; use dragon_runner_rs::config::load_config; -use dragon_runner_rs::harness::{TestHarness, TournamentHarness}; +use dragon_runner_rs::harness::TournamentHarness; fn configs_dir() -> std::path::PathBuf { let manifest = Path::new(env!("CARGO_MANIFEST_DIR")); @@ -29,8 +29,8 @@ fn test_grader_config() { ..Default::default() }; - let mut harness = TournamentHarness::new(config, args); - harness.run(); + let mut harness = TournamentHarness::new(); + harness.run(&config, &args); assert!( Path::new(failure_log).exists(), diff --git a/dragon-runner-rs/tests/test_runner.rs 
b/dragon-runner-rs/tests/test_runner.rs index 92256c4..1fa2a4a 100644 --- a/dragon-runner-rs/tests/test_runner.rs +++ b/dragon-runner-rs/tests/test_runner.rs @@ -20,9 +20,9 @@ fn create_config(name: &str) -> Config { /// Run all tests for a config and assert they match expected_result. fn run_tests_for_config(config: &Config, expected_result: bool) { for exe in &config.executables { - exe.source_env(); for tc in &config.toolchains { - let runner = ToolChainRunner::new(tc.clone(), 10.0); + let runner = ToolChainRunner::new(tc.clone(), 10.0) + .with_env(exe.runtime_env()); for pkg in &config.packages { for spkg in &pkg.subpackages { for test in &spkg.tests { diff --git a/dragon-runner-rs/tests/test_runtime.rs b/dragon-runner-rs/tests/test_runtime.rs index 89b8f12..e4eff9e 100644 --- a/dragon-runner-rs/tests/test_runtime.rs +++ b/dragon-runner-rs/tests/test_runtime.rs @@ -20,9 +20,9 @@ fn tests_dir() -> std::path::PathBuf { fn run_tests_for_config(config: &Config, expected_result: bool) { for exe in &config.executables { - exe.source_env(); for tc in &config.toolchains { - let runner = ToolChainRunner::new(tc.clone(), 3.0); + let runner = ToolChainRunner::new(tc.clone(), 3.0) + .with_env(exe.runtime_env()); for pkg in &config.packages { for spkg in &pkg.subpackages { for test in &spkg.tests { From 374a6a4f22f8c046814f4111a9c0dbe860b27d79 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 15:13:21 -0700 Subject: [PATCH 03/45] refactor: move scripts to rust src --- dragon-runner-rs/scripts/__init__.py | 0 dragon-runner-rs/scripts/add_empty.py | 110 +++++++++++++++ dragon-runner-rs/scripts/base.py | 42 ++++++ dragon-runner-rs/scripts/build.py | 103 ++++++++++++++ dragon-runner-rs/scripts/checkout.py | 121 +++++++++++++++++ dragon-runner-rs/scripts/clean_build.py | 60 +++++++++ dragon-runner-rs/scripts/gather.py | 90 +++++++++++++ dragon-runner-rs/scripts/gen_config.py | 100 ++++++++++++++ dragon-runner-rs/scripts/grade.py | 172 ++++++++++++++++++++++++ 
dragon-runner-rs/scripts/grade_perf.py | 91 +++++++++++++ dragon-runner-rs/scripts/key.py | 33 +++++ dragon-runner-rs/scripts/loader.py | 77 +++++++++++ 12 files changed, 999 insertions(+) create mode 100644 dragon-runner-rs/scripts/__init__.py create mode 100644 dragon-runner-rs/scripts/add_empty.py create mode 100644 dragon-runner-rs/scripts/base.py create mode 100644 dragon-runner-rs/scripts/build.py create mode 100644 dragon-runner-rs/scripts/checkout.py create mode 100644 dragon-runner-rs/scripts/clean_build.py create mode 100644 dragon-runner-rs/scripts/gather.py create mode 100644 dragon-runner-rs/scripts/gen_config.py create mode 100644 dragon-runner-rs/scripts/grade.py create mode 100644 dragon-runner-rs/scripts/grade_perf.py create mode 100644 dragon-runner-rs/scripts/key.py create mode 100644 dragon-runner-rs/scripts/loader.py diff --git a/dragon-runner-rs/scripts/__init__.py b/dragon-runner-rs/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dragon-runner-rs/scripts/add_empty.py b/dragon-runner-rs/scripts/add_empty.py new file mode 100644 index 0000000..3590cee --- /dev/null +++ b/dragon-runner-rs/scripts/add_empty.py @@ -0,0 +1,110 @@ +""" +============================== 415 Grading Script ============================== +Author: Justin Meimar +Name: add_empty.py +Desc: +================================================================================ +""" +import sys +import argparse +import random +import string +from pathlib import Path +from typing import List +from base import Script + + +class AddEmptyScript(Script): + + @classmethod + def name(cls) -> str: + return "add_empty" + + @classmethod + def description(cls) -> str: + return "Add empty test cases to student test packages" + + @classmethod + def get_parser(cls) -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="add_empty", + description="Add empty test cases to test packages" + ) + parser.add_argument("key_file", type=Path, help="Key file 
which has a line for each (SID, GH_Username) pair") + parser.add_argument("search_path", type=Path, help="Path to search for test files") + parser.add_argument("empty_content", type=str, help="Empty content to write into files") + return parser + + @staticmethod + def load_key(key_path): + config = {} + with open(key_path) as key_file: + for line in key_file.readlines(): + sid, gh_username = line.strip().split(' ') + print("SID: ", sid, "\tGH Username: ", gh_username) + config[sid] = gh_username + print("Config Loaded...") + return config + + @staticmethod + def count_files_with_exclusions(directory: Path, excluded_extensions: list) -> int: + count = 0 + for path in directory.rglob('*'): + if path.is_file(): + if path.suffix.lower() not in excluded_extensions: + count += 1 + return count + + @staticmethod + def add_empty(key_file: Path, search_path: Path, empty_content: str): + config = AddEmptyScript.load_key(key_file) + + if not search_path.is_dir(): + error = "Could not create test directory." + print(error) + return 1 + + all_fine = True + for (sid, gh_user) in config.items(): + all_matches = list(search_path.rglob(sid)) + if len(all_matches) == 0: + print(f"Can not find a directory matching: {sid} in {search_path.name}") + exit(1) + if len(all_matches) > 1: + print(f"Found several matches for what should be a unique directory named {sid}:") + for m in all_matches: + print("Matched: ", m) + exit(1) + + sid_test_dir = Path(all_matches[0]) + assert sid_test_dir.is_dir() and sid_test_dir.exists() and f"{sid_test_dir} should exist." 
+ + test_count = 0 + for path in sid_test_dir.rglob("*"): + if path.is_file() and not path.is_dir() and not path.name.startswith('.'): + if path.suffix.lower() not in [".ins", ".out"]: + test_count += 1 + + if test_count >= 5: + continue + + all_fine = False + while test_count < 5: + suffix= ''.join(random.choices(string.ascii_letters + string.digits, k=8)) + file_path = sid_test_dir / f"TA_empty_{test_count+1}_{suffix}.in" + file_path.write_text(empty_content) + test_count += 1 + print(f"{sid} - Writing an empty file: {file_path.name}...") + + if all_fine: + print("All students submited at least five testcases!") + + @classmethod + def main(cls, args: List[str]) -> int: + parser = cls.get_parser() + parsed_args = parser.parse_args(args) + cls.add_empty(parsed_args.key_file, parsed_args.search_path, parsed_args.empty_content) + return 0 + +if __name__ == '__main__': + sys.exit(AddEmptyScript.main(sys.argv[1:])) diff --git a/dragon-runner-rs/scripts/base.py b/dragon-runner-rs/scripts/base.py new file mode 100644 index 0000000..7501372 --- /dev/null +++ b/dragon-runner-rs/scripts/base.py @@ -0,0 +1,42 @@ +from abc import ABC, abstractmethod +import argparse +from typing import List + +class Script(ABC): + """ + Base class for all dragon-runner scripts. + Provides a standard interface for script metadata and execution. 
+ """ + @classmethod + @abstractmethod + def name(cls) -> str: + """Return the script name (e.g., 'build.py')""" + pass + + @classmethod + @abstractmethod + def description(cls) -> str: + """Return a brief description of what the script does""" + pass + + @classmethod + @abstractmethod + def get_parser(cls) -> argparse.ArgumentParser: + """Return the argument parser for this script""" + pass + + @classmethod + def usage(cls) -> str: + """Generate usage string from the parser""" + parser = cls.get_parser() + usage_lines = parser.format_help().split('\n') + if usage_lines and usage_lines[0].startswith('usage:'): + parts = usage_lines[0].split(None, 2) + rest = parts[2] if len(parts) > 2 else '' + usage_lines[0] = f"usage: dragon-runner script {cls.name()} {rest}" + return '\n'.join(usage_lines) + + @classmethod + @abstractmethod + def main(cls, args: List[str]) -> int: + pass diff --git a/dragon-runner-rs/scripts/build.py b/dragon-runner-rs/scripts/build.py new file mode 100644 index 0000000..3471ed1 --- /dev/null +++ b/dragon-runner-rs/scripts/build.py @@ -0,0 +1,103 @@ +""" +============================== 415 Grading Script ============================== +Author: Justin Meimar +Name: build.py +Desc: build the compilers with cmake.. && make -j and log those which + fail. 
+================================================================================ +""" + +import os +import sys +import subprocess +import shutil +import argparse +from pathlib import Path +from typing import List +from base import Script + + +class BuildScript(Script): + + @classmethod + def name(cls) -> str: + return "build" + + @classmethod + def description(cls) -> str: + return "Build student compiler projects with CMake" + + @classmethod + def get_parser(cls) -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="build", + description="Build the compilers with cmake && make -j and log those which fail" + ) + parser.add_argument("start_dir", type=Path, help="Walking and build directories from this path") + parser.add_argument("log_file", type=Path, help="Path to log file") + parser.add_argument("dir_prefix", type=str, help="Prefix common to all directories to be built") + parser.add_argument("n", type=int, default=2, help="n_threads") + return parser + + @classmethod + def build(cls, start_dir, log_path, dir_prefix, n_threads="2"): + root_path = Path(start_dir).absolute() + log_path = Path(log_path).absolute() + + directories = [d for d in root_path.iterdir() if d.is_dir() and (dir_prefix in d.name) and d.name != '.'] + + print("Directories to build:") + for d in directories: + print(" ", d) + + for dir_path in directories: + print(f"-- Building project: {dir_path.name}", end='') + build_dir_path = dir_path / 'build' + try: + os.chdir(dir_path) + except OSError: + with open(log_path, 'a') as f: + f.write(f"{dir_path.name}: Failed to change directory\n") + continue + + if (build_dir_path).exists(): + shutil.rmtree(build_dir_path) + os.makedirs(build_dir_path) + os.chdir(build_dir_path) + try: + build_log = log_path.name + str(dir_path.stem) + with open(build_log, 'w') as log_file: + log_file.write(f"\n=== Building {dir_path.name} ===\n") + subprocess.run( + ['cmake', '..'], + stdout=log_file, + stderr=subprocess.STDOUT, + check=True + ) + 
subprocess.run( + ['make', '-j', n_threads], + stdout=log_file, + stderr=subprocess.STDOUT, + check=True + ) + print(" [SUCCESS]") + except subprocess.CalledProcessError: + print(f" [FAILED]") + build_log = log_path.name + str(dir_path.stem) + with open(build_log, 'w') as f: + f.write(f"{dir_path.name}: build failed\n") + finally: + os.chdir(root_path) + + print(f"Build process completed. Check {log_path} for build output and errors.") + + @classmethod + def main(cls, args: List[str]) -> int: + parser = cls.get_parser() + parsed_args = parser.parse_args(args) + parsed_args.log_file.unlink(missing_ok=True) + cls.build(parsed_args.start_dir, parsed_args.log_file, parsed_args.dir_prefix, str(parsed_args.n)) + return 0 + +if __name__ == '__main__': + sys.exit(BuildScript.main(sys.argv[1:])) diff --git a/dragon-runner-rs/scripts/checkout.py b/dragon-runner-rs/scripts/checkout.py new file mode 100644 index 0000000..eedfad7 --- /dev/null +++ b/dragon-runner-rs/scripts/checkout.py @@ -0,0 +1,121 @@ +import sys +import subprocess +import argparse +from datetime import datetime +from pathlib import Path +from typing import List +from base import Script + + +class CheckoutScript(Script): + + @classmethod + def name(cls) -> str: + return "checkout" + + @classmethod + def description(cls) -> str: + return "Checkout git repositories to the latest commit before a specified time" + + @classmethod + def get_parser(cls) -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="checkout", + description="Checkout student git repositories to a specific commit time" + ) + parser.add_argument('submission_dir', + type=Path, + help='Directory of repositories to checkout') + parser.add_argument('checkout_time', + help='Checkout time in format: "YYYY-MM-DD HH:MM:SS"') + return parser + + @classmethod + def get_commit_at_time(cls, repo_path, checkout_time): + result = subprocess.run( + ['git', 'rev-list', '-1', f'--before={checkout_time}', 'HEAD'], + cwd=repo_path, + 
capture_output=True, + text=True + ) + if result.returncode != 0: + return None + return result.stdout.strip() + + @classmethod + def get_commit_time(cls, repo_path, commit_hash): + result = subprocess.run( + ['git', 'show', '-s', '--format=%ci', commit_hash], + cwd=repo_path, + capture_output=True, + text=True + ) + if result.returncode != 0: + return None + return result.stdout.strip() + + @classmethod + def checkout_commit(cls, repo_path, commit_hash): + result = subprocess.run( + ['git', 'checkout', commit_hash], + cwd=repo_path, + capture_output=True, + text=True + ) + return result.returncode == 0 + + @classmethod + def process_repositories(cls, submissions_dir: Path, checkout_time: str): + for submission_dir in sorted(submissions_dir.iterdir()): + if not submission_dir.is_dir(): + continue + + git_dir = submission_dir / '.git' + if not git_dir.exists(): + print(f"\nSkipping {submission_dir.name} - not a git repository") + continue + print(f"\nProcessing: {submission_dir.name}") + + commit_hash = cls.get_commit_at_time(submission_dir, checkout_time) + if not commit_hash: + print(f" No commits found before {checkout_time}") + continue + + commit_time = cls.get_commit_time(submission_dir, commit_hash) + if cls.checkout_commit(submission_dir, commit_hash): + print(f" Checked out to: {commit_hash[:8]}") + print(f" Commit time: {commit_time}") + else: + print(f" Failed to checkout {commit_hash[:8]}") + + @classmethod + def validate_checkout_time(cls, time_str): + try: + datetime.strptime(time_str, '%Y-%m-%d %H:%M:%S') + return True + except ValueError: + return False + + @classmethod + def main(cls, args: List[str]) -> int: + parser = cls.get_parser() + parsed_args = parser.parse_args(args) + + sub = Path(parsed_args.submission_dir) + + if not sub.exists(): + print("Submission directory does not exist...") + return 1 + + if not cls.validate_checkout_time(parsed_args.checkout_time): + print('Invalid checkout_time format. 
Use: "YYYY-MM-DD HH:MM:SS"') + return 1 + + print(f"Using submission dir: {sub}") + print(f"Checking out to latest commit before: {parsed_args.checkout_time}") + + cls.process_repositories(sub, parsed_args.checkout_time) + return 0 + +if __name__ == "__main__": + sys.exit(CheckoutScript.main(sys.argv[1:])) diff --git a/dragon-runner-rs/scripts/clean_build.py b/dragon-runner-rs/scripts/clean_build.py new file mode 100644 index 0000000..e1b7282 --- /dev/null +++ b/dragon-runner-rs/scripts/clean_build.py @@ -0,0 +1,60 @@ +import sys +import shutil +from pathlib import Path +import argparse +from typing import List +from base import Script + + +class CleanBuildScript(Script): + + @classmethod + def name(cls) -> str: + return "clean-build" + + @classmethod + def description(cls) -> str: + return "Remove build directories from student submissions" + + @classmethod + def get_parser(cls) -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="clean-build", + description="Remove build directories from all submissions" + ) + parser.add_argument('submission_dir', type=Path, help='Directory of submissions to clean') + return parser + + @staticmethod + def remove_build_dirs(submissions_dir: Path): + for submission_dir in sorted(submissions_dir.iterdir()): + if not submission_dir.is_dir(): + continue + + build_dir = submission_dir / 'build' + if not build_dir.exists(): + continue + + print(f"Removing build directory in: {submission_dir.name}") + try: + shutil.rmtree(build_dir) + print(f" Successfully removed") + except Exception as e: + print(f" Failed: {e}") + + @classmethod + def main(cls, args: List[str]) -> int: + parser = cls.get_parser() + parsed_args = parser.parse_args(args) + + sub = Path(parsed_args.submission_dir) + + if not sub.exists(): + print("Submission directory does not exist...") + return 1 + + cls.remove_build_dirs(sub) + return 0 + +if __name__ == "__main__": + sys.exit(CleanBuildScript.main(sys.argv[1:])) diff --git 
a/dragon-runner-rs/scripts/gather.py b/dragon-runner-rs/scripts/gather.py new file mode 100644 index 0000000..0e0d569 --- /dev/null +++ b/dragon-runner-rs/scripts/gather.py @@ -0,0 +1,90 @@ +""" +============================== 415 Grading Script ============================== +Author: Justin Meimar +Name: gather.py +Desc: +================================================================================ +""" + +import sys +import shutil +import argparse +from pathlib import Path +from typing import List +from base import Script + + +class GatherScript(Script): + + @classmethod + def name(cls) -> str: + return "gather" + + @classmethod + def description(cls) -> str: + return "Gather test files from student submissions" + + @classmethod + def get_parser(cls) -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="gather", + description="Gather all the testfiles in student directories" + ) + parser.add_argument("key_file", type=Path, help="Key file which has a line for each (SID, GH_Username) pair") + parser.add_argument("search_path", type=Path, help="Path to search for test files") + parser.add_argument("project_name", type=Path, help="Path to search for test files") + return parser + + @staticmethod + def load_key(key_path: Path): + config = {} + with open(key_path) as key_file: + for line in key_file.readlines(): + sid, gh_username = line.strip().split(' ') + print("SID: ", sid, "\tGH Username: ", gh_username) + config[sid] = gh_username + return config + + @staticmethod + def gather(key_file: Path, + search_path: str, + project_name: str, + output_dir: str = "submitted-testfiles"): + is_rt = True + config = GatherScript.load_key(key_file) + search_dir = Path(search_path) + project_name = str(project_name).strip() + + if not search_dir.is_dir(): + error = "Could not create test directory." 
+ print(error) + return 1 + + directories = [d for d in search_dir.iterdir() if d.is_dir() and str(project_name) in d.name] + for (sid, gh_user) in config.items(): + print("Finding submission for: ", gh_user) + for d in directories: + if gh_user in str(d): + if is_rt: + suffix = '-'.join(gh_user.split('-')[1:]) + expected_test_dir = d / "tests" / "testfiles" / suffix + else: + expected_test_dir = d / "tests" / "testfiles" / sid + + if expected_test_dir.is_dir(): + print(f"-- Found properly formatted testfiles for {sid}") + shutil.copytree(expected_test_dir, (Path(output_dir) / sid), dirs_exist_ok=True) + break + else: + print(f"-- Could NOT find testfiles for {sid}") + exit(1) + + @classmethod + def main(cls, args: List[str]) -> int: + parser = cls.get_parser() + parsed_args = parser.parse_args(args) + cls.gather(parsed_args.key_file, parsed_args.search_path, parsed_args.project_name) + return 0 + +if __name__ == '__main__': + sys.exit(GatherScript.main(sys.argv[1:])) diff --git a/dragon-runner-rs/scripts/gen_config.py b/dragon-runner-rs/scripts/gen_config.py new file mode 100644 index 0000000..56f4262 --- /dev/null +++ b/dragon-runner-rs/scripts/gen_config.py @@ -0,0 +1,100 @@ +""" +============================== 415 Grading Script ============================== +Author: Justin Meimar +Name: gen_config.py +Desc: +================================================================================ +""" +import sys +import json +import argparse +from typing import Optional, List +from pathlib import Path +from typing import Iterator, Tuple +from base import Script +from key import Key + + +class GenConfigScript(Script): + + @classmethod + def name(cls) -> str: + return "gen-config" + + @classmethod + def description(cls) -> str: + return "Generate dragon-runner configuration from submissions" + + @classmethod + def get_parser(cls) -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="gen-config", + description="Generate dragon-runner configuration from 
student submissions" + ) + parser.add_argument("key_path", type=Path, + help="Path to key file containing each team/ccid on a line.") + parser.add_argument("submissions_path", type=Path, + help="Path to project submissions cloned from github classroom.") + parser.add_argument("binary", type=str, + help="Name of binary to expect in prohjects bin/") + parser.add_argument("--runtime", type=str, default=None, + help="Name of runtime library to expect in prohjects bin/") + return parser + + @staticmethod + def gen_config(key_path:Path, + submission_dir:Path, + binary:str, + runtime:Optional[str]=None): + + executables_config = {} + runtimes_config = {} + config = {} + + assert key_path.is_file(), "must supply regular file as key" + assert submission_dir.is_dir(), "must supply directory to submissions." + + key = Key(key_path) + for (sids, repo_suffix) in key.iter_both(): + match_dir = [d for d in submission_dir.iterdir() if d.is_dir() and str(repo_suffix) in d.name] + if match_dir == []: + print(f"Couldn't find: repo with suffix {repo_suffix}") + exit(1) + + match_dir = Path(match_dir[0]) + expected_package = match_dir / "tests/testfiles" / sids + expected_binary = match_dir / f"bin/{binary}" + expected_runtime = match_dir / f"bin/{runtime}" + + if not expected_package.is_file: + print(f"Can not find expected package: {expected_package}") + break; + + if not expected_binary.is_file: + print(f"Can not find expected binary: {expected_binary}") + break; + + if runtime is not None and not expected_runtime.is_file: + print(f"Can not find expected binary: {expected_binary}") + break; + + executables_config.update({f"{sids}":f"{Path.absolute(expected_binary)}"}) + runtimes_config.update({f"{sids}":f"{Path.absolute(expected_runtime)}"}) + + config.update({"testedExecutablePaths": executables_config}) + if runtime is not None: + config.update({"runtimes": runtimes_config}) + + print(json.dumps(config, indent=4)) + with open('config.json', 'w') as f: + json.dump(config, f, 
indent=4) + + @classmethod + def main(cls, args: List[str]) -> int: + parser = cls.get_parser() + parsed_args = parser.parse_args(args) + cls.gen_config(parsed_args.key_path, parsed_args.submissions_path, parsed_args.binary, parsed_args.runtime) + return 0 + +if __name__ == '__main__': + sys.exit(GenConfigScript.main(sys.argv[1:])) diff --git a/dragon-runner-rs/scripts/grade.py b/dragon-runner-rs/scripts/grade.py new file mode 100644 index 0000000..ac5eca8 --- /dev/null +++ b/dragon-runner-rs/scripts/grade.py @@ -0,0 +1,172 @@ +""" +This script must run with symmetric tables, meaning nrows = ncols. +""" +import sys +import argparse +import csv +from pathlib import Path +from fractions import Fraction +from typing import List +from base import Script + + +class GradeScript(Script): + + DEFENSIVE_PTS = 2 + OFFENSIVE_PTS = 1 + COHERENCE_PTS = 10 + COMPETITIVE_WEIGHT = 0.2 + TA_WEIGHT = 0.5 + + @classmethod + def name(cls) -> str: + return "grade" + + @classmethod + def description(cls) -> str: + return "Grade tournament results and compute scores" + + @classmethod + def get_parser(cls) -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="grade", + description="Grade 415 tournament results" + ) + parser.add_argument("tournament_csvs", type=Path, nargs="+", + help="Path(s) to tournament CSV files") + parser.add_argument("output_csv", type=Path, + help="Path to output CSV file") + parser.add_argument("--solution-name", type=str, default="solution", + help="Name of the solution/TA executable in the CSV (default: 'solution')") + return parser + + @staticmethod + def parse_fraction(s): + try: + return round(float(Fraction(s)), 4) + except (ValueError, ZeroDivisionError): + return round(float(s), 4) if s else 0.0 + + @staticmethod + def load_csv(filepath): + with open(filepath, 'r') as f: + return list(csv.reader(f)) + + @classmethod + def average_tables(cls, tables): + table = tables[0] + n_rows = len(table) + n_cols = len(table[0]) + assert n_rows == 
n_cols, f"Expected table to be symmetric! Found {n_rows} rows and {n_cols} columns" + + avg_table = [row[:] for row in tables[0]] + avg_table[0][0] = "toolchain_summary" + for j in range(1, n_cols): + for i in range(1, n_rows): + avg_cell = 0 + for table in tables: + avg_cell += cls.parse_fraction(table[i][j]) + avg_table[i][j] = round(avg_cell / len(tables), 2) + return avg_table + + @classmethod + def compute_tournament_points(cls, table, solution_name): + n_rows = len(table) + n_cols = len(table[0]) + solution_col = None + for j in range(1, n_cols): + if table[0][j].lower() == solution_name.lower(): + solution_col = j + break + + print(f"{n_rows}:{n_cols}") + print(f"Computing tournament with solution '{table[0][solution_col]}' at column: {solution_col}") + scores = { + 'defensive': [], + 'offensive': [], + 'coherence': [], + 'ta': [] + } + for j in range(1, n_cols): + d_score = 0 + o_score = 0 + c_score = 0 + ta_score = 0 + c_score = cls.COHERENCE_PTS if cls.parse_fraction(table[j][j]) == 1 else 0 + if solution_col is not None and solution_col < len(table[j]): + ta_score = cls.parse_fraction(table[j][solution_col]) + + for i in range(1, n_rows): + if i != j: + d_score += cls.DEFENSIVE_PTS * cls.parse_fraction(table[j][i]) + + for k in range(1, n_cols): + if k != j and k < len(table[j]): + o_score += cls.OFFENSIVE_PTS * (1 - cls.parse_fraction(table[k][j])) + + scores['defensive'].append(round(d_score, 2)) + scores['offensive'].append(round(o_score, 2)) + scores['coherence'].append(c_score) + scores['ta'].append(ta_score) + + print(scores) + return scores + + @classmethod + def create_summary_table(cls, base_table, avg_scores): + summary = [["toolchain summary"] + base_table[0][1:]] + + for i in range(1, len(base_table)): + if i < len(base_table[0]): + row = [base_table[i][0]] + for j in range(1, len(base_table[0])): + if i < len(base_table) and j < len(base_table[i]): + row.append(round(cls.parse_fraction(base_table[i][j]), 3)) + else: + row.append(0) + 
summary.append(row) + + competitive_total = [] + for i in range(len(avg_scores['defensive'])): + total = (avg_scores['defensive'][i] + + avg_scores['offensive'][i] + + avg_scores['coherence'][i]) + competitive_total.append(total) + + max_score = max(competitive_total) if competitive_total else 1 + + summary.append(["Defensive Points"] + [f"{s:.2f}" for s in avg_scores['defensive']]) + summary.append(["Offensive Points"] + [f"{s:.2f}" for s in avg_scores['offensive']]) + summary.append(["Coherence Points"] + [f"{s:.0f}" for s in avg_scores['coherence']]) + summary.append(["Competitive Points"] + [f"{s:.2f}" for s in competitive_total]) + summary.append(["TA Testing Score (50% Weight)"] + + [f"{s * cls.TA_WEIGHT:.3f}" for s in avg_scores['ta']]) + normalized = [cls.COMPETITIVE_WEIGHT * (s / max_score) for s in competitive_total] + summary.append(["Normalized Points (20% Weight)"] + [f"{s:.3f}" for s in normalized]) + return summary + + @classmethod + def grade(cls, toolchain_paths, output_path, solution_name): + tables = [cls.load_csv(path) for path in toolchain_paths] + avg_table = cls.average_tables(tables) + scores = cls.compute_tournament_points(avg_table, solution_name) + + with open(output_path, 'w', newline='') as f: + writer = csv.writer(f) + for table in tables: + writer.writerows(table) + writer.writerow([]) + writer.writerows(cls.create_summary_table(avg_table, scores)) + + print(f"Grading complete. 
Output written to {output_path}") + print(f"Solution name used: '{solution_name}'") + return 0 + + @classmethod + def main(cls, args: List[str]) -> int: + parser = cls.get_parser() + parsed_args = parser.parse_args(args) + return cls.grade(parsed_args.tournament_csvs, parsed_args.output_csv, parsed_args.solution_name) + +if __name__ == "__main__": + sys.exit(GradeScript.main(sys.argv[1:])) diff --git a/dragon-runner-rs/scripts/grade_perf.py b/dragon-runner-rs/scripts/grade_perf.py new file mode 100644 index 0000000..34adb9e --- /dev/null +++ b/dragon-runner-rs/scripts/grade_perf.py @@ -0,0 +1,91 @@ +""" +============================== 415 Grading Script ============================== +Author: Justin Meimar +Name: grade_perf.py +Desc: Dragon-runner with a config pointing to the performance tests & an + executable for each compiler to be tested, when run with --mode=perf, + will produce a perf.csv file. + + This script takes perf.csv as its input and runs the performance testing + grading algorithm to return a single CSV row, indicating the perf scores + for each team. + + The intention is that the single row be manually copy and pasted into the + row output by the grade.py script. 
+================================================================================ +""" +import sys +import argparse +import csv +import numpy as np +from pathlib import Path +from typing import List +from base import Script + + +class GradePerfScript(Script): + + @classmethod + def name(cls) -> str: + return "grade-perf" + + @classmethod + def description(cls) -> str: + return "Grade performance testing results" + + @classmethod + def get_parser(cls) -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="grade-perf", + description="Grade performance testing results" + ) + parser.add_argument( + "perf_csv", + type=Path, + help="Path to csv file generated from grade mode" + ) + parser.add_argument( + "output_csv", + type=Path, + help="Path to final output csv with grades" + ) + return parser + + @staticmethod + def grade_perf(*args): + if len(args) < 2: + print("Must supply two arguments: ") + return 1 + + with open(args[0], "r") as perf_csv: + reader = csv.reader(perf_csv) + headers = next(reader) + test_data = [row for row in reader if row and any(row)] + + raw_times = np.array([[float(x) for x in row[1:]] for row in test_data]) + + scores = [] + for times in raw_times: + fastest_time = min(times) + test_scores = [fastest_time / time for time in times] + scores.append(test_scores) + total_scores = np.mean(scores, axis=0) + + print(headers[1:]) + print(total_scores) + + # Write results to output CSV + with open(args[1], "w") as output_csv: + writer = csv.writer(output_csv) + writer.writerow(headers[1:]) + writer.writerow(total_scores) + + @classmethod + def main(cls, args: List[str]) -> int: + parser = cls.get_parser() + parsed_args = parser.parse_args(args) + cls.grade_perf(parsed_args.perf_csv, parsed_args.output_csv) + return 0 + +if __name__ == "__main__": + sys.exit(GradePerfScript.main(sys.argv[1:])) diff --git a/dragon-runner-rs/scripts/key.py b/dragon-runner-rs/scripts/key.py new file mode 100644 index 0000000..26536ce --- /dev/null +++ 
b/dragon-runner-rs/scripts/key.py @@ -0,0 +1,33 @@ +from typing import Tuple +from pathlib import Path +from typing import Iterator + +class Key: + def __init__(self, key_path: Path): + self.key_path = key_path + self.sid_repo_suffix_map = {} + + with open(key_path) as key_file: + for line in key_file.readlines(): + sids, repo_suffix = line.strip().split(' ') + sid_list = sids.strip().split(',') + for sid in sid_list: + self.sid_repo_suffix_map[sid] = repo_suffix + + def __str__(self): + s = "" + for k, v in self.sid_repo_suffix_map.items(): + s += (f"{k}\t{v}") + return s + + def get_repo_for_sid(self, sid): + return self.sid_repo_suffix_map[sid] + + def iter_sids(self) -> Iterator[str]: + return iter(self.sid_repo_suffix_map.keys()) + + def iter_repos(self) -> Iterator[str]: + return iter(set(self.sid_repo_suffix_map.values())) + + def iter_both(self) -> Iterator[Tuple[str, str]]: + return iter(self.sid_repo_suffix_map.items()) diff --git a/dragon-runner-rs/scripts/loader.py b/dragon-runner-rs/scripts/loader.py new file mode 100644 index 0000000..ca46879 --- /dev/null +++ b/dragon-runner-rs/scripts/loader.py @@ -0,0 +1,77 @@ +import subprocess +import sys +import importlib +from typing import List, Dict, Type, Optional +from pathlib import Path +from base import Script + + +class Loader: + """ + Dragon runner allows grading scripts to be run through its CLI. + Each script is executed as a subprocess using Python's -m flag to ensure + consistent behavior whether called directly or through dragon-runner. 
+ """ + # Directory containing the script modules + SCRIPTS_DIR = Path(__file__).parent + + def __init__(self): + self.script_modules = { + "add_empty": "add_empty", + "build": "build", + "clean-build": "clean_build", + "checkout": "checkout", + "gather": "gather", + "gen-config": "gen_config", + "grade": "grade", + "grade-perf": "grade_perf", + } + + def _load_script_class(self, module_name: str) -> Optional[Type[Script]]: + """ + Dynamically load a script module and return its Script class if it exists. + Returns None if the module doesn't implement the Script interface. + """ + try: + module = importlib.import_module(module_name) + for attr_name in dir(module): + attr = getattr(module, attr_name) + if (isinstance(attr, type) and + issubclass(attr, Script) and + attr is not Script): + return attr + except Exception: + pass + return None + + def __call__(self, args: List[str]): + """ + Select the script to run from the mode argument passed through + dragon-runner CLI and execute it as a subprocess. + """ + if args == [] or args[0] not in self.script_modules: + print(self) + return 1 + + module = self.script_modules[args[0]] + script_path = self.SCRIPTS_DIR / f"{module}.py" + cmd = [sys.executable, str(script_path)] + args[1:] + try: + result = subprocess.run(cmd, check=False) + return result.returncode + except Exception as e: + print(f"Failed to run script: {e}") + return 1 + + def __repr__(self): + """ + Display all available scripts with their descriptions and usage. 
+ """ + s = "Available Scripts:\n" + for script_name, module_name in self.script_modules.items(): + script_class = self._load_script_class(module_name) + max_script = max(self.script_modules.keys(),key=lambda x: len(x)) + if script_class: + s += f" * {script_name}: {(len(max_script) - len(script_name))* ' '} " + s += f"{script_class.description()}\n" + return s From bcda87a77b3f315ec925b4033703fbcee3760ec9 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 15:19:02 -0700 Subject: [PATCH 04/45] refactor: drive grading scripts from cli --- dragon-runner-rs/src/cli.rs | 47 ++++++++++----- dragon-runner-rs/src/main.rs | 107 ++++++++++++++++++++++++++++------- 2 files changed, 119 insertions(+), 35 deletions(-) diff --git a/dragon-runner-rs/src/cli.rs b/dragon-runner-rs/src/cli.rs index 4be409f..2ff23bd 100644 --- a/dragon-runner-rs/src/cli.rs +++ b/dragon-runner-rs/src/cli.rs @@ -98,6 +98,12 @@ pub enum Commands { #[command(flatten)] flags: CommonFlags, }, + /// Run a grading script + Script { + /// Script name and arguments + #[arg(trailing_var_arg = true, allow_hyphen_values = true)] + args: Vec, + }, } #[derive(Debug, Clone)] @@ -154,16 +160,23 @@ impl RunnerArgs { } } -/// Parse CLI arguments into RunnerArgs. +/// Result of parsing CLI arguments — either a runner mode or a script invocation. +pub enum CliAction { + Run(RunnerArgs), + Script(Vec), +} + +/// Parse CLI arguments into a CliAction. /// /// Supports: `dragon-runner config.json [flags...]` +/// `dragon-runner script [args...]` /// If no recognized mode is given, inserts "regular" so clap can parse it. -pub fn parse_cli_args() -> RunnerArgs { +pub fn parse_cli_args() -> CliAction { let raw_args: Vec = std::env::args().collect(); // If the user omits the mode subcommand, default to "regular". // Detect this by checking whether the second arg is a known subcommand. 
- let known_modes = ["regular", "tournament", "perf", "memcheck"]; + let known_modes = ["regular", "tournament", "perf", "memcheck", "script"]; let args_to_parse = if raw_args.len() >= 2 && !known_modes.contains(&raw_args[1].as_str()) && !raw_args[1].starts_with('-') { // Insert "regular" as the subcommand let mut patched = vec![raw_args[0].clone(), "regular".to_string()]; @@ -175,18 +188,24 @@ pub fn parse_cli_args() -> RunnerArgs { let cli = Cli::parse_from(args_to_parse); - let (mode, flags) = match cli.command { - Commands::Regular { flags } => (Mode::Regular, flags), - Commands::Tournament { flags } => (Mode::Tournament, flags), - Commands::Perf { flags } => (Mode::Perf, flags), - Commands::Memcheck { flags } => (Mode::Memcheck, flags), - }; + match cli.command { + Commands::Script { args } => CliAction::Script(args), + commands => { + let (mode, flags) = match commands { + Commands::Regular { flags } => (Mode::Regular, flags), + Commands::Tournament { flags } => (Mode::Tournament, flags), + Commands::Perf { flags } => (Mode::Perf, flags), + Commands::Memcheck { flags } => (Mode::Memcheck, flags), + Commands::Script { .. 
} => unreachable!(), + }; - let args = RunnerArgs::from_flags(mode, flags); + let args = RunnerArgs::from_flags(mode, flags); - // Set debug environment variable and refresh the logger - std::env::set_var("DRAGON_RUNNER_DEBUG", args.verbosity.to_string()); - crate::log::refresh_debug_level(); + // Set debug environment variable and refresh the logger + std::env::set_var("DRAGON_RUNNER_DEBUG", args.verbosity.to_string()); + crate::log::refresh_debug_level(); - args + CliAction::Run(args) + } + } } diff --git a/dragon-runner-rs/src/main.rs b/dragon-runner-rs/src/main.rs index 34af9e5..81762b0 100644 --- a/dragon-runner-rs/src/main.rs +++ b/dragon-runner-rs/src/main.rs @@ -1,12 +1,90 @@ +use std::process::Command; + use colored::Colorize; -use dragon_runner_rs::cli::{parse_cli_args, Mode}; +use dragon_runner_rs::cli::{parse_cli_args, CliAction, Mode}; use dragon_runner_rs::config::load_config; use dragon_runner_rs::harness::*; use dragon_runner_rs::log::log; +/// Directory containing grading scripts. +/// Uses CARGO_MANIFEST_DIR baked in at compile time, so it works for both +/// `cargo run` and `cargo install --path .` (as long as the source tree remains). +/// Override with DRAGON_RUNNER_SCRIPTS env var if needed. +fn scripts_dir() -> std::path::PathBuf { + if let Ok(dir) = std::env::var("DRAGON_RUNNER_SCRIPTS") { + return std::path::PathBuf::from(dir); + } + std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("scripts") +} + +/// Map script CLI names to their Python module filenames. 
+fn script_module(name: &str) -> Option<&'static str> { + match name { + "add_empty" => Some("add_empty.py"), + "build" => Some("build.py"), + "clean-build" => Some("clean_build.py"), + "checkout" => Some("checkout.py"), + "gather" => Some("gather.py"), + "gen-config" => Some("gen_config.py"), + "grade" => Some("grade.py"), + "grade-perf" => Some("grade_perf.py"), + _ => None, + } +} + +fn run_script(args: Vec) -> i32 { + if args.is_empty() { + let names = [ + "add_empty", "build", "clean-build", "checkout", + "gather", "gen-config", "grade", "grade-perf", + ]; + eprintln!("Available scripts:"); + for name in &names { + eprintln!(" {}", name); + } + return 1; + } + + let script_name = &args[0]; + let module = match script_module(script_name) { + Some(m) => m, + None => { + eprintln!("Unknown script: {}", script_name); + return 1; + } + }; + + let script_path = scripts_dir().join(module); + if !script_path.exists() { + eprintln!("Script file not found: {}", script_path.display()); + return 1; + } + + let status = Command::new("python3") + .arg(&script_path) + .args(&args[1..]) + .status(); + + match status { + Ok(s) => s.code().unwrap_or(1), + Err(e) => { + eprintln!("Failed to run script: {}", e); + 1 + } + } +} + fn main() { - let cli_args = parse_cli_args(); + let action = parse_cli_args(); + + let cli_args = match action { + CliAction::Script(args) => { + std::process::exit(run_script(args)); + } + CliAction::Run(args) => args, + }; + log(1, 0, &format!("{:?}", cli_args)); let config = match load_config(&cli_args.config_file, Some(&cli_args)) { @@ -17,25 +95,12 @@ fn main() { } }; - if config.error_collection.has_errors() { - log( - 0, - 0, - &format!( - "Found Config {} error(s):", - config.error_collection.len() - ), - ); - log( - 0, - 0, - &format!("Parsed {} below:", cli_args.config_file), - ); - log( - 0, - 0, - &format!("{}", config.error_collection).red().to_string(), - ); + if !config.errors.is_empty() { + log(0, 0, &format!("Found Config {} error(s):", 
config.errors.len())); + log(0, 0, &format!("Parsed {} below:", cli_args.config_file)); + for e in &config.errors { + log(0, 0, &format!("{e}").red().to_string()); + } std::process::exit(1); } From e438dd538826b022d81295fef0d74960f3d2c816 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 15:19:53 -0700 Subject: [PATCH 05/45] refactor: replace custom error type with sum type --- dragon-runner-rs/src/config.rs | 51 +++++++++++++------------------ dragon-runner-rs/src/error.rs | 48 ++--------------------------- dragon-runner-rs/src/testfile.rs | 14 ++++----- dragon-runner-rs/src/toolchain.rs | 17 +++++------ 4 files changed, 38 insertions(+), 92 deletions(-) diff --git a/dragon-runner-rs/src/config.rs b/dragon-runner-rs/src/config.rs index c6c12da..3742cdd 100644 --- a/dragon-runner-rs/src/config.rs +++ b/dragon-runner-rs/src/config.rs @@ -5,7 +5,7 @@ use std::path::{Path, PathBuf}; use std::sync::Arc; use crate::cli::RunnerArgs; -use crate::error::{DragonError, Errors, Verifiable}; +use crate::error::{DragonError, Validate}; use crate::log::log; use crate::testfile::TestFile; use crate::toolchain::ToolChain; @@ -53,12 +53,9 @@ impl SubPackage { } } -impl Verifiable for SubPackage { - fn verify(&self) -> Errors { - self.tests.iter().fold(Errors::new(), |mut acc, t| { - acc.extend(&t.verify()); - acc - }) +impl Validate for SubPackage { + fn validate(&self) -> Vec { + self.tests.iter().flat_map(|t| t.validate()).collect() } } @@ -132,12 +129,9 @@ impl Package { } } -impl Verifiable for Package { - fn verify(&self) -> Errors { - self.subpackages.iter().fold(Errors::new(), |mut acc, spkg| { - acc.extend(&spkg.verify()); - acc - }) +impl Validate for Package { + fn validate(&self) -> Vec { + self.subpackages.iter().flat_map(|s| s.validate()).collect() } } @@ -183,9 +177,9 @@ impl Executable { } } -impl Verifiable for Executable { - fn verify(&self) -> Errors { - let mut errors = Errors::new(); +impl Validate for Executable { + fn validate(&self) -> Vec { + let 
mut errors = Vec::new(); if !Path::new(&self.exe_path).exists() { errors.push(DragonError::Config(format!( "Cannot find binary file: {} in Executable: {}", self.exe_path, self.id @@ -215,7 +209,7 @@ pub struct Config { pub toolchains: Vec, pub packages: Vec, pub package_filter: String, - pub error_collection: Errors, + pub errors: Vec, } impl Config { @@ -258,9 +252,9 @@ impl Config { toolchains, packages, package_filter: package_filter.into(), - error_collection: Errors::new(), + errors: Vec::new(), }; - cfg.error_collection = cfg.collect_errors(); + cfg.errors = cfg.collect_errors(); cfg } @@ -326,20 +320,19 @@ impl Config { .collect() } - fn collect_errors(&self) -> Errors { - let mut ec = Errors::new(); + fn collect_errors(&self) -> Vec { + let mut errors = Vec::new(); if !Path::new(&self.test_dir).exists() { - ec.push(DragonError::Config(format!( + errors.push(DragonError::Config(format!( "Cannot find test directory: {}", self.test_dir ))); } - for item in self.executables.iter().map(|e| e.verify()) - .chain(self.toolchains.iter().map(|t| t.verify())) - .chain(self.packages.iter().map(|p| p.verify())) - { - ec.extend(&item); - } - ec + errors.extend( + self.executables.iter().flat_map(|e| e.validate()) + .chain(self.toolchains.iter().flat_map(|t| t.validate())) + .chain(self.packages.iter().flat_map(|p| p.validate())) + ); + errors } pub fn log_test_info(&self) { diff --git a/dragon-runner-rs/src/error.rs b/dragon-runner-rs/src/error.rs index e46dd52..7ff9af8 100644 --- a/dragon-runner-rs/src/error.rs +++ b/dragon-runner-rs/src/error.rs @@ -1,4 +1,3 @@ -use std::fmt; use thiserror::Error; #[derive(Debug, Clone, Error)] @@ -9,49 +8,6 @@ pub enum DragonError { TestFile(String), } -/// Collect validation errors from config, toolchains, test files, etc. -/// Just a thin newtype over Vec so we can impl Display. 
-#[derive(Debug, Clone, Default)] -pub struct Errors(pub Vec); - -impl Errors { - pub fn new() -> Self { - Self(Vec::new()) - } - - pub fn has_errors(&self) -> bool { - !self.0.is_empty() - } - - pub fn push(&mut self, error: DragonError) { - self.0.push(error); - } - - pub fn extend(&mut self, other: &Errors) { - self.0.extend_from_slice(&other.0); - } - - pub fn len(&self) -> usize { - self.0.len() - } - - pub fn is_empty(&self) -> bool { - self.0.is_empty() - } -} - -impl fmt::Display for Errors { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - for (i, err) in self.0.iter().enumerate() { - if i > 0 { - writeln!(f)?; - } - write!(f, "{err}")?; - } - Ok(()) - } -} - -pub trait Verifiable { - fn verify(&self) -> Errors; +pub trait Validate { + fn validate(&self) -> Vec; } diff --git a/dragon-runner-rs/src/testfile.rs b/dragon-runner-rs/src/testfile.rs index 06c60e5..e017c04 100644 --- a/dragon-runner-rs/src/testfile.rs +++ b/dragon-runner-rs/src/testfile.rs @@ -2,7 +2,7 @@ use std::fs; use std::io::{self, BufRead}; use std::path::Path; -use crate::error::{DragonError, Errors, Verifiable}; +use crate::error::{DragonError, Validate}; use crate::util::{file_to_bytes, str_to_bytes}; /// Represents a single test case file with parsed directives. 
@@ -164,15 +164,15 @@ impl TestFile { } } -impl Verifiable for TestFile { - fn verify(&self) -> Errors { - let mut ec = Errors::new(); +impl Validate for TestFile { + fn validate(&self) -> Vec { + let mut errors = Vec::new(); if let DirectiveResult::Err(msg) = &self.expected_out { - ec.push(DragonError::TestFile(msg.clone())); + errors.push(DragonError::TestFile(msg.clone())); } if let DirectiveResult::Err(msg) = &self.input_stream { - ec.push(DragonError::TestFile(msg.clone())); + errors.push(DragonError::TestFile(msg.clone())); } - ec + errors } } diff --git a/dragon-runner-rs/src/toolchain.rs b/dragon-runner-rs/src/toolchain.rs index d175b04..577b449 100644 --- a/dragon-runner-rs/src/toolchain.rs +++ b/dragon-runner-rs/src/toolchain.rs @@ -1,6 +1,6 @@ use std::path::Path; -use crate::error::{DragonError, Errors, Verifiable}; +use crate::error::{DragonError, Validate}; /// A single step in a toolchain (e.g., compile, link, run). #[derive(Debug, Clone)] @@ -32,9 +32,9 @@ impl Step { } } -impl Verifiable for Step { - fn verify(&self) -> Errors { - let mut errors = Errors::new(); +impl Validate for Step { + fn validate(&self) -> Vec { + let mut errors = Vec::new(); if self.name.is_empty() { errors.push(DragonError::Config(format!( "Missing required field 'stepName' in Step {}", self.name @@ -81,11 +81,8 @@ impl ToolChain { } } -impl Verifiable for ToolChain { - fn verify(&self) -> Errors { - self.steps.iter().fold(Errors::new(), |mut acc, step| { - acc.extend(&step.verify()); - acc - }) +impl Validate for ToolChain { + fn validate(&self) -> Vec { + self.steps.iter().flat_map(|s| s.validate()).collect() } } From 7035d77260d8bb3bfda5a85b4e86f57474e8dcf8 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 15:28:57 -0700 Subject: [PATCH 06/45] clean: delete old python project --- .../scripts/__init__.py => CLAUDE.md | 0 dragon-runner-rs/Cargo.lock => Cargo.lock | 0 dragon-runner-rs/Cargo.toml => Cargo.toml | 4 + dragon-runner-rs/tests/test_config.rs | 107 ----- 
dragon-runner-rs/tests/test_grader.rs | 42 -- dragon-runner-rs/tests/test_runner.rs | 64 --- dragon-runner-rs/tests/test_runtime.rs | 76 ---- dragon_runner/scripts/__init__.py | 0 dragon_runner/scripts/add_empty.py | 111 ----- dragon_runner/scripts/base.py | 43 -- dragon_runner/scripts/build.py | 104 ----- dragon_runner/scripts/checkout.py | 122 ----- dragon_runner/scripts/clean_build.py | 61 --- dragon_runner/scripts/gather.py | 91 ---- dragon_runner/scripts/gen_config.py | 131 ------ dragon_runner/scripts/grade.py | 172 ------- dragon_runner/scripts/grade_perf.py | 92 ---- dragon_runner/scripts/key.py | 34 -- dragon_runner/scripts/loader.py | 75 ---- dragon_runner/src/__init__.py | 0 dragon_runner/src/cli.py | 112 ----- dragon_runner/src/config.py | 275 ------------ dragon_runner/src/errors.py | 58 --- dragon_runner/src/harness.py | 315 ------------- dragon_runner/src/log.py | 38 -- dragon_runner/src/main.py | 80 ---- dragon_runner/src/runner.py | 423 ------------------ dragon_runner/src/server.py | 282 ------------ dragon_runner/src/testfile.py | 204 --------- dragon_runner/src/toolchain.py | 68 --- dragon_runner/src/utils.py | 153 ------- pyproject.toml | 26 -- pytest.ini | 2 - {dragon_runner => scripts}/__init__.py | 0 .../scripts => scripts}/add_empty.py | 0 {dragon-runner-rs/scripts => scripts}/base.py | 0 .../scripts => scripts}/build.py | 0 .../scripts => scripts}/checkout.py | 0 .../scripts => scripts}/clean_build.py | 0 .../scripts => scripts}/gather.py | 0 .../scripts => scripts}/gen_config.py | 0 .../scripts => scripts}/grade.py | 0 .../scripts => scripts}/grade_perf.py | 0 {dragon-runner-rs/scripts => scripts}/key.py | 0 .../scripts => scripts}/loader.py | 0 {dragon-runner-rs/src => src}/cli.rs | 0 {dragon-runner-rs/src => src}/config.rs | 92 ++++ {dragon-runner-rs/src => src}/error.rs | 0 {dragon-runner-rs/src => src}/harness.rs | 49 +- {dragon-runner-rs/src => src}/lib.rs | 0 {dragon-runner-rs/src => src}/log.rs | 0 {dragon-runner-rs/src => 
src}/main.rs | 0 {dragon-runner-rs/src => src}/runner.rs | 87 ++++ {dragon-runner-rs/src => src}/testfile.rs | 0 {dragon-runner-rs/src => src}/toolchain.rs | 0 {dragon-runner-rs/src => src}/util.rs | 0 tests/conftest.py | 36 -- tests/run_tests.py | 17 - tests/test_config.py | 74 --- tests/test_grader.py | 20 - tests/test_runner.py | 43 -- tests/test_runtime.py | 51 --- 62 files changed, 224 insertions(+), 3610 deletions(-) rename dragon-runner-rs/scripts/__init__.py => CLAUDE.md (100%) rename dragon-runner-rs/Cargo.lock => Cargo.lock (100%) rename dragon-runner-rs/Cargo.toml => Cargo.toml (85%) delete mode 100644 dragon-runner-rs/tests/test_config.rs delete mode 100644 dragon-runner-rs/tests/test_grader.rs delete mode 100644 dragon-runner-rs/tests/test_runner.rs delete mode 100644 dragon-runner-rs/tests/test_runtime.rs delete mode 100644 dragon_runner/scripts/__init__.py delete mode 100644 dragon_runner/scripts/add_empty.py delete mode 100644 dragon_runner/scripts/base.py delete mode 100644 dragon_runner/scripts/build.py delete mode 100644 dragon_runner/scripts/checkout.py delete mode 100644 dragon_runner/scripts/clean_build.py delete mode 100644 dragon_runner/scripts/gather.py delete mode 100644 dragon_runner/scripts/gen_config.py delete mode 100644 dragon_runner/scripts/grade.py delete mode 100644 dragon_runner/scripts/grade_perf.py delete mode 100644 dragon_runner/scripts/key.py delete mode 100644 dragon_runner/scripts/loader.py delete mode 100644 dragon_runner/src/__init__.py delete mode 100644 dragon_runner/src/cli.py delete mode 100644 dragon_runner/src/config.py delete mode 100644 dragon_runner/src/errors.py delete mode 100644 dragon_runner/src/harness.py delete mode 100644 dragon_runner/src/log.py delete mode 100644 dragon_runner/src/main.py delete mode 100644 dragon_runner/src/runner.py delete mode 100644 dragon_runner/src/server.py delete mode 100644 dragon_runner/src/testfile.py delete mode 100644 dragon_runner/src/toolchain.py delete mode 100644 
dragon_runner/src/utils.py delete mode 100644 pyproject.toml delete mode 100644 pytest.ini rename {dragon_runner => scripts}/__init__.py (100%) rename {dragon-runner-rs/scripts => scripts}/add_empty.py (100%) rename {dragon-runner-rs/scripts => scripts}/base.py (100%) rename {dragon-runner-rs/scripts => scripts}/build.py (100%) rename {dragon-runner-rs/scripts => scripts}/checkout.py (100%) rename {dragon-runner-rs/scripts => scripts}/clean_build.py (100%) rename {dragon-runner-rs/scripts => scripts}/gather.py (100%) rename {dragon-runner-rs/scripts => scripts}/gen_config.py (100%) rename {dragon-runner-rs/scripts => scripts}/grade.py (100%) rename {dragon-runner-rs/scripts => scripts}/grade_perf.py (100%) rename {dragon-runner-rs/scripts => scripts}/key.py (100%) rename {dragon-runner-rs/scripts => scripts}/loader.py (100%) rename {dragon-runner-rs/src => src}/cli.rs (100%) rename {dragon-runner-rs/src => src}/config.rs (81%) rename {dragon-runner-rs/src => src}/error.rs (100%) rename {dragon-runner-rs/src => src}/harness.rs (92%) rename {dragon-runner-rs/src => src}/lib.rs (100%) rename {dragon-runner-rs/src => src}/log.rs (100%) rename {dragon-runner-rs/src => src}/main.rs (100%) rename {dragon-runner-rs/src => src}/runner.rs (82%) rename {dragon-runner-rs/src => src}/testfile.rs (100%) rename {dragon-runner-rs/src => src}/toolchain.rs (100%) rename {dragon-runner-rs/src => src}/util.rs (100%) delete mode 100644 tests/conftest.py delete mode 100644 tests/run_tests.py delete mode 100644 tests/test_config.py delete mode 100644 tests/test_grader.py delete mode 100644 tests/test_runner.py delete mode 100644 tests/test_runtime.py diff --git a/dragon-runner-rs/scripts/__init__.py b/CLAUDE.md similarity index 100% rename from dragon-runner-rs/scripts/__init__.py rename to CLAUDE.md diff --git a/dragon-runner-rs/Cargo.lock b/Cargo.lock similarity index 100% rename from dragon-runner-rs/Cargo.lock rename to Cargo.lock diff --git a/dragon-runner-rs/Cargo.toml b/Cargo.toml 
similarity index 85% rename from dragon-runner-rs/Cargo.toml rename to Cargo.toml index 65bcd8c..31b6034 100644 --- a/dragon-runner-rs/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,10 @@ name = "dragon-runner-rs" version = "0.1.0" edition = "2021" +[[bin]] +name = "dragon-runner" +path = "src/main.rs" + [dependencies] serde = { version = "1", features = ["derive"] } serde_json = "1" diff --git a/dragon-runner-rs/tests/test_config.rs b/dragon-runner-rs/tests/test_config.rs deleted file mode 100644 index 9ecc4f5..0000000 --- a/dragon-runner-rs/tests/test_config.rs +++ /dev/null @@ -1,107 +0,0 @@ -use std::path::{Path, PathBuf}; - -use dragon_runner_rs::config::load_config; - -fn configs_dir() -> PathBuf { - // tests/configs/ lives at the repo root level, two levels up from dragon-runner-rs/tests/ - let manifest = Path::new(env!("CARGO_MANIFEST_DIR")); - manifest.parent().unwrap().join("tests").join("configs") -} - -fn config_path(name: &str) -> String { - configs_dir().join(name).to_string_lossy().into_owned() -} - -#[test] -fn test_valid_config() { - let path = config_path("gccPassConfig.json"); - let config = load_config(&path, None).expect("config should load"); - - assert!( - Path::new(&config.test_dir).exists(), - "test_dir should exist: {}", - config.test_dir - ); - assert!(!config.packages.is_empty(), "should have packages"); - - for pkg in &config.packages { - assert!(!pkg.subpackages.is_empty(), "package {} should have subpackages", pkg.name); - for spkg in &pkg.subpackages { - assert!(!spkg.tests.is_empty(), "subpackage {} should have tests", spkg.name); - } - } - - assert!( - !config.error_collection.has_errors(), - "should have no errors, got: {}", - config.error_collection - ); -} - -#[test] -fn test_package_filter() { - let path = config_path("gccPassConfig.json"); - let config = load_config(&path, None).expect("config should load"); - - // Collect all subpackage paths - let all_subpackages: Vec<&str> = config - .packages - .iter() - .flat_map(|pkg| 
pkg.subpackages.iter()) - .map(|spkg| spkg.path.as_str()) - .collect(); - - assert!(!all_subpackages.is_empty(), "should have subpackages"); - - // Test filter pattern "*ErrorPass*" - let filter_pattern = "*ErrorPass*"; - let filtered: Vec<&&str> = all_subpackages - .iter() - .filter(|path| { - glob::Pattern::new(&filter_pattern.to_lowercase()) - .map(|pat| pat.matches(&path.to_lowercase())) - .unwrap_or(false) - }) - .collect(); - - assert!(!filtered.is_empty(), "filter should match some subpackages"); - - for path in &filtered { - assert!( - path.to_lowercase().contains("errorpass"), - "filtered path should contain 'errorpass': {}", - path - ); - } -} - -#[test] -fn test_invalid_dir_config() { - let path = config_path("invalidDirConfig.json"); - let config = load_config(&path, None).expect("config should load"); - - assert!( - config.error_collection.has_errors(), - "should have errors for invalid dir" - ); - assert!( - !Path::new(&config.test_dir).exists(), - "test_dir should not exist" - ); -} - -#[test] -fn test_invalid_exe_config() { - let path = config_path("invalidExeConfig.json"); - let config = load_config(&path, None).expect("config should load"); - - assert!( - config.error_collection.has_errors(), - "should have errors for invalid exe" - ); - assert_eq!(config.executables.len(), 1); - assert!( - !Path::new(&config.executables[0].exe_path).exists(), - "exe_path should not exist" - ); -} diff --git a/dragon-runner-rs/tests/test_grader.rs b/dragon-runner-rs/tests/test_grader.rs deleted file mode 100644 index b80786c..0000000 --- a/dragon-runner-rs/tests/test_grader.rs +++ /dev/null @@ -1,42 +0,0 @@ -use std::path::Path; - -use dragon_runner_rs::cli::{Mode, RunnerArgs}; -use dragon_runner_rs::config::load_config; -use dragon_runner_rs::harness::TournamentHarness; - -fn configs_dir() -> std::path::PathBuf { - let manifest = Path::new(env!("CARGO_MANIFEST_DIR")); - manifest.parent().unwrap().join("tests").join("configs") -} - -fn config_path(name: &str) -> 
String { - configs_dir().join(name).to_string_lossy().into_owned() -} - -#[test] -fn test_grader_config() { - let path = config_path("ConfigGrade.json"); - let config = load_config(&path, None).expect("config should load"); - - let failure_log = "Failures_rs.txt"; - // Clean up from previous runs - let _ = std::fs::remove_file(failure_log); - - let args = RunnerArgs { - mode: Mode::Tournament, - failure_log: failure_log.to_string(), - timeout: 2.0, - ..Default::default() - }; - - let mut harness = TournamentHarness::new(); - harness.run(&config, &args); - - assert!( - Path::new(failure_log).exists(), - "failure log should have been created" - ); - - // Clean up - let _ = std::fs::remove_file(failure_log); -} diff --git a/dragon-runner-rs/tests/test_runner.rs b/dragon-runner-rs/tests/test_runner.rs deleted file mode 100644 index 1fa2a4a..0000000 --- a/dragon-runner-rs/tests/test_runner.rs +++ /dev/null @@ -1,64 +0,0 @@ -use std::path::Path; - -use dragon_runner_rs::config::{load_config, Config}; -use dragon_runner_rs::runner::ToolChainRunner; - -fn configs_dir() -> std::path::PathBuf { - let manifest = Path::new(env!("CARGO_MANIFEST_DIR")); - manifest.parent().unwrap().join("tests").join("configs") -} - -fn config_path(name: &str) -> String { - configs_dir().join(name).to_string_lossy().into_owned() -} - -fn create_config(name: &str) -> Config { - let path = config_path(name); - load_config(&path, None).expect("config should load") -} - -/// Run all tests for a config and assert they match expected_result. 
-fn run_tests_for_config(config: &Config, expected_result: bool) { - for exe in &config.executables { - for tc in &config.toolchains { - let runner = ToolChainRunner::new(tc.clone(), 10.0) - .with_env(exe.runtime_env()); - for pkg in &config.packages { - for spkg in &pkg.subpackages { - for test in &spkg.tests { - let result = runner.run(test, exe); - assert_eq!( - result.did_pass, expected_result, - "Test {} expected {} but got {}", - test.file, - if expected_result { "PASS" } else { "FAIL" }, - if result.did_pass { "PASS" } else { "FAIL" }, - ); - } - } - } - } - } -} - -#[test] -fn test_gcc_pass() { - let config = create_config("gccPassConfig.json"); - assert!( - !config.error_collection.has_errors(), - "config errors: {}", - config.error_collection - ); - run_tests_for_config(&config, true); -} - -#[test] -fn test_gcc_fail() { - let config = create_config("gccFailConfig.json"); - assert!( - !config.error_collection.has_errors(), - "config errors: {}", - config.error_collection - ); - run_tests_for_config(&config, false); -} diff --git a/dragon-runner-rs/tests/test_runtime.rs b/dragon-runner-rs/tests/test_runtime.rs deleted file mode 100644 index e4eff9e..0000000 --- a/dragon-runner-rs/tests/test_runtime.rs +++ /dev/null @@ -1,76 +0,0 @@ -use std::path::Path; -use std::process; - -use dragon_runner_rs::config::{load_config, Config}; -use dragon_runner_rs::runner::ToolChainRunner; - -fn configs_dir() -> std::path::PathBuf { - let manifest = Path::new(env!("CARGO_MANIFEST_DIR")); - manifest.parent().unwrap().join("tests").join("configs") -} - -fn config_path(name: &str) -> String { - configs_dir().join(name).to_string_lossy().into_owned() -} - -fn tests_dir() -> std::path::PathBuf { - let manifest = Path::new(env!("CARGO_MANIFEST_DIR")); - manifest.parent().unwrap().join("tests") -} - -fn run_tests_for_config(config: &Config, expected_result: bool) { - for exe in &config.executables { - for tc in &config.toolchains { - let runner = ToolChainRunner::new(tc.clone(), 
3.0) - .with_env(exe.runtime_env()); - for pkg in &config.packages { - for spkg in &pkg.subpackages { - for test in &spkg.tests { - let result = runner.run(test, exe); - assert_eq!( - result.did_pass, expected_result, - "Test {} expected {} but got {}", - test.file, - if expected_result { "PASS" } else { "FAIL" }, - if result.did_pass { "PASS" } else { "FAIL" }, - ); - } - } - } - } - } -} - -#[test] -fn test_gcc_toolchain_success() { - let test_dir = tests_dir(); - let compile_script = test_dir.join("scripts/test-scripts/compile_lib.py"); - let lib_src_dir = test_dir.join("lib/src"); - let lib_out_dir = test_dir.join("lib"); - - assert!(compile_script.exists(), "missing compile_lib.py"); - - let expected_lib = test_dir.join("lib/libfib.so"); - if !expected_lib.exists() { - let status = process::Command::new("python3") - .args([ - compile_script.to_str().unwrap(), - lib_src_dir.to_str().unwrap(), - lib_out_dir.to_str().unwrap(), - ]) - .status() - .expect("failed to run compile_lib.py"); - assert!(status.success(), "shared object compilation failed"); - assert!(expected_lib.exists(), "failed to create shared object"); - } - - let path = config_path("runtimeConfigLinux.json"); - let config = load_config(&path, None).expect("config should load"); - assert!( - !config.error_collection.has_errors(), - "config errors: {}", - config.error_collection - ); - - run_tests_for_config(&config, true); -} diff --git a/dragon_runner/scripts/__init__.py b/dragon_runner/scripts/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/dragon_runner/scripts/add_empty.py b/dragon_runner/scripts/add_empty.py deleted file mode 100644 index 31eee68..0000000 --- a/dragon_runner/scripts/add_empty.py +++ /dev/null @@ -1,111 +0,0 @@ -""" -============================== 415 Grading Script ============================== -Author: Justin Meimar -Name: add_empty.py -Desc: -================================================================================ -""" -import sys -import 
argparse -import random -import string -from pathlib import Path -from typing import List -from dragon_runner.scripts.base import Script - - -class AddEmptyScript(Script): - - @classmethod - def name(cls) -> str: - return "add_empty" - - @classmethod - def description(cls) -> str: - return "Add empty test cases to student test packages" - - @classmethod - def get_parser(cls) -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - prog="add_empty", - description="Add empty test cases to test packages" - ) - parser.add_argument("key_file", type=Path, help="Key file which has a line for each (SID, GH_Username) pair") - parser.add_argument("search_path", type=Path, help="Path to search for test files") - parser.add_argument("empty_content", type=str, help="Empty content to write into files") - return parser - - @staticmethod - def load_key(key_path): - config = {} - with open(key_path) as key_file: - for line in key_file.readlines(): - sid, gh_username = line.strip().split(' ') - print("SID: ", sid, "\tGH Username: ", gh_username) - config[sid] = gh_username - print("Config Loaded...") - return config - - @staticmethod - def count_files_with_exclusions(directory: Path, excluded_extensions: list) -> int: - count = 0 - for path in directory.rglob('*'): - if path.is_file(): - if path.suffix.lower() not in excluded_extensions: - count += 1 - return count - - @staticmethod - def add_empty(key_file: Path, search_path: Path, empty_content: str): - config = AddEmptyScript.load_key(key_file) - - if not search_path.is_dir(): - error = "Could not create test directory." 
- print(error) - return 1 - - all_fine = True - for (sid, gh_user) in config.items(): - all_matches = list(search_path.rglob(sid)) - if len(all_matches) == 0: - print(f"Can not find a directory matching: {sid} in {search_path.name}") - exit(1) - if len(all_matches) > 1: - print(f"Found several matches for what should be a unique directory named {sid}:") - for m in all_matches: - print("Matched: ", m) - exit(1) - - sid_test_dir = Path(all_matches[0]) - assert sid_test_dir.is_dir() and sid_test_dir.exists() and f"{sid_test_dir} should exist." - - test_count = 0 - for path in sid_test_dir.rglob("*"): - if path.is_file() and not path.is_dir() and not path.name.startswith('.'): - if path.suffix.lower() not in [".ins", ".out"]: - test_count += 1 - - if test_count >= 5: - continue - - all_fine = False - while test_count < 5: - suffix= ''.join(random.choices(string.ascii_letters + string.digits, k=8)) - file_path = sid_test_dir / f"TA_empty_{test_count+1}_{suffix}.in" - file_path.write_text(empty_content) - test_count += 1 - print(f"{sid} - Writing an empty file: {file_path.name}...") - - if all_fine: - print("All students submited at least five testcases!") - - @classmethod - def main(cls, args: List[str]) -> int: - parser = cls.get_parser() - parsed_args = parser.parse_args(args) - cls.add_empty(parsed_args.key_file, parsed_args.search_path, parsed_args.empty_content) - return 0 - -if __name__ == '__main__': - sys.exit(AddEmptyScript.main(sys.argv[1:])) - diff --git a/dragon_runner/scripts/base.py b/dragon_runner/scripts/base.py deleted file mode 100644 index 4f4f8d3..0000000 --- a/dragon_runner/scripts/base.py +++ /dev/null @@ -1,43 +0,0 @@ -from abc import ABC, abstractmethod -import argparse -from typing import List - -class Script(ABC): - """ - Base class for all dragon-runner scripts. - Provides a standard interface for script metadata and execution. 
- """ - @classmethod - @abstractmethod - def name(cls) -> str: - """Return the script name (e.g., 'build.py')""" - pass - - @classmethod - @abstractmethod - def description(cls) -> str: - """Return a brief description of what the script does""" - pass - - @classmethod - @abstractmethod - def get_parser(cls) -> argparse.ArgumentParser: - """Return the argument parser for this script""" - pass - - @classmethod - def usage(cls) -> str: - """Generate usage string from the parser""" - parser = cls.get_parser() - usage_lines = parser.format_help().split('\n') - if usage_lines and usage_lines[0].startswith('usage:'): - parts = usage_lines[0].split(None, 2) - rest = parts[2] if len(parts) > 2 else '' - usage_lines[0] = f"usage: dragon-runner script {cls.name()} {rest}" - return '\n'.join(usage_lines) - - @classmethod - @abstractmethod - def main(cls, args: List[str]) -> int: - pass - diff --git a/dragon_runner/scripts/build.py b/dragon_runner/scripts/build.py deleted file mode 100644 index aef7875..0000000 --- a/dragon_runner/scripts/build.py +++ /dev/null @@ -1,104 +0,0 @@ -""" -============================== 415 Grading Script ============================== -Author: Justin Meimar -Name: build.py -Desc: build the compilers with cmake.. && make -j and log those which - fail. 
-================================================================================ -""" - -import os -import sys -import subprocess -import shutil -import argparse -from pathlib import Path -from typing import List -from dragon_runner.scripts.base import Script - - -class BuildScript(Script): - - @classmethod - def name(cls) -> str: - return "build" - - @classmethod - def description(cls) -> str: - return "Build student compiler projects with CMake" - - @classmethod - def get_parser(cls) -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - prog="build", - description="Build the compilers with cmake && make -j and log those which fail" - ) - parser.add_argument("start_dir", type=Path, help="Walking and build directories from this path") - parser.add_argument("log_file", type=Path, help="Path to log file") - parser.add_argument("dir_prefix", type=str, help="Prefix common to all directories to be built") - parser.add_argument("n", type=int, default=2, help="n_threads") - return parser - - @classmethod - def build(cls, start_dir, log_path, dir_prefix, n_threads="2"): - root_path = Path(start_dir).absolute() - log_path = Path(log_path).absolute() - - directories = [d for d in root_path.iterdir() if d.is_dir() and (dir_prefix in d.name) and d.name != '.'] - - print("Directories to build:") - for d in directories: - print(" ", d) - - for dir_path in directories: - print(f"-- Building project: {dir_path.name}", end='') - build_dir_path = dir_path / 'build' - try: - os.chdir(dir_path) - except OSError: - with open(log_path, 'a') as f: - f.write(f"{dir_path.name}: Failed to change directory\n") - continue - - if (build_dir_path).exists(): - shutil.rmtree(build_dir_path) - os.makedirs(build_dir_path) - os.chdir(build_dir_path) - try: - build_log = log_path.name + str(dir_path.stem) - with open(build_log, 'w') as log_file: - log_file.write(f"\n=== Building {dir_path.name} ===\n") - subprocess.run( - ['cmake', '..'], - stdout=log_file, - stderr=subprocess.STDOUT, - 
check=True - ) - subprocess.run( - ['make', '-j', n_threads], - stdout=log_file, - stderr=subprocess.STDOUT, - check=True - ) - print(" [SUCCESS]") - except subprocess.CalledProcessError: - print(f" [FAILED]") - build_log = log_path.name + str(dir_path.stem) - with open(build_log, 'w') as f: - f.write(f"{dir_path.name}: build failed\n") - finally: - os.chdir(root_path) - - print(f"Build process completed. Check {log_path} for build output and errors.") - - @classmethod - def main(cls, args: List[str]) -> int: - parser = cls.get_parser() - parsed_args = parser.parse_args(args) - parsed_args.log_file.unlink(missing_ok=True) - cls.build(parsed_args.start_dir, parsed_args.log_file, parsed_args.dir_prefix, str(parsed_args.n)) - return 0 - -if __name__ == '__main__': - sys.exit(BuildScript.main(sys.argv[1:])) - diff --git a/dragon_runner/scripts/checkout.py b/dragon_runner/scripts/checkout.py deleted file mode 100644 index 9ef154e..0000000 --- a/dragon_runner/scripts/checkout.py +++ /dev/null @@ -1,122 +0,0 @@ -import sys -import subprocess -import argparse -from datetime import datetime -from pathlib import Path -from typing import List -from dragon_runner.scripts.base import Script - - -class CheckoutScript(Script): - - @classmethod - def name(cls) -> str: - return "checkout" - - @classmethod - def description(cls) -> str: - return "Checkout git repositories to the latest commit before a specified time" - - @classmethod - def get_parser(cls) -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - prog="checkout", - description="Checkout student git repositories to a specific commit time" - ) - parser.add_argument('submission_dir', - type=Path, - help='Directory of repositories to checkout') - parser.add_argument('checkout_time', - help='Checkout time in format: "YYYY-MM-DD HH:MM:SS"') - return parser - - @classmethod - def get_commit_at_time(cls, repo_path, checkout_time): - result = subprocess.run( - ['git', 'rev-list', '-1', f'--before={checkout_time}', 
'HEAD'], - cwd=repo_path, - capture_output=True, - text=True - ) - if result.returncode != 0: - return None - return result.stdout.strip() - - @classmethod - def get_commit_time(cls, repo_path, commit_hash): - result = subprocess.run( - ['git', 'show', '-s', '--format=%ci', commit_hash], - cwd=repo_path, - capture_output=True, - text=True - ) - if result.returncode != 0: - return None - return result.stdout.strip() - - @classmethod - def checkout_commit(cls, repo_path, commit_hash): - result = subprocess.run( - ['git', 'checkout', commit_hash], - cwd=repo_path, - capture_output=True, - text=True - ) - return result.returncode == 0 - - @classmethod - def process_repositories(cls, submissions_dir: Path, checkout_time: str): - for submission_dir in sorted(submissions_dir.iterdir()): - if not submission_dir.is_dir(): - continue - - git_dir = submission_dir / '.git' - if not git_dir.exists(): - print(f"\nSkipping {submission_dir.name} - not a git repository") - continue - print(f"\nProcessing: {submission_dir.name}") - - commit_hash = cls.get_commit_at_time(submission_dir, checkout_time) - if not commit_hash: - print(f" No commits found before {checkout_time}") - continue - - commit_time = cls.get_commit_time(submission_dir, commit_hash) - if cls.checkout_commit(submission_dir, commit_hash): - print(f" Checked out to: {commit_hash[:8]}") - print(f" Commit time: {commit_time}") - else: - print(f" Failed to checkout {commit_hash[:8]}") - - @classmethod - def validate_checkout_time(cls, time_str): - try: - datetime.strptime(time_str, '%Y-%m-%d %H:%M:%S') - return True - except ValueError: - return False - - @classmethod - def main(cls, args: List[str]) -> int: - parser = cls.get_parser() - parsed_args = parser.parse_args(args) - - sub = Path(parsed_args.submission_dir) - - if not sub.exists(): - print("Submission directory does not exist...") - return 1 - - if not cls.validate_checkout_time(parsed_args.checkout_time): - print('Invalid checkout_time format. 
Use: "YYYY-MM-DD HH:MM:SS"') - return 1 - - print(f"Using submission dir: {sub}") - print(f"Checking out to latest commit before: {parsed_args.checkout_time}") - - cls.process_repositories(sub, parsed_args.checkout_time) - return 0 - -if __name__ == "__main__": - sys.exit(CheckoutScript.main(sys.argv[1:])) - diff --git a/dragon_runner/scripts/clean_build.py b/dragon_runner/scripts/clean_build.py deleted file mode 100644 index a60c07f..0000000 --- a/dragon_runner/scripts/clean_build.py +++ /dev/null @@ -1,61 +0,0 @@ -import sys -import shutil -from pathlib import Path -import argparse -from typing import List -from dragon_runner.scripts.base import Script - - -class CleanBuildScript(Script): - - @classmethod - def name(cls) -> str: - return "clean-build" - - @classmethod - def description(cls) -> str: - return "Remove build directories from student submissions" - - @classmethod - def get_parser(cls) -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - prog="clean-build", - description="Remove build directories from all submissions" - ) - parser.add_argument('submission_dir', type=Path, help='Directory of submissions to clean') - return parser - - @staticmethod - def remove_build_dirs(submissions_dir: Path): - for submission_dir in sorted(submissions_dir.iterdir()): - if not submission_dir.is_dir(): - continue - - build_dir = submission_dir / 'build' - if not build_dir.exists(): - continue - - print(f"Removing build directory in: {submission_dir.name}") - try: - shutil.rmtree(build_dir) - print(f" Successfully removed") - except Exception as e: - print(f" Failed: {e}") - - @classmethod - def main(cls, args: List[str]) -> int: - parser = cls.get_parser() - parsed_args = parser.parse_args(args) - - sub = Path(parsed_args.submission_dir) - - if not sub.exists(): - print("Submission directory does not exist...") - return 1 - - cls.remove_build_dirs(sub) - return 0 - -if __name__ == "__main__": - sys.exit(CleanBuildScript.main(sys.argv[1:])) - diff --git 
a/dragon_runner/scripts/gather.py b/dragon_runner/scripts/gather.py deleted file mode 100644 index 3db1eb3..0000000 --- a/dragon_runner/scripts/gather.py +++ /dev/null @@ -1,91 +0,0 @@ -""" -============================== 415 Grading Script ============================== -Author: Justin Meimar -Name: gather.py -Desc: -================================================================================ -""" - -import sys -import shutil -import argparse -from pathlib import Path -from typing import List -from dragon_runner.scripts.base import Script - - -class GatherScript(Script): - - @classmethod - def name(cls) -> str: - return "gather" - - @classmethod - def description(cls) -> str: - return "Gather test files from student submissions" - - @classmethod - def get_parser(cls) -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - prog="gather", - description="Gather all the testfiles in student directories" - ) - parser.add_argument("key_file", type=Path, help="Key file which has a line for each (SID, GH_Username) pair") - parser.add_argument("search_path", type=Path, help="Path to search for test files") - parser.add_argument("project_name", type=Path, help="Path to search for test files") - return parser - - @staticmethod - def load_key(key_path: Path): - config = {} - with open(key_path) as key_file: - for line in key_file.readlines(): - sid, gh_username = line.strip().split(' ') - print("SID: ", sid, "\tGH Username: ", gh_username) - config[sid] = gh_username - return config - - @staticmethod - def gather(key_file: Path, - search_path: str, - project_name: str, - output_dir: str = "submitted-testfiles"): - is_rt = True - config = GatherScript.load_key(key_file) - search_dir = Path(search_path) - project_name = str(project_name).strip() - - if not search_dir.is_dir(): - error = "Could not create test directory." 
- print(error) - return 1 - - directories = [d for d in search_dir.iterdir() if d.is_dir() and str(project_name) in d.name] - for (sid, gh_user) in config.items(): - print("Finding submission for: ", gh_user) - for d in directories: - if gh_user in str(d): - if is_rt: - suffix = '-'.join(gh_user.split('-')[1:]) - expected_test_dir = d / "tests" / "testfiles" / suffix - else: - expected_test_dir = d / "tests" / "testfiles" / sid - - if expected_test_dir.is_dir(): - print(f"-- Found properly formatted testfiles for {sid}") - shutil.copytree(expected_test_dir, (Path(output_dir) / sid), dirs_exist_ok=True) - break - else: - print(f"-- Could NOT find testfiles for {sid}") - exit(1) - - @classmethod - def main(cls, args: List[str]) -> int: - parser = cls.get_parser() - parsed_args = parser.parse_args(args) - cls.gather(parsed_args.key_file, parsed_args.search_path, parsed_args.project_name) - return 0 - -if __name__ == '__main__': - sys.exit(GatherScript.main(sys.argv[1:])) - diff --git a/dragon_runner/scripts/gen_config.py b/dragon_runner/scripts/gen_config.py deleted file mode 100644 index 392628e..0000000 --- a/dragon_runner/scripts/gen_config.py +++ /dev/null @@ -1,131 +0,0 @@ -""" -============================== 415 Grading Script ============================== -Author: Justin Meimar -Name: gen_config.py -Desc: -================================================================================ -""" -import sys -import json -import argparse -from typing import Optional, List -from pathlib import Path -from typing import Iterator, Tuple -from dragon_runner.scripts.base import Script - - -class Key: - def __init__(self, key_path: Path): - self.key_path = key_path - self.sid_repo_suffix_map = {} - - with open(key_path) as key_file: - for line in key_file.readlines(): - sids, repo_suffix = line.strip().split(' ') - sid_list = sids.strip().split(',') - for sid in sid_list: - self.sid_repo_suffix_map[sid] = repo_suffix - - def __str__(self): - s = "" - for k, v in 
self.sid_repo_suffix_map.items(): - s += (f"{k}\t{v}") - return s - - def get_repo_for_sid(self, sid): - return self.sid_repo_suffix_map[sid] - - def iter_sids(self) -> Iterator[str]: - return iter(self.sid_repo_suffix_map.keys()) - - def iter_repos(self) -> Iterator[str]: - return iter(set(self.sid_repo_suffix_map.values())) - - def iter_both(self) -> Iterator[Tuple[str, str]]: - return iter(self.sid_repo_suffix_map.items()) - - -class GenConfigScript(Script): - - @classmethod - def name(cls) -> str: - return "gen-config" - - @classmethod - def description(cls) -> str: - return "Generate dragon-runner configuration from submissions" - - @classmethod - def get_parser(cls) -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - prog="gen-config", - description="Generate dragon-runner configuration from student submissions" - ) - parser.add_argument("key_path", type=Path, - help="Path to key file containing each team/ccid on a line.") - parser.add_argument("submissions_path", type=Path, - help="Path to project submissions cloned from github classroom.") - parser.add_argument("binary", type=str, - help="Name of binary to expect in prohjects bin/") - parser.add_argument("--runtime", type=str, default=None, - help="Name of runtime library to expect in prohjects bin/") - return parser - - @staticmethod - def gen_config(key_path:Path, - submission_dir:Path, - binary:str, - runtime:Optional[str]=None): - - executables_config = {} - runtimes_config = {} - config = {} - - assert key_path.is_file(), "must supply regular file as key" - assert submission_dir.is_dir(), "must supply directory to submissions." 
- - key = Key(key_path) - for (sids, repo_suffix) in key.iter_both(): - match_dir = [d for d in submission_dir.iterdir() if d.is_dir() and str(repo_suffix) in d.name] - if match_dir == []: - print(f"Couldn't find: repo with suffix {repo_suffix}") - exit(1) - - match_dir = Path(match_dir[0]) - expected_package = match_dir / "tests/testfiles" / sids - expected_binary = match_dir / f"bin/{binary}" - expected_runtime = match_dir / f"bin/{runtime}" - - if not expected_package.is_file: - print(f"Can not find expected package: {expected_package}") - break; - - if not expected_binary.is_file: - print(f"Can not find expected binary: {expected_binary}") - break; - - if runtime is not None and not expected_runtime.is_file: - print(f"Can not find expected binary: {expected_binary}") - break; - - executables_config.update({f"{sids}":f"{Path.absolute(expected_binary)}"}) - runtimes_config.update({f"{sids}":f"{Path.absolute(expected_runtime)}"}) - - config.update({"testedExecutablePaths": executables_config}) - if runtime is not None: - config.update({"runtimes": runtimes_config}) - - print(json.dumps(config, indent=4)) - with open('config.json', 'w') as f: - json.dump(config, f, indent=4) - - @classmethod - def main(cls, args: List[str]) -> int: - parser = cls.get_parser() - parsed_args = parser.parse_args(args) - cls.gen_config(parsed_args.key_path, parsed_args.submissions_path, parsed_args.binary, parsed_args.runtime) - return 0 - -if __name__ == '__main__': - sys.exit(GenConfigScript.main(sys.argv[1:])) - diff --git a/dragon_runner/scripts/grade.py b/dragon_runner/scripts/grade.py deleted file mode 100644 index e859880..0000000 --- a/dragon_runner/scripts/grade.py +++ /dev/null @@ -1,172 +0,0 @@ -""" -This script must run with symmetric tables, meaning nrows = ncols. 
-""" -import sys -import argparse -import csv -from pathlib import Path -from fractions import Fraction -from typing import List -from dragon_runner.scripts.base import Script - - -class GradeScript(Script): - - DEFENSIVE_PTS = 2 - OFFENSIVE_PTS = 1 - COHERENCE_PTS = 10 - COMPETITIVE_WEIGHT = 0.2 - TA_WEIGHT = 0.5 - - @classmethod - def name(cls) -> str: - return "grade" - - @classmethod - def description(cls) -> str: - return "Grade tournament results and compute scores" - - @classmethod - def get_parser(cls) -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - prog="grade", - description="Grade 415 tournament results" - ) - parser.add_argument("tournament_csvs", type=Path, nargs="+", - help="Path(s) to tournament CSV files") - parser.add_argument("output_csv", type=Path, - help="Path to output CSV file") - parser.add_argument("--solution-name", type=str, default="solution", - help="Name of the solution/TA executable in the CSV (default: 'solution')") - return parser - - @staticmethod - def parse_fraction(s): - try: - return round(float(Fraction(s)), 4) - except (ValueError, ZeroDivisionError): - return round(float(s), 4) if s else 0.0 - - @staticmethod - def load_csv(filepath): - with open(filepath, 'r') as f: - return list(csv.reader(f)) - - @classmethod - def average_tables(cls, tables): - table = tables[0] - n_rows = len(table) - n_cols = len(table[0]) - assert n_rows == n_cols, f"Expected table to be symmetric! 
Found {n_rows} rows and {n_cols} columns" - - avg_table = [row[:] for row in tables[0]] - avg_table[0][0] = "toolchain_summary" - for j in range(1, n_cols): - for i in range(1, n_rows): - avg_cell = 0 - for table in tables: - avg_cell += cls.parse_fraction(table[i][j]) - avg_table[i][j] = round(avg_cell / len(tables), 2) - return avg_table - - @classmethod - def compute_tournament_points(cls, table, solution_name): - n_rows = len(table) - n_cols = len(table[0]) - solution_col = None - for j in range(1, n_cols): - if table[0][j].lower() == solution_name.lower(): - solution_col = j - break - - print(f"{n_rows}:{n_cols}") - print(f"Computing tournament with solution '{table[0][solution_col]}' at column: {solution_col}") - scores = { - 'defensive': [], - 'offensive': [], - 'coherence': [], - 'ta': [] - } - for j in range(1, n_cols): - d_score = 0 - o_score = 0 - c_score = 0 - ta_score = 0 - c_score = cls.COHERENCE_PTS if cls.parse_fraction(table[j][j]) == 1 else 0 - if solution_col is not None and solution_col < len(table[j]): - ta_score = cls.parse_fraction(table[j][solution_col]) - - for i in range(1, n_rows): - if i != j: - d_score += cls.DEFENSIVE_PTS * cls.parse_fraction(table[j][i]) - - for k in range(1, n_cols): - if k != j and k < len(table[j]): - o_score += cls.OFFENSIVE_PTS * (1 - cls.parse_fraction(table[k][j])) - - scores['defensive'].append(round(d_score, 2)) - scores['offensive'].append(round(o_score, 2)) - scores['coherence'].append(c_score) - scores['ta'].append(ta_score) - - print(scores) - return scores - - @classmethod - def create_summary_table(cls, base_table, avg_scores): - summary = [["toolchain summary"] + base_table[0][1:]] - - for i in range(1, len(base_table)): - if i < len(base_table[0]): - row = [base_table[i][0]] - for j in range(1, len(base_table[0])): - if i < len(base_table) and j < len(base_table[i]): - row.append(round(cls.parse_fraction(base_table[i][j]), 3)) - else: - row.append(0) - summary.append(row) - - competitive_total = [] - 
for i in range(len(avg_scores['defensive'])): - total = (avg_scores['defensive'][i] + - avg_scores['offensive'][i] + - avg_scores['coherence'][i]) - competitive_total.append(total) - - max_score = max(competitive_total) if competitive_total else 1 - - summary.append(["Defensive Points"] + [f"{s:.2f}" for s in avg_scores['defensive']]) - summary.append(["Offensive Points"] + [f"{s:.2f}" for s in avg_scores['offensive']]) - summary.append(["Coherence Points"] + [f"{s:.0f}" for s in avg_scores['coherence']]) - summary.append(["Competitive Points"] + [f"{s:.2f}" for s in competitive_total]) - summary.append(["TA Testing Score (50% Weight)"] + - [f"{s * cls.TA_WEIGHT:.3f}" for s in avg_scores['ta']]) - normalized = [cls.COMPETITIVE_WEIGHT * (s / max_score) for s in competitive_total] - summary.append(["Normalized Points (20% Weight)"] + [f"{s:.3f}" for s in normalized]) - return summary - - @classmethod - def grade(cls, toolchain_paths, output_path, solution_name): - tables = [cls.load_csv(path) for path in toolchain_paths] - avg_table = cls.average_tables(tables) - scores = cls.compute_tournament_points(avg_table, solution_name) - - with open(output_path, 'w', newline='') as f: - writer = csv.writer(f) - for table in tables: - writer.writerows(table) - writer.writerow([]) - writer.writerows(cls.create_summary_table(avg_table, scores)) - - print(f"Grading complete. 
Output written to {output_path}") - print(f"Solution name used: '{solution_name}'") - return 0 - - @classmethod - def main(cls, args: List[str]) -> int: - parser = cls.get_parser() - parsed_args = parser.parse_args(args) - return cls.grade(parsed_args.tournament_csvs, parsed_args.output_csv, parsed_args.solution_name) - -if __name__ == "__main__": - sys.exit(GradeScript.main(sys.argv[1:])) diff --git a/dragon_runner/scripts/grade_perf.py b/dragon_runner/scripts/grade_perf.py deleted file mode 100644 index 615a451..0000000 --- a/dragon_runner/scripts/grade_perf.py +++ /dev/null @@ -1,92 +0,0 @@ -""" -============================== 415 Grading Script ============================== -Author: Justin Meimar -Name: grade_perf.py -Desc: Dragon-runner with a config pointing to the performance tests & an - executable for each compiler to be tested, when run with --mode=perf, - will produce a perf.csv file. - - This script takes perf.csv as its input and runs the performance testing - grading algorithm to return a single CSV row, indicating the perf scores - for each team. - - The intention is that the single row be manually copy and pasted into the - row output by the grade.py script. 
-================================================================================ -""" -import sys -import argparse -import csv -import numpy as np -from pathlib import Path -from typing import List -from dragon_runner.scripts.base import Script - - -class GradePerfScript(Script): - - @classmethod - def name(cls) -> str: - return "grade-perf" - - @classmethod - def description(cls) -> str: - return "Grade performance testing results" - - @classmethod - def get_parser(cls) -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - prog="grade-perf", - description="Grade performance testing results" - ) - parser.add_argument( - "perf_csv", - type=Path, - help="Path to csv file generated from grade mode" - ) - parser.add_argument( - "output_csv", - type=Path, - help="Path to final output csv with grades" - ) - return parser - - @staticmethod - def grade_perf(*args): - if len(args) < 2: - print("Must supply two arguments: ") - return 1 - - with open(args[0], "r") as perf_csv: - reader = csv.reader(perf_csv) - headers = next(reader) - test_data = [row for row in reader if row and any(row)] - - raw_times = np.array([[float(x) for x in row[1:]] for row in test_data]) - - scores = [] - for times in raw_times: - fastest_time = min(times) - test_scores = [fastest_time / time for time in times] - scores.append(test_scores) - total_scores = np.mean(scores, axis=0) - - print(headers[1:]) - print(total_scores) - - # Write results to output CSV - with open(args[1], "w") as output_csv: - writer = csv.writer(output_csv) - writer.writerow(headers[1:]) - writer.writerow(total_scores) - - @classmethod - def main(cls, args: List[str]) -> int: - parser = cls.get_parser() - parsed_args = parser.parse_args(args) - cls.grade_perf(parsed_args.perf_csv, parsed_args.output_csv) - return 0 - -if __name__ == "__main__": - sys.exit(GradePerfScript.main(sys.argv[1:])) - diff --git a/dragon_runner/scripts/key.py b/dragon_runner/scripts/key.py deleted file mode 100644 index 2ad5bbd..0000000 
--- a/dragon_runner/scripts/key.py +++ /dev/null @@ -1,34 +0,0 @@ -from typing import Tuple -from pathlib import Path -from typing import Iterator - -class Key: - def __init__(self, key_path: Path): - self.key_path = key_path - self.sid_repo_suffix_map = {} - - with open(key_path) as key_file: - for line in key_file.readlines(): - sids, repo_suffix = line.strip().split(' ') - sid_list = sids.strip().split(',') - for sid in sid_list: - self.sid_repo_suffix_map[sid] = repo_suffix - - def __str__(self): - s = "" - for k, v in self.sid_repo_suffix_map.items(): - s += (f"{k}\t{v}") - return s - - def get_repo_for_sid(self, sid): - return self.sid_repo_suffix_map[sid] - - def iter_sids(self) -> Iterator[str]: - return iter(self.sid_repo_suffix_map.keys()) - - def iter_repos(self) -> Iterator[str]: - return iter(set(self.sid_repo_suffix_map.values())) - - def iter_both(self) -> Iterator[Tuple[str, str]]: - return iter(self.sid_repo_suffix_map.items()) - diff --git a/dragon_runner/scripts/loader.py b/dragon_runner/scripts/loader.py deleted file mode 100644 index a77e05b..0000000 --- a/dragon_runner/scripts/loader.py +++ /dev/null @@ -1,75 +0,0 @@ -import subprocess -import sys -import importlib -from typing import List, Dict, Type, Optional -from pathlib import Path -from dragon_runner.scripts.base import Script - - -class Loader: - """ - Dragon runner allows grading scripts to be run through its CLI. - Each script is executed as a subprocess using Python's -m flag to ensure - consistent behavior whether called directly or through dragon-runner. 
- """ - def __init__(self): - self.script_modules = { - "add_empty": "dragon_runner.scripts.add_empty", - "build": "dragon_runner.scripts.build", - "clean-build": "dragon_runner.scripts.clean_build", - "checkout": "dragon_runner.scripts.checkout", - "gather": "dragon_runner.scripts.gather", - "gen-config": "dragon_runner.scripts.gen_config", - "grade": "dragon_runner.scripts.grade", - "grade-perf": "dragon_runner.scripts.grade_perf", - } - - def _load_script_class(self, module_name: str) -> Optional[Type[Script]]: - """ - Dynamically load a script module and return its Script class if it exists. - Returns None if the module doesn't implement the Script interface. - """ - try: - module = importlib.import_module(module_name) - # Look for a class that inherits from Script - for attr_name in dir(module): - attr = getattr(module, attr_name) - if (isinstance(attr, type) and - issubclass(attr, Script) and - attr is not Script): - return attr - except Exception: - pass - return None - - def __call__(self, args: List[str]): - """ - Select the script to run from the mode argument passed through - dragon-runner CLI and execute it as a subprocess. - """ - if args == [] or args[0] not in self.script_modules: - print(self) - return 1 - - module = self.script_modules[args[0]] - cmd = [sys.executable, "-m", module] + args[1:] - try: - result = subprocess.run(cmd, check=False) - return result.returncode - except Exception as e: - print(f"Failed to run script: {e}") - return 1 - - def __repr__(self): - """ - Display all available scripts with their descriptions and usage. 
- """ - s = "Available Scripts:\n" - for script_name, module_name in self.script_modules.items(): - script_class = self._load_script_class(module_name) - max_script = max(self.script_modules.keys(),key=lambda x: len(x)) - if script_class: - s += f" * {script_name}: {(len(max_script) - len(script_name))* ' '} " - s += f"{script_class.description()}\n" - return s - diff --git a/dragon_runner/src/__init__.py b/dragon_runner/src/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/dragon_runner/src/cli.py b/dragon_runner/src/cli.py deleted file mode 100644 index f2ef95e..0000000 --- a/dragon_runner/src/cli.py +++ /dev/null @@ -1,112 +0,0 @@ -from pathlib import Path -from typing import Any, NamedTuple, List -from dragon_runner.scripts.loader import Loader -from enum import Enum -import argparse -from enum import Enum -from typing import List, NamedTuple, Protocol, runtime_checkable -from pathlib import Path -import argparse -import sys -import os - -class Mode(Enum): - REGULAR = "regular" - TOURNAMENT = "tournament" - PERF = "perf" - MEMCHECK = "memcheck" - SERVE = "serve" - SCRIPT = "script" - -@runtime_checkable -class CLIArgs(Protocol): - mode: Mode - -class RunnerArgs(NamedTuple): - mode: Mode - config_file: str = "" - output: str = "" - failure_log: str = "" - debug_package: str = "" - package_filter: str = "" - timeout: float = 2.0 - time: bool = False - verbosity: int = 0 - verify: bool = False - show_testcase: bool = False - fast_fail: bool = False - -class ScriptArgs(NamedTuple): - mode: Mode - args: List[str] = [] - -class ServerArgs(NamedTuple): - mode: Mode - port: int = 5000 - serve_path: Path = Path(".") - -def parse_runner_args(argv_skip: int=1) -> RunnerArgs: - parser = argparse.ArgumentParser(description="CMPUT 415 testing utility") - - parser.add_argument("config_file", help="Path to the JSON configuration file") - parser.add_argument("--fail-log", dest="failure_log", default="") - parser.add_argument("--timeout", type=float, 
default=2.0) - parser.add_argument("--verify", action="store_true") - parser.add_argument("--debug-package", default="") - parser.add_argument("-p", "--package", dest="package_filter", default="", help="Filter packages by glob pattern (case insensitive)") - parser.add_argument("-t", "--time", action="store_true") - parser.add_argument("-v", "--verbosity", action="count", default=0) - parser.add_argument("-s", "--show-testcase", action="store_true") - parser.add_argument("-o", "--output", default="") - parser.add_argument("-f", "--fast-fail", dest="fast_fail", action="store_true") - - # Parse arguments - args = parser.parse_args(sys.argv[argv_skip:]) - - # Set debug environment variable - os.environ["DRAGON_RUNNER_DEBUG"] = str(args.verbosity) - - # Convert to dictionary and add mode - args_dict = vars(args) - args_dict["mode"] = Mode.REGULAR - - return RunnerArgs(**args_dict) - - -def parse_server_args() -> ServerArgs: - parser = argparse.ArgumentParser(description="Server mode") - parser.add_argument("serve_path", type=Path, help="Config directory or file") - parser.add_argument("--port", type=int, default=5000) - - args = parser.parse_args(sys.argv[2:]) - return ServerArgs( - mode=Mode.SERVE, - port=args.port, - serve_path=args.serve_path - ) - -def parse_cli_args() -> Any: - if len(sys.argv) < 2: - print("Usage: dragon-runner [mode] config.json [args...]") - print(" mode: [regular|tournament|perf|memcheck|serve|script])") - print(" args: dragon-runner -h") - sys.exit(1) - - first_arg = sys.argv[1] - - # Create a mapping to convert string to Mode enum - mode_map = {mode.value: mode for mode in Mode} - - if first_arg in mode_map: - if first_arg == Mode.SERVE.value: - return parse_server_args() - elif first_arg == Mode.SCRIPT.value: - return ScriptArgs(mode=Mode.SCRIPT, args=sys.argv[2:]) - else: - # For runner modes - args = parse_runner_args(argv_skip=2) - return RunnerArgs(**{**args._asdict(), "mode": mode_map[first_arg]}) - else: - # If no mode is supplied, 
default to regular mode - return parse_runner_args(1) - diff --git a/dragon_runner/src/config.py b/dragon_runner/src/config.py deleted file mode 100644 index d7b8a7d..0000000 --- a/dragon_runner/src/config.py +++ /dev/null @@ -1,275 +0,0 @@ -import json -import os -import sys -from pathlib import Path -from typing import Dict, List, Optional -from dragon_runner.src.testfile import TestFile -from dragon_runner.src.errors import ConfigError, Verifiable, ErrorCollection -from dragon_runner.src.toolchain import ToolChain -from dragon_runner.src.utils import resolve_relative -from dragon_runner.src.log import log -from dragon_runner.src.cli import RunnerArgs - -class SubPackage(Verifiable): - """ - Represents a set of tests in a directory. - """ - def __init__(self, path: str): - self.path: str = path - self.name: str = os.path.basename(path) - self.tests: List[TestFile] = [] - if os.path.isdir(path): - self.tests = self.gather_tests() - else: - self.tests = [TestFile(path)] - - def verify(self) -> ErrorCollection: - """ - Verify the tests in our config have no errors. - """ - ec = ErrorCollection(); - for test in self.tests: - test_errors = test.verify() - ec.extend(test_errors) - return ec - - @staticmethod - def is_test(test_path: str): - """ - Ignore reserved output and input stream extensions and hidden files - """ - return (os.path.isfile(test_path) and - not os.path.basename(test_path).startswith('.') and - not test_path.endswith(('.out', '.ins'))) - - def gather_tests(self) -> List[TestFile]: - """ - Find all tests in the directory of the subpackage. - """ - tests = [] - for file in os.listdir(self.path): - test_path = os.path.join(self.path, file) - if self.is_test(test_path): - tests.append(TestFile(test_path)) - return sorted(tests, key=lambda x: x.file) - -class Package(Verifiable): - """ - Represents a single test package. Shoud have a corresponding CCID if submitted. 
- """ - def __init__(self, path: str): - self.path: str = path - self.name: str = os.path.basename(path) - self.n_tests = 0 - self.subpackages = [] - - if os.path.isdir(path): - self.gather_subpackages() - else: - self.subpackages.append(SubPackage(path)) - - def verify(self) -> ErrorCollection: - """ - Propogate up all errors in subpackages. - """ - return ErrorCollection(ec for spkg in self.subpackages if (ec := spkg.verify())) - - def add_subpackage(self, spkg: SubPackage): - """ - Add a subpackage while keeping total test count up to date - """ - self.n_tests += len(spkg.tests) - self.subpackages.append(spkg) - - def gather_subpackages(self) -> List[SubPackage]: - """ - Collect any directory within a package and create a subpackage. - """ - subpackages = [] - top_level_spkg = SubPackage(self.path) - if len(top_level_spkg.tests) > 0: - self.add_subpackage(top_level_spkg) - for parent_path, dirs, _ in os.walk(self.path): - for dirname in dirs: - spkg = SubPackage(os.path.join(parent_path, dirname)) - if len(spkg.tests) > 0: - self.add_subpackage(spkg) - return subpackages - -class Executable(Verifiable): - """ - Represents a single tested executable along with an optional associated runtime. 
- """ - def __init__(self, id: str, exe_path: str, runtime: str): - self.id = id - self.exe_path = exe_path - self.runtime = runtime - self.errors = self.verify() - - def verify(self) -> ErrorCollection: - """ - Check if the binary path exists and runtime path exists (if present) - """ - errors = [] - if not os.path.exists(self.exe_path): - errors.append(ConfigError( - f"Cannot find binary file: {self.exe_path} " - f"in Executable: {self.id}") - ) - if self.runtime and not os.path.exists(self.runtime): - errors.append(ConfigError( - f"Cannot find runtime file: {self.runtime} " - f"in Executable: {self.id}") - ) - return ErrorCollection(errors) - - def source_env(self): - """ - Source all env variables defined in this executables map - TODO: Eventually, this should be replaced with a more generic JSON config format that - allows env variables to be first class. - """ - if self.runtime: - runtime_path = Path(self.runtime) - runtime_dir = runtime_path.parent - rt_filename = runtime_path.stem - - if sys.platform == "darwin": - preload_env = { - "DYLD_LIBRARY_PATH": str(runtime_dir), - "DYLD_INSERT_LIBRARIES": str(runtime_path) - } - else: - preload_env = { - "LD_LIBRARY_PATH": str(runtime_dir), - "LD_PRELOAD": str(runtime_path) - } - - preload_env.update({ - "RT_PATH": str(runtime_dir), - "RT_LIB": rt_filename[3:] - }) - - for key, value in preload_env.items(): - os.environ[key] = value - - def to_dict(self) -> Dict: - return { - 'id': self.id, - 'exe_path': self.exe_path - } - -class Config: - """ - An in memory representation of the JSON configuration file which directs the tester. 
- """ - def __init__(self, config_path: str, config_data: Dict, debug_package: Optional[str], package_filter: str = ""): - self.name = Path(config_path).stem - self.config_path = os.path.abspath(config_path) - self.config_data = config_data - self.debug_package = debug_package - self.package_filter = package_filter - self.test_dir = resolve_relative(config_data['testDir'], - os.path.abspath(config_path)) - self.executables = self.parse_executables(config_data['testedExecutablePaths'], - config_data.get('runtimes', "")) - self.solution_exe = config_data.get('solutionExecutable', None) - self.toolchains = self.parse_toolchains(config_data['toolchains']) - self.packages = self.gather_packages() - self.error_collection = self.verify() - - def parse_executables(self, executables_data: Dict[str, str], - runtimes_data: Dict[str, str]) -> List[Executable]: - """ - Parse each executable and assign a corresponding runtime if supplied - """ - def find_runtime(id) -> str: - if not runtimes_data: - return "" - for rt_id, rt_path in runtimes_data.items(): - if rt_id == id : - return os.path.abspath(resolve_relative(rt_path, self.config_path)) - return "" - return [Executable( - id, - resolve_relative(path, self.config_path), - find_runtime(id) - ) for id, path in executables_data.items()] - - def parse_toolchains(self, toolchains_data: Dict[str, List[Dict]]) -> List[ToolChain]: - """ - Parse each toolchain from the config file and return a list of them. 
- """ - return [ToolChain(name, steps) for name, steps in toolchains_data.items()] - - def gather_packages(self) -> List[Package]: - """ - Collect all top-level directories in testdir and create a package - """ - packages = [] - if self.debug_package: - packages.append(Package(self.debug_package)) - return packages - - for parent_path, dirs, _ in os.walk(self.test_dir): - for dirname in dirs: - pkg_path = os.path.join(parent_path, dirname) - packages.append(Package(pkg_path)) - break - return packages - - def log_test_info(self): - """ - Prints a simple formatted table of test information. - """ - log("\nPackages:", level=1) - for pkg in self.packages: - log(f"-- ({pkg.name})", level=1) - for spkg in pkg.subpackages: - log(f" -- ({spkg.name})", level=2) - for test in spkg.tests: - log(f" -- ({test.file})", level=3) - - def verify(self) -> ErrorCollection: - """ - Pass up all errrors by value in downstream objects like Toolchain, Testfile and Executable - """ - ec = ErrorCollection() - if not os.path.exists(self.test_dir): - ec.add(ConfigError(f"Cannot find test directory: {self.config_data['testDir']}")) - for exe in self.executables: - ec.extend(exe.verify().errors) - for tc in self.toolchains: - ec.extend(tc.verify().errors) - for pkg in self.packages: - ec.extend(pkg.verify().errors) - return ec - - def to_dict(self) -> Dict: - return { - 'name': self.name, - 'testDir': self.test_dir, - 'executables': [exe.to_dict() for exe in self.executables], - 'toolchains': {tc.name: tc.to_dict()[tc.name] for tc in self.toolchains}, - 'subpackages': [pkg.name for pkg in self.packages] - } - - def __repr__(self) -> str: - return json.dumps(self.to_dict(), indent=2) - -def load_config(config_path: str, args: Optional[RunnerArgs]=None) -> Optional[Config]: - """ - Load and parse the JSON configuration file. 
- """ - if not os.path.exists(config_path): - return None - try: - with open(config_path, 'r') as config_file: - config_data = json.load(config_file) - except json.decoder.JSONDecodeError: - log("Config Error: Failed to parse config: ", config_path) - return None - - debug_package = args.debug_package if args else None - package_filter = args.package_filter if args else "" - return Config(config_path, config_data, debug_package, package_filter) diff --git a/dragon_runner/src/errors.py b/dragon_runner/src/errors.py deleted file mode 100644 index b64adee..0000000 --- a/dragon_runner/src/errors.py +++ /dev/null @@ -1,58 +0,0 @@ -from typing import List, Union, Iterable - -class Error: - def __str__(self): raise NotImplementedError("Must implement __str__") - -class ConfigError(Error): - def __init__(self, message: str): - self.message = message - - def __str__(self): - return f"Config Error: {self.message}" - -class TestFileError(Error): - def __init__(self, message: str): - self.message = message - - def __str__(self): - return f"Testfile Error: {self.message}" - -class ErrorCollection: - def __init__(self, errors: Union[None, 'ErrorCollection', Iterable[Error]] = None): - self.errors: List[Error] = [] - if errors is not None: - if isinstance(errors, ErrorCollection): - self.errors = errors.errors.copy() - elif isinstance(errors, Iterable): - self.errors = list(errors) - - def has_errors(self) -> bool: - return self.__bool__() - - def add(self, error: Error): - self.errors.append(error) - - def extend(self, errors: Union['ErrorCollection', Iterable[Error]]): - if isinstance(errors, ErrorCollection): - self.errors.extend(errors.errors) - elif isinstance(errors, Iterable): - self.errors.extend(errors) - - def __bool__(self): - return len(self.errors) > 0 - - def __eq__(self, other): - if isinstance(other, bool): - return bool(self) == other - return False - - def __len__(self): - return len(self.errors) - - def __str__(self): - return "\n".join(str(error) for error in 
self.errors) - -class Verifiable: - def verify(self) -> ErrorCollection: - raise NotImplementedError("Subclasses must implement verify method") - diff --git a/dragon_runner/src/harness.py b/dragon_runner/src/harness.py deleted file mode 100644 index af6c81c..0000000 --- a/dragon_runner/src/harness.py +++ /dev/null @@ -1,315 +0,0 @@ -import csv -import fnmatch -from colorama import Fore -from typing import Any, List, Dict, Optional, Set -from dragon_runner.src.cli import RunnerArgs -from dragon_runner.src.config import Config, Executable, Package -from dragon_runner.src.log import log -from dragon_runner.src.runner import TestResult, ToolChainRunner -from dragon_runner.src.utils import file_to_str -from itertools import zip_longest - -class TestHarness: - __test__ = False - - def __init__(self, config: Config, cli_args: RunnerArgs): - self.config = config - self.cli_args: RunnerArgs = cli_args - self.failures: List[TestResult] = [] - self.run_passed = True - - def process_test_result(self, test_result: TestResult, context: Dict[str, Any]): - """ - Subclasses should override this method to handle test result processing and update counts. 
- """ - raise NotImplementedError("Subclasses must implement this method") - - def pre_subpackage_hook(self, spkg): - """Hook to run before iterating through a subpackage.""" - pass - - def post_subpackage_hook(self, context: Dict[str, Any]): - """Hook to run after iterating through a subpackage.""" - pass - - def pre_executable_hook(self, exe): - """Hook to run efore iterating through an executable.""" - pass - - def post_executable_hook(self): - """Hook to run after iterating through an executable""" - if self.failures != []: - pass - # todo: enable this with a flag - # log(f"Failure Summary: ({len(self.failures)} tests)") - # for result in self.failures: - # result.log() - self.failures = [] - - def post_run_hook(self): - pass - - def pre_run_hook(self): - pass - - def iterate(self): - """ - Basic structure to record which tests pass and fail. Additional functionality - can be implemented by overriding default hooks. - """ - self.pre_run_hook() - for exe in self.config.executables: - self.pre_executable_hook(exe.id) - log(f"Running executable: {exe.id}", indent=0) - exe.source_env() - exe_pass_count = 0 - exe_test_count = 0 - for toolchain in self.config.toolchains: - tc_runner = ToolChainRunner(toolchain, self.cli_args.timeout) - log(f"Running Toolchain: {toolchain.name}", indent=1) - tc_pass_count = 0 - tc_test_count = 0 - for pkg in self.config.packages: - pkg_pass_count = 0 - pkg_test_count = 0 - log(f"Entering package {pkg.name}", indent=2) - for spkg in pkg.subpackages: - # Glob pattern match against package_filter using subpackage path - if self.config.package_filter: - if not fnmatch.fnmatch(spkg.path.lower(), self.config.package_filter.lower()): - continue - log(f"Entering subpackage {spkg.name}", indent=3) - counters = {"pass_count": 0, "test_count": 0} - self.pre_subpackage_hook(spkg) - for test in spkg.tests: - test_result: TestResult = tc_runner.run(test, exe) - self.process_test_result(test_result, counters) - if self.cli_args.fast_fail and not 
test_result.did_pass: - self.post_subpackage_hook(counters) - self.post_executable_hook() - self.post_run_hook() - return - self.post_subpackage_hook(counters) - log("Subpackage Passed: ", counters["pass_count"], "/", counters["test_count"], indent=3) - pkg_pass_count += counters["pass_count"] - pkg_test_count += counters["test_count"] - log("Packaged Passed: ", pkg_pass_count, "/", pkg_test_count, indent=2) - tc_pass_count += pkg_pass_count - tc_test_count += pkg_test_count - log("Toolchain Passed: ", tc_pass_count, "/", tc_test_count, indent=1) - exe_pass_count += tc_pass_count - exe_test_count += tc_test_count - log("Executable Passed: ", exe_pass_count, "/", exe_test_count) - self.post_executable_hook() - self.post_run_hook() - - def run(self): - """Default run implementation.""" - self.iterate() - return self.run_passed - -class RegularHarness(TestHarness): - - def process_test_result(self, test_result: TestResult, context: Dict[str, Any]): - """ - Override the hook for regular run-specific implementation of counting passes - """ - if test_result.did_pass: - context["pass_count"] += 1 - test_result.log(args=self.cli_args) - else: - self.run_passed = False - self.failures.append(test_result) - test_result.log(args=self.cli_args) - context["test_count"] += 1 - -class TournamentHarness(TestHarness): - - def iterate(self): - """ - Run the tester in grade mode. Run all test packages for each tested executable. - Write each toolchain table to the CSV file as it's completed. 
- """ - attacking_pkgs = sorted(self.config.packages, key=lambda pkg: pkg.name.lower()) - defending_exes = sorted(self.config.executables, key=lambda exe: exe.id.lower()) - solution_exe = self.config.solution_exe - failure_log = self.cli_args.failure_log - - for toolchain in self.config.toolchains: - tc_runner = ToolChainRunner(toolchain, self.cli_args.timeout) - tc_table = self.create_tc_dataframe(defending_exes, attacking_pkgs) - - with open(f"toolchain_{toolchain.name}.csv", 'w') as toolchain_csv: - print(f"\nToolchain: {toolchain.name}") - csv_writer = csv.writer(toolchain_csv) - csv_writer.writerow([toolchain.name] + [pkg.name for pkg in attacking_pkgs]) - toolchain_csv.flush() - - for def_exe in defending_exes: - def_exe.source_env() - def_feedback_file = f"{def_exe.id}-{toolchain.name}feedback.txt" - for a_pkg in attacking_pkgs: - print(f"\n {a_pkg.name:<12} --> {def_exe.id:<12}", end='') - pass_count = 0 - test_count = 0 - for a_spkg in a_pkg.subpackages: - for test in a_spkg.tests: - test_result: Optional[TestResult] = tc_runner.run(test, def_exe) - if test_result and test_result.did_pass: - print(Fore.GREEN + '.' + Fore.RESET, end='') - pass_count += 1 - if solution_exe == def_exe.id and failure_log: - with open("pass_log.txt", 'a') as f_log: - f_log.write(f"{toolchain.name} {a_pkg.name} {test_result.test.path}\n") - else: - print(Fore.RED + '.' 
+ Fore.RESET, end='') - self.log_failure_to_file(def_feedback_file, test_result) - if solution_exe == def_exe.id and failure_log: - with open(failure_log, 'a') as f_log: - f_log.write(f"{toolchain.name} {a_pkg.name} {test_result.test.path}\n") - test_count += 1 - - cell_value = f"{pass_count}/{test_count}" - tc_table[def_exe.id][a_pkg.name] = cell_value - csv_writer.writerow([def_exe.id] + [tc_table[def_exe.id][pkg.name] for pkg in attacking_pkgs]) - toolchain_csv.flush() - - @staticmethod - def create_tc_dataframe(defenders: List[Executable], - attackers: List[Package]) -> Dict[str, Dict[str, str]]: - """ - Create an empty toolchain table with labels for defenders and attackers - """ - df = {exe.id: {pkg.name: '' for pkg in attackers} for exe in defenders} - return df - - @staticmethod - def create_timing_dataframe() -> Dict[str, Dict[str, float]]: - """ - TODO: Creating timing DF for Gazprea II (Only applicable for grading) - """ - return {} - - def log_failure_to_file(self, file, result: TestResult): - """ - Give full feedback to a defender for all the tests they failed. - """ - def trim_bytes(data: bytes, max_bytes: int = 10000) -> bytes: - trimmed = data[:max_bytes] - if len(data) > max_bytes: - trimmed += b"\n... 
(output trimmed to %d bytes)" % max_bytes - return trimmed - - if result.did_pass: - return - - with open(file, 'a+') as feedback_file: - test_contents = result.test.pretty_print() - exp_out = trim_bytes(x) if isinstance(x := result.test.expected_out, bytes) else "" - gen_out = trim_bytes(x) if isinstance(x := result.gen_output, bytes) else "" - feedback_string = ( - "="*80+'\n' - f"Test: {result.test.file}" - f"\nTest Contents:\n{test_contents}\n" - f"\nExpected Output: {exp_out}\n" - f"Generated Output: {gen_out}\n" - ) - - feedback_file.write(feedback_string) - -class MemoryCheckHarness(TestHarness): - - def __init__(self, config: Config, cli_args: RunnerArgs): - super().__init__(config, cli_args) - self.leak_count = 0 - self.test_count = 0 - self.leak_tests: List[TestResult] = [] - - def post_executable_hook(self): - """ - Report failures to stdout. - """ - log(f"Leak Summary: ({len(self.leak_tests)} tests)") - for result in self.leak_tests: - log(Fore.YELLOW + "[LEAK] " + Fore.RESET + f"{result.test.file}", - indent=4) - self.leak_tests = [] - self.test_count = 0 # reset for each executable - - def process_test_result(self, test_result: TestResult, context: Dict[str, Any]): - """ - Override the hook for regular run-specific implementation of counting passes - """ - # TODO: Refactor an clean up. 
Not simple enough - - # increment the test count - self.test_count += 1 - context["test_count"] += 1 - - # log the test result - test_result.log(args=self.cli_args) - - # track tests which leak - if test_result.memory_leak: - self.leak_tests.append(test_result) - - # track passes as usual - if test_result.did_pass: - context["pass_count"] += 1 - -class PerformanceTestingHarness(TestHarness): - - def __init__(self, config: Config, cli_args: RunnerArgs): - super().__init__(config, cli_args) - self.csv_cols = [] - self.cur_col = [] - self.testfile_col = ["Test"] - self.first_exec = True - - @staticmethod - def create_tc_dataframe(defenders: List[Executable], - attackers: List[Package]) -> Dict[str, Set[str]]: - """ - Create an empty toolchain table with labels for defenders and attackers - """ - df = {exe.id: {pkg.name for pkg in attackers} for exe in defenders} - return df - - def process_test_result(self, test_result: TestResult, context: Dict[str, Any]): - """ - Override the hook for regular run-specific implementation of counting passes - """ - # only construct a column for the test file names once - if self.first_exec: - self.testfile_col.append(test_result.test.file) - - if test_result.did_pass: - context["pass_count"] += 1 - test_result.log(args=self.cli_args) - self.cur_col.append(test_result.time) - - else: - self.cur_col.append(self.cli_args.timeout) - self.failures.append(test_result) - test_result.log(args=self.cli_args) - context["test_count"] += 1 - - def pre_executable_hook(self, exe): - self.cur_col.append(exe) - - def post_executable_hook(self): - if self.first_exec: - self.csv_cols.append(self.testfile_col) - self.first_exec = False - - self.csv_cols.append(self.cur_col) - self.cur_col = [] - - def post_run_hook(self): - # transpose the columns into rows for writing - csv_rows = zip_longest(*self.csv_cols, fillvalue='') - - with open('perf.csv', 'w', newline='') as file: - writer = csv.writer(file) - writer.writerows(csv_rows) - diff --git 
a/dragon_runner/src/log.py b/dragon_runner/src/log.py deleted file mode 100644 index 60db1c8..0000000 --- a/dragon_runner/src/log.py +++ /dev/null @@ -1,38 +0,0 @@ -import os - -class Logger: - def __init__(self): - self.debug_level = self._get_debug_level() - - def _get_debug_level(self): - return int(os.environ.get('DRAGON_RUNNER_DEBUG', '0')) - - def log(self, level, indent, *args, **kwargs): - prefix = ' '*indent - if self.debug_level >= level: - print(prefix, *args, **kwargs) - -_logger_instance = None - -def get_logger(): - """ - get singleton logger for the entire program - """ - global _logger_instance - if _logger_instance is None: - _logger_instance = Logger() - return _logger_instance - -def log_multiline(content: str, level=0, indent=0, **kwargs): - """ - Log multiline content with proper indentation - """ - for line in str(content).splitlines(): - log(line.rstrip(), level=level, indent=indent, **kwargs) - -def log(*args, level=0, indent=0, **kwargs): - get_logger().log(level, indent, *args, **kwargs) - -def log_delimiter(title: str, level=0, indent=0): - delimiter = '-' * 20 - log(delimiter + ' ' + title + ' ' + delimiter, level=level, indent=indent) diff --git a/dragon_runner/src/main.py b/dragon_runner/src/main.py deleted file mode 100644 index 89ff925..0000000 --- a/dragon_runner/src/main.py +++ /dev/null @@ -1,80 +0,0 @@ -from colorama import init, Fore -from dragon_runner.src.cli import Mode, parse_cli_args, ServerArgs, ScriptArgs -from dragon_runner.src.config import load_config -from dragon_runner.src.log import log, log_multiline -from dragon_runner.scripts.loader import Loader -from dragon_runner.src.server import serve -from dragon_runner.src.harness import * - -# initialize terminal colors -init(autoreset=True) - -def main(): - # parse and verify the CLI arguments - cli_args = parse_cli_args() - log(cli_args, level=1) - - # run the server for running configs through HTTP - if isinstance(cli_args, ServerArgs): - serve(cli_args) - return 0 - - 
# dragon-runner can also be used as a loader for grading & other scripts - if isinstance(cli_args, ScriptArgs): - loader = Loader() - return loader(cli_args.args) - - # parse and verify the config - config = load_config(cli_args.config_file, cli_args) - if not config: - log(f"Could not open config file: {cli_args.config_file}") - return 1 - - if config.error_collection: - log(f"Found Config {len(config.error_collection)} error(s):") - log(f"Parsed {cli_args.config_file} below:") - log_multiline(str(config), indent=2) - log(Fore.RED + str(config.error_collection) + Fore.RESET) - return 1 - - if cli_args.verify: - ccid = input("Enter your CCID/Github Team Name: ") - assert config and not config.error_collection - found = False - for pkg in config.packages: - log("Searching.. ", pkg.name, indent=2) - if pkg.name == ccid: - found = True - if not found: - print(f"Could not find package named after CCID: {ccid}") - return 1 - - # display the config info before running tests - config.log_test_info() - - if cli_args.mode == Mode.REGULAR: - # run in regular mode - harness = RegularHarness(config, cli_args) - - elif cli_args.mode == Mode.TOURNAMENT: - # run the tester in tournament mode - harness = TournamentHarness(config, cli_args) - - elif cli_args.mode == Mode.MEMCHECK: - # check tests for memory leaks - harness = MemoryCheckHarness(config, cli_args) - - elif cli_args.mode == Mode.PERF: - # performance testing - harness = PerformanceTestingHarness(config, cli_args) - else: - raise RuntimeError(f"Failed to provide valid mode: {cli_args.mode}") - - success = harness.run() - if success: - return 0 - return 1 - -if __name__ == "__main__": - main() - diff --git a/dragon_runner/src/runner.py b/dragon_runner/src/runner.py deleted file mode 100644 index 5236520..0000000 --- a/dragon_runner/src/runner.py +++ /dev/null @@ -1,423 +0,0 @@ -import subprocess -import os -import re -import json -import time -import sys -from subprocess import CompletedProcess -from typing import List, 
Dict, Optional, Union -from dataclasses import dataclass, asdict -from colorama import Fore, init -from dragon_runner.src.testfile import TestFile -from dragon_runner.src.config import Executable, ToolChain -from dragon_runner.src.log import log, log_multiline -from dragon_runner.src.toolchain import Step -from dragon_runner.src.cli import CLIArgs, RunnerArgs -from dragon_runner.src.utils import make_tmp_file, bytes_to_str,\ - file_to_bytes, truncated_bytes,\ - file_to_str -# Terminal colors -init(autoreset=True) - -# Reserve a specific status code to use for valgrind -VALGRIND_EXIT_CODE = 111 - -@dataclass -class MagicParams: - exe_path: str # $EXE - input_file: Optional[str] = "" # $INPUT - output_file: Optional[str] = "" # $OUTPUT - def __repr__(self): - return json.dumps(asdict(self), indent=2) - -class Command: - """ - Wrapper for a list of arguments to run fork/exec style - """ - def __init__(self, args): - self.args: List[str] = args - self.cmd: str = self.args[0] - -@dataclass -class CommandResult: - cmd:str - subprocess: Optional[CompletedProcess]=None - exit_status: int=0 - time: float=0 - timed_out: bool=False - - def log(self, level:int=0, indent=0): - if self.subprocess: - stdout = self.subprocess.stdout - stderr = self.subprocess.stderr - - if stderr is None: - stderr = b'' - if stdout is None: - stdout = b'' - - log(f"==> {self.cmd} (exit {self.exit_status})", indent=indent, level=level) - log(f"stdout ({len(stdout)} bytes):", truncated_bytes(stdout, max_bytes=512), - indent=indent+2, level=level) - log(f"stderr ({len(stderr)} bytes):", truncated_bytes(stderr, max_bytes=512), - indent=indent+2, level=level) - -class TestResult: - """ - Represents the result of running a test case, including pass/fail status, - execution time, and error information. 
- """ - __test__ = False # pytest gets confused when classes start with 'Test' - def __init__(self, test:TestFile, did_pass:bool=False): - # required fields - self.test = test - self.did_pass: bool = did_pass - self.did_timeout: bool = False - self.error_test: bool = False - self.memory_leak: bool = False - self.command_history: List[CommandResult] = [] - - # optional fields - self.gen_output: Optional[bytes] = None - self.time: Optional[float] = None - self.failing_step: Optional[str] = None - - def log(self, file=sys.stdout, args: Union['RunnerArgs', None]=None): - """ - Print a TestResult to the log with various levels of verbosity. - This is the main output the user is concerned with. - """ - # TODO: This is very messy. Find some time to clean in up! - pass_msg = "[E-PASS] " if self.error_test else "[PASS] " - fail_msg = "[E-FAIL] " if self.error_test else "[FAIL] " - timeout_msg = "[TIMEOUT] " - - test_name = f"{self.test.file:<50}".strip() - show_time = args and args.time and self.time is not None - if self.did_timeout: - log(Fore.YELLOW + timeout_msg + Fore.RESET + f"{test_name.strip()}", indent=4, file=file) - - # Log test result - elif self.did_pass: - time_display = "" - if show_time: - time_str = f"{self.time:.4f}" - time_display = f"{time_str:>10} (s)" - log_msg = f"{Fore.GREEN}{pass_msg}{Fore.RESET}{test_name}{time_display}" - log(log_msg, indent=4, file=file) - else: - log(Fore.RED + fail_msg + Fore.RESET + f"{test_name}", indent=4, file=file) - - # Log testcase - if args and args.show_testcase: - content = self.test.pretty_print() - level = 2 if self.did_pass else 0 - log_multiline(content, indent=6, level=level) - - # Log the command history - level = 3 if self.did_pass else 2 - log(f"==> Command History", indent=6, level=level) - for cmd in self.command_history: - cmd.log(level=level, indent=8) - - # Log test expected and generated - expected_out = self.test.get_expected_out() - generated_out = x if (x := self.gen_output) else b'' - - log(f"==> 
Expected Out ({len(expected_out)} bytes):", indent=6, level=level-1) - log(str(expected_out), level=level-1, indent=7) - log(f"==> Generated Out ({len(generated_out)} bytes):", indent=6, level=level-1) - log(str(generated_out), level=level-1, indent=7) - - def __repr__(self): - return "PASS" if self.did_pass else "FAIL" - -class ToolChainRunner(): - def __init__(self, tc: ToolChain, timeout: float, env: Dict[str, str]={}): - self.tc = tc - self.timeout = timeout - self.env = env - self.reserved_exit_codes = [VALGRIND_EXIT_CODE] - self.RUNTIME_ERRORS = ["SizeError", "IndexError", "MathError", "StrideError"] - - def handle_error_test(self, tr: TestResult, produced: bytes, expected: bytes): - """ - An error test requires specific handling since a diff between expected and - generated does not imply the test will fail. Instead we identify the relevent - components of the error message using regular expressions and perform a lenient diff. - """ - try: - produced_str = produced.decode('utf-8').strip() if produced else None - expected_str = expected.decode('utf-8').strip() if expected else None - except UnicodeDecodeError as unicode_error: - tr.did_pass = False - return - - # An error test must be UTF-8 decodable. - if produced_str is None or expected_str is None: - tr.did_pass = False - return - - rt_error = next((s for s in self.RUNTIME_ERRORS if s in expected_str), None) - did_raise_rt_error = any(err in produced_str for err in self.RUNTIME_ERRORS) - if did_raise_rt_error: - # Expected can be either a runtime or compile time format. - if rt_error is None: - # Raised a runtime error but did not expect one. - tr.did_pass = False - else: - # Raised a runtime error and expected one as well. - pattern = fr"{rt_error}(\s+on\s+Line\s+\d+)?(:.*)?" - tr.did_pass = bool( - re.search(pattern, produced_str) and - re.search(pattern, expected_str) - ) - else: - # Expected must be in compile time format, i.e lines must match. 
- def extract_components(text): - error = re.search(r"(\w+Error)", text, re.IGNORECASE) - line = re.search(r"on\s+Line\s+(\d+)", text, re.IGNORECASE) - return error, line - - prod_error, prod_line = extract_components(produced_str) - exp_error, exp_line = extract_components(expected_str) - - if prod_error and prod_error.group(1) == "MainError" and \ - exp_error and exp_error.group(1) == "MainError": - # hack in this case because spec doesn't define what line to throw MainError on. - tr.did_pass = True - return - - if prod_error and exp_error and prod_line and exp_line: - - - tr.did_pass = (prod_line.group(1) == exp_line.group(1)) - else: - tr.did_pass = False - - def run_command(self, command, stdin: bytes) -> CommandResult: - """ - Run a command and return the CommandResult - """ - env = os.environ.copy() - start_time = time.time() - cr = CommandResult(cmd=command.cmd) - try: - result = subprocess.run( - command.args, - env=env, - input=stdin, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - check=False, - timeout=self.timeout - ) - wall_time = time.time() - start_time - cr.subprocess = result - cr.exit_status = result.returncode - cr.time = wall_time - except subprocess.TimeoutExpired: - cr.time = self.timeout - cr.timed_out = True - cr.exit_status = 255 - except Exception: - cr.exit_status = 1 - return cr - - def resolve_output_file(self, step: Step) -> Optional[str]: - """ - make absolute path from output file in step - """ - current_dir = os.getcwd() - output_file = os.path.join(current_dir, step.output) if step.output else None - return output_file - - def resolve_command(self, step: Step, params: MagicParams) -> Command: - """ - replace magic parameters with real arguments - """ - command = Command(args=[step.exe_path] + step.arguments) - command = self.replace_magic_args(command, params) - command = self.replace_env_vars(command) - exe = command.args[0] - if not os.path.isabs(exe): - command.args[0] = os.path.abspath(exe) - return command - - def 
run(self, test: TestFile, exe: Executable) -> TestResult: - """ - run each step of the toolchain for a given test and executable - """ - input_file = test.path - expected = test.expected_out if isinstance(test.expected_out, bytes) else b'' - tr = TestResult(test=test, did_pass=False) - - for index, step in enumerate(self.tc): - - # set up input and output - last_step = (index == len(self.tc) - 1) - input_stream = test.get_input_stream() if step.uses_ins else b'' - output_file = self.resolve_output_file(step) - - # resolve magic parameters for currents step - magic_params = MagicParams(exe.exe_path, input_file, output_file) - command = self.resolve_command(step, magic_params) - command_result = self.run_command(command, input_stream) - - # save command history for logging - tr.command_history.append(command_result) - - # Check if the command timed out - if command_result.timed_out: - """ - A step timed out based on the max timeout specified by CLI arg. - """ - tr.did_pass=False; - tr.did_timeout=True - tr.failing_step=step.name; - tr.time = self.timeout - return tr - - child_process = command_result.subprocess - if not child_process: - """ - OS failed to exec the command. - """ - tr.did_pass = False; - return tr - - step_stdout = bytes(child_process.stdout) or b'' - step_stderr = bytes(child_process.stderr) or b'' - step_time = round(command_result.time, 4) - - if child_process.returncode in self.reserved_exit_codes: - """ - Special case for reserved exit codes - 1) Valgrind - """ - if child_process.returncode == VALGRIND_EXIT_CODE: - tr.memory_leak = True - - if child_process.returncode != 0 and \ - child_process.returncode not in self.reserved_exit_codes: - """ - A step in the toolchain has returned a non-zero exit status. If "allowError" - is specified in the config, we can perform a lenient diff based on CompileTime - or RuntimeError message rules. Otherwise, we abort the toolchain. 
- """ - tr.gen_output=step_stderr - tr.failing_step=step.name - tr.error_test=True - - # fail by default if errors are not explicitly allowed in config - if step.allow_error: - self.handle_error_test(tr, step_stderr, expected) - return tr - else: - tr.did_pass = False - return tr - - elif last_step: - """ - The last step terminated gracefully at this point. We write to the output file and - make a precise diff to determine if the test has passed. - """ - if output_file and not os.path.exists(output_file): - raise RuntimeError(f"Command did not create specified output file {output_file}") - - if output_file is not None: - step_stdout = file_to_bytes(output_file) or b'' - - tr.time=step_time - tr.gen_output=step_stdout - - # Diff the produced and expected outputs - diff = precise_diff(step_stdout, expected) - if not diff: - tr.did_pass = True - else: - tr.did_pass = False - - return tr - else: - """ - Set up the next steps input file which is the $OUTPUT of the previous step. - If $OUTPUT is not supplied, we create a temporary pipe. 
- """ - input_file = output_file or make_tmp_file(child_process.stdout) - - # this code should be unreachable for well-defined toolchains - raise RuntimeError("Toolchain reached undefined conditions during execution.") - - @staticmethod - def replace_env_vars(cmd: Command) -> Command: - """ - Expand environment variables with the values from current shell - """ - resolved = [] - for arg in cmd.args: - matches = re.findall(r'\$(\w+)|\$\{(\w+)\}', arg) - if matches: - for match in matches: - var_name = match[0] or match[1] - env_value = os.environ.get(var_name) - if env_value is not None: - arg = arg.replace(f"${var_name}", env_value)\ - .replace(f"${{{var_name}}}", env_value) - resolved.append(arg) - else: - resolved.append(arg) - cmd.args = resolved - return cmd - - @staticmethod - def replace_magic_args(command: Command, params: MagicParams) -> Command: - """ - Magic args are inherited from previous steps - """ - resolved = [] - for arg in command.args: - if '$EXE' in arg: - resolved.append(arg.replace('$EXE', params.exe_path)) - elif '$INPUT' in arg and params.input_file: - resolved.append(arg.replace('$INPUT', params.input_file)) - elif '$OUTPUT' in arg and params.output_file: - resolved.append(arg.replace('$OUTPUT', params.output_file)) - else: - resolved.append(arg) - command.args = resolved - command.cmd = command.args[0] - return command - -def diff_bytes(s1: bytes, s2: bytes) -> str: - """ - The difflib library appears to have an infinite recursion bug. - It is simple to write our own. 
- """ - result = [] - i, j = 0, 0 - while i < len(s1) and j < len(s2): - if s1[i] != s2[j]: - result.append(f"-{s1[i]}") - result.append(f"+{s2[j]}") - else: - result.append(f" {s1[i]}") - i += 1 - j += 1 - while i < len(s1): - result.append(f"-{s1[i]}") - i += 1 - while j < len(s2): - result.append(f"+{s2[j]}") - j += 1 - return ''.join(result) - -def precise_diff(produced: bytes, expected: bytes) -> str: - """ - Return the difference of two byte strings, otherwise empty string - """ - # identical strings implies no diff - if produced == expected: - return "" - return diff_bytes(produced, expected) - diff --git a/dragon_runner/src/server.py b/dragon_runner/src/server.py deleted file mode 100644 index 781c48a..0000000 --- a/dragon_runner/src/server.py +++ /dev/null @@ -1,282 +0,0 @@ -import os -import subprocess -import shutil -from typing import List, Dict, Any, Optional -from dragon_runner.src.cli import ServerArgs -from dragon_runner.src.runner import TestResult, ToolChainRunner, Command, CommandResult -from dragon_runner.src.toolchain import ToolChain -from dragon_runner.src.config import load_config, Config, Executable -from dragon_runner.src.testfile import TestFile -from dragon_runner.src.utils import * -from tempfile import NamedTemporaryFile -from pathlib import Path -from flask import Blueprint, Flask, request, jsonify, current_app -from flask_cors import CORS - -SERVER_MODE = os.environ.get("DR_SERVER_MODE", "DEBUG").upper() -IS_PRODUCTION = (SERVER_MODE == "PROD") -app = Flask(__name__) -CORS(app) - -class SecureToolChainRunner(ToolChainRunner): - """ - ToolChainRunner using firejail sandboxing - """ - def __init__(self, tc, timeout: float, env=None, restrict_exes: List[Executable]=[]): - super().__init__(tc, timeout, env or {}) - self.firejail_available = self._check_firejail() - self.restrict_exes = restrict_exes - - def _check_firejail(self) -> bool: - """ - Check if firejail is available on the system. 
- """ - return shutil.which('firejail') is not None - - def _create_firejail_command(self, original_cmd: List[str]) -> List[str]: - """ - Wrap command with firejail security options. - """ - if not self.firejail_available: - return original_cmd - - firejail_cmd = [ - 'firejail', - '--noprofile', - '--seccomp', - '--noroot', - '--net=none', - '--noexec=/home', - '--private-tmp', - '--private-dev', - '--read-only=/usr', - '--read-only=/bin', - '--read-only=/lib', - '--read-only=/lib64', - '--blacklist=/home', - '--blacklist=/root', - '--blacklist=/etc', - '--rlimit-nproc=2', - '--rlimit-fsize=1048576', #1MB - f'--timeout=00:00:{int(self.timeout):02d}', - '--quiet', - '--' - ] - return firejail_cmd + original_cmd - - def run_command(self, command: Command, stdin: bytes) -> CommandResult: - """ - Override to wrap commands with firejail - """ - if self.firejail_available: - secure_args = self._create_firejail_command(command.args) - secure_command = Command(secure_args) - return super().run_command(secure_command, stdin) - return CommandResult(cmd="", exit_status=1) - -class Payload: - def __init__(self): - self.data = {} - - def to_dict(self): - return self.data - -class ConfigPayload(Payload): - def __init__(self, config: Config): - self.data = { - "name": config.name, - "executables": [e.id for e in config.executables], - "toolchains": [t.name for t in config.toolchains] - } - -class ToolChainPayload(Payload): - def __init__(self, tc: ToolChain): - self.data = tc.to_dict() - -class TestPayload(Payload): - def __init__(self, test: TestFile): - self.data = test.to_dict() - self.data.update({"content": utf8_file_to_base64(test.path)}) - -class ConfigAPI: - def __init__(self, config: Config): - self.config = config - self.config_path = config.config_path - self.name = Path(config.config_path).stem - self.tests: Dict = self.unpack_tests() - - # Create blueprint for this config - self.bp = Blueprint(f"config_{self.name}", __name__) - self._register_routes() - - def 
unpack_tests(self) -> Dict: - tests = {} - for pkg in self.config.packages: - for spkg in pkg.subpackages: - for test in spkg.tests: - tests[test.file] = test - return tests - - def _register_routes(self): - self.bp.route(f"/config/{self.name}", methods=["GET"])(self.get_config) - self.bp.route(f"/config/{self.name}/toolchains", methods=["GET"])(self.get_toolchains) - self.bp.route(f"/config/{self.name}/tests", methods=["GET"])(self.get_tests) - self.bp.route(f"/config/{self.name}/run", methods=["POST"])(self.run_test) - - def get_config(self): - return jsonify(ConfigPayload(self.config).to_dict()) - - def get_toolchains(self): - return jsonify([ToolChainPayload(t).to_dict() for t in self.config.toolchains]) - - def get_tests(self): - return jsonify([TestPayload(t).to_dict() for t in self.tests.values()]) - - def run_test(self): - data = request.get_json(silent=True) or {} - toolchain_name: str = data.get('toolchain_name', "") - exe_name: str = data.get('exe_name', "") - test_stdin: Optional[bytes] = b64_to_bytes(data.get('stdin', "")) - test_contents: Optional[str] = b64_to_str(data.get('test_contents', "")) - - if test_stdin is None or test_contents is None: - app.logger.error(f"Test received stdin: {test_stdin} and contents {test_contents}") - return jsonify({ - "status": "error", - "message": "Failed to decode stdin and/or test contents in request." 
- }), 500 - - try: - # Find toolchain and executable - exe = next((e for e in self.config.executables if e.id == exe_name), - self.config.executables[0]) - tc = next((x for x in self.config.toolchains if x.name == toolchain_name), - self.config.toolchains[0]) - - if IS_PRODUCTION: - tc_runner = SecureToolChainRunner(tc, timeout=5, restrict_exes=self.config.executables) - else: - tc_runner = ToolChainRunner(tc, timeout=5) - - # Create temporary file for runtime supplied test - with NamedTemporaryFile(mode='w+', delete=True, suffix='.test') as temp: - temp.write(test_contents) - temp.flush() - temp.seek(0) - test = TestFile(temp.name) - test.set_input_stream(test_stdin) - - # Run test in secure environment - app.logger.info(f"Running secure test: {test.stem} with toolchain: {toolchain_name}") - tr: TestResult = tc_runner.run(test, exe) - - cmd = tr.command_history[-1] if tr.command_history else None - - if cmd and cmd.subprocess: - stdout = bytes_to_b64(cmd.subprocess.stdout) - stderr = bytes_to_b64(cmd.subprocess.stderr) - exit_status = cmd.exit_status - else: - stdout = "" - stderr = "Toolchain execution failed" - exit_status = -1 - - return jsonify({ - "config": self.name, - "test": test.stem, - "results": { - "passed": tr.did_pass, - "exit_status": exit_status, - "stdout": stdout, - "stderr": stderr, - "time": str(tr.time), - "expected_output": str(test.expected_out), - } - }) - - except subprocess.TimeoutExpired: - app.logger.error("Test execution timed out") - return jsonify({ - "status": "error", - "message": "Test execution timed out" - }), 408 - except Exception as e: - app.logger.error(f"Error running test: {str(e)}") - return jsonify({ - "status": "error", - "message": str(e) - }), 500 - -@app.route("/") -def root(): - """Base route that lists all available routes""" - return jsonify({ - "service": "Dragon Runner API", - "status": "running", - "mode": "production" if IS_PRODUCTION else "debug", - "available_endpoints": [route['url'] for route in 
get_available_routes()] - }) - -def get_available_routes() -> List[Dict[str, Any]]: - """Helper function to list all available routes""" - routes = [] - for rule in current_app.url_map.iter_rules(): - if rule.endpoint != 'static' and rule.methods: - routes.append({ - "url": str(rule), - "methods": list(rule.methods - {"OPTIONS", "HEAD"}) - }) - return routes - -def get_configs_to_serve(config_dir: Path) -> List[Config]: - """Get all config files from a directory and its subdirectories""" - configs: List[Config] = [] - - def fill_config(path: Path): - if path.is_file(): - config = load_config(str(path)) - if config is not None: - configs.append(config) - return - - for entry in path.iterdir(): - if entry.is_dir() or entry.is_file(): - fill_config(entry) - - fill_config(config_dir) - return configs - -def create_app(args: ServerArgs): - """Create App for WSGI deployment""" - configs = get_configs_to_serve(args.serve_path) - - def root_route(): - return jsonify([ConfigPayload(c).to_dict() for c in configs]) - - bp = Blueprint(f"configs", __name__) - bp.route("/configs", methods=["GET"])(root_route) - app.register_blueprint(bp) - - # Create APIs for each config and register their blueprints - for config in configs: - api = ConfigAPI(config) - app.register_blueprint(api.bp) - - # Log security status - firejail_status = "ENABLED" if shutil.which('firejail') else "DISABLED" - app.logger.info(f"Security sandbox: {firejail_status}") - - return app - -def serve(args: ServerArgs): - create_app(args) - - if IS_PRODUCTION: - from wsgiref.simple_server import make_server - server = make_server('0.0.0.0', args.port, app) - print(f"Production server running on http://0.0.0.0:{args.port}") - server.serve_forever() - else: - print(f"Dev mode - Flask dev server on http://0.0.0.0:{args.port}") - app.run(debug=True, host="0.0.0.0", port=args.port) - diff --git a/dragon_runner/src/testfile.py b/dragon_runner/src/testfile.py deleted file mode 100644 index 4ccb920..0000000 --- 
a/dragon_runner/src/testfile.py +++ /dev/null @@ -1,204 +0,0 @@ -import os -from io import BytesIO -from typing import Dict, Optional, Union -from dragon_runner.src.utils import file_to_str, str_to_bytes, file_to_bytes -from dragon_runner.src.errors import Verifiable, ErrorCollection, TestFileError - -class TestFile(Verifiable): - __test__ = False - def __init__(self, test_path: str, input_dir="input", input_stream_dir="input-stream", - output_dir="output", comment_syntax="//"): - self.path = test_path - self.stem, self.extension = os.path.splitext(os.path.basename(test_path)) - self.file:str = self.stem + self.extension - self.input_dir = input_dir - self.input_stream_dir = input_stream_dir - self.output_dir = output_dir - self.comment_syntax = comment_syntax # default C99 // - self.expected_out: Union[bytes, TestFileError] = self.get_content("CHECK:", "CHECK_FILE:") - self.input_stream: Union[bytes, TestFileError] = self.get_content("INPUT:", "INPUT_FILE:") - - @classmethod - def from_test_contents(cls, content: bytes, test_name: str): - - instance = cls.__new__(cls) - - return instance - - def set_input_stream(self, input_stream: bytes): - """ - Manually set the input stream. - """ - self.input_stream = input_stream - - def get_input_stream(self) -> bytes: - """ - Get the input-stream supplied for the test. Assumes this testfile instance - has had self.verify() called beforehand. - """ - if isinstance(self.input_stream, bytes): - return self.input_stream - return b'' - - def get_expected_out(self) -> bytes: - """ - Get the expected output for the test. Assumes this testfile instance - has had self.verify() called beforehand. 
- """ - if isinstance(self.expected_out, bytes): - return self.expected_out - return b'' - - def verify(self) -> ErrorCollection: - """ - Ensure the paths supplied in CHECK_FILE and INPUT_FILE exist - """ - collection = ErrorCollection() - # If a parse and read of a tests input or output fails, propagate here - if isinstance(self.expected_out, TestFileError): - collection.add(self.expected_out) - if isinstance(self.input_stream, TestFileError): - collection.add(self.input_stream) - return collection - - def get_content(self, inline_directive: str, file_directive: str) -> Union[bytes, TestFileError]: - """ - Generic method to get content based on directives - """ - inline_contents = self._get_directive_contents(inline_directive) - file_contents = self._get_directive_contents(file_directive) - - if inline_contents and file_contents: - return TestFileError(f"Directive Conflict for test {self.file}: Supplied both\ - {inline_directive} and {file_directive}") - - elif inline_contents: - return inline_contents - - elif file_contents: - if isinstance(file_contents, TestFileError): - return file_contents - - file_str = file_contents.decode() - - full_path = os.path.join(os.path.dirname(self.path), file_str.strip()) - if not os.path.exists(full_path): - return TestFileError(f"Failed to locate path supplied to {file_directive}\n\tTest:{self.path}\n\tPath:{full_path}\n") - - file_bytes = file_to_bytes(full_path) - if file_bytes is None: - return TestFileError(f"Failed to convert file {full_path} to bytes") - - return file_bytes - else: - return b'' - - def _get_file_bytes(self, file_path: str) -> Optional[bytes]: - """ - Get file contents in bytes - """ - try: - with open(file_path, "rb") as f: - file_bytes = f.read() - assert isinstance(file_bytes, bytes), "expected bytes" - return file_bytes - except FileNotFoundError: - return None - - def _get_directive_contents(self, directive_prefix: str) -> Optional[Union[bytes, TestFileError]]: - """ - Look into the testfile itself for 
contents defined in directives. - Directives can appear anywhere in a line, as long as they're preceded by a comment syntax. - """ - contents = BytesIO() - first_match = True - try: - with open(self.path, 'r') as test_file: - for line in test_file: - comment_index = line.find(self.comment_syntax) - directive_index = line.find(directive_prefix) - if comment_index == -1 or directive_index == -1 or\ - comment_index > directive_index: - continue - - rhs_line = line.split(directive_prefix, 1)[1] - rhs_bytes = str_to_bytes(rhs_line, chop_newline=True) - if rhs_bytes is None: - return None - if not first_match: - contents.write(b'\n') - - contents.write(rhs_bytes) - first_match = False - contents.seek(0) - return contents.getvalue() if contents else None - except UnicodeDecodeError as e: - return TestFileError(e.reason) - except Exception as e: - return TestFileError(f"Unkown error occured while parsing testfile: {self.path}") - - def __repr__(self): - max_test_name_length = 30 - test_name = os.path.basename(self.path) - if len(test_name) > max_test_name_length: - test_name = test_name[:max_test_name_length - 3] + "..." - - expected_out = b'' - if isinstance(self.expected_out, bytes): - expected_out = self.expected_out - - input_stream = b'' - if isinstance(self.input_stream, bytes): - input_stream = self.input_stream - - return (f"{test_name:<{max_test_name_length}}" - f"{len(expected_out):>4}\t" - f"{len(input_stream):>4}") - - def to_dict(self) -> Dict: - out = str(self.expected_out) - ins = str(self.input_stream) - return { - "name": self.stem, - "path": self.path, - "expected_output": out, - "input_stream": ins - } - - def pretty_print(self) -> str: - """ - Generate a pretty-formatted string representation of the test file contents - with borders around it. 
- """ - file_content = file_to_str(self.path) - if not file_content: - return f"Error reading file {self.path}:" - - # query size of border to draw for user - try: - term_width = os.get_terminal_size().columns if hasattr(os, 'get_terminal_size') else 80 - except OSError: - term_width = 80 - content_width = min(term_width - 10, 100) - - # ascii border characters - top_border = '┌' + '─' * (content_width - 2) + '┐' - bottom_border = '└' + '─' * (content_width - 2) + '┘' - - # apply border format to each line in the file - formatted_lines = [] - formatted_lines.append(top_border) - for line in file_content.splitlines(): - # truncate long lines - if len(line) > content_width - 4: - display_line = line[:content_width - 7] + '...' - else: - display_line = line - - # format content with border - padded_line = display_line.ljust(content_width - 4) - formatted_lines.append(f'│ {padded_line} │') - - formatted_lines.append(bottom_border) - return '\n'.join(formatted_lines) - diff --git a/dragon_runner/src/toolchain.py b/dragon_runner/src/toolchain.py deleted file mode 100644 index 9ede064..0000000 --- a/dragon_runner/src/toolchain.py +++ /dev/null @@ -1,68 +0,0 @@ -import json -import os -import subprocess -from typing import Dict, List, Iterator -from dragon_runner.src.errors import * - -class Step(Verifiable): - def __init__(self, **kwargs): - self.name = kwargs.get('stepName', None) - self.exe_path = kwargs.get('executablePath', None) - self.arguments = kwargs.get('arguments', None) - self.output = kwargs.get('output', None) - self.allow_error = kwargs.get('allowError', False) - self.uses_ins = kwargs.get('usesInStr', False) - self.uses_runtime = kwargs.get('usesRuntime', False) - - def verify(self) -> ErrorCollection: - errors = ErrorCollection() - if not self.name: - errors.add(ConfigError(f"Missing required filed 'stepName' in Step {self.name}")) - - if not self.exe_path: - errors.add(ConfigError(f"Missing required field 'exe_path' in Step: {self.name}")) - - elif not 
os.path.exists(self.exe_path) and not self.exe_path.startswith('$'): - errors.add(ConfigError(f"Cannot find exe_path '{self.exe_path}' in Step: {self.name}")) - - return errors - - def to_dict(self) -> Dict: - return { - 'stepName': self.name, - 'exe_path': self.exe_path, - 'arguments': self.arguments, - 'output': self.output, - 'allowError': self.allow_error, - 'usesInStr': self.uses_ins, - 'usesRuntime': self.uses_runtime - } - - def __repr__(self): - return json.dumps(self.to_dict(), indent=2) - -class ToolChain(Verifiable): - def __init__(self, name: str, steps: List[Dict]): - self.name = name - self.steps = [Step(**step) for step in steps] - - def verify(self) -> ErrorCollection: - errors = ErrorCollection() - for step in self.steps: - errors.extend(step.verify().errors) - return errors - - def to_dict(self) -> Dict[str, List[Dict]]: - return {self.name: [step.to_dict() for step in self.steps]} - - def __repr__(self): - return json.dumps(self.to_dict(), indent=2) - - def __iter__(self) -> Iterator[Step]: - return iter(self.steps) - - def __len__(self) -> int: - return len(self.steps) - - def __getitem__(self, index: int) -> Step: - return self.steps[index] diff --git a/dragon_runner/src/utils.py b/dragon_runner/src/utils.py deleted file mode 100644 index 2f7fcee..0000000 --- a/dragon_runner/src/utils.py +++ /dev/null @@ -1,153 +0,0 @@ -import os -import sys -import tempfile -import base64 -from typing import Optional -from colorama import init - -# Initialize colorama -init(autoreset=True) - -def resolve_relative(relative_dir: str, abs_path: str) -> str: - """ - Resolve relative path into an absolute path wrt to abs_path. - """ - if os.path.isfile(abs_path): - abs_path = os.path.dirname(abs_path) - return os.path.join(abs_path, relative_dir) - -def make_tmp_file(content: bytes) -> Optional[str]: - """ - Create a file in tmp with the bytes from content. 
- """ - try: - with tempfile.NamedTemporaryFile(delete=False) as tmp: - tmp.write(content) - os.chmod(tmp.name, 0o700) - return tmp.name - except Exception as e: - print(f"Failed to make temporary file with error: {e}", file=sys.stderr) - return None - -def str_to_bytes(string: str, chop_newline: bool=False) -> Optional[bytes]: - """ - Convert a string to bytes. Optionally chop off the newline. Used for - directive parsing. - """ - if chop_newline and string.endswith('\n'): - string = string[:-1] - try: - return string.encode('utf-8') - except UnicodeEncodeError: - return None - -def bytes_to_str(data: bytes, encoding: str='utf-8') -> Optional[str]: - """ - Convert bytes into a string. - """ - assert isinstance(data, bytes), "Supplied bytes that are not of type bytes." - try: - return data.decode(encoding) - except UnicodeDecodeError: - return str(data) - except: - return None - -def file_to_bytes(file: str) -> Optional[bytes]: - """ - Read a file in binary mode and return the bytes inside. - Return None if an exception is thrown. - """ - try: - with open(file, 'rb') as f: - return f.read() - except Exception as e: - print(f"Reading bytes from file failed with: {e}") - return None - -def utf8_file_to_base64(file_path: str) -> Optional[str]: - """Convert file to base64 string""" - try: - with open(file_path, 'rb') as file: - return base64.b64encode(file.read()).decode('utf-8') - except: - return None - -def b64_to_bytes(b64_string: str) -> Optional[bytes]: - """ - Convert base64 string to bytes. - """ - try: - return base64.b64decode(b64_string) - except Exception as e: - print(f"Base64 decoding failed with: {e}", file=sys.stderr) - return None - -def b64_to_str(b64_string: str) -> Optional[str]: - """ - Convert base64 string to string. 
- """ - try: - return bytes_to_str(base64.b64decode(b64_string)) - except Exception as e: - print(f"Base64 decoding failed with: {e}", file=sys.stderr) - return None - -def bytes_to_b64(data: bytes) -> Optional[str]: - """ - Convert bytes to base64 string. - """ - assert isinstance(data, bytes), "Supplied data that is not of type bytes." - try: - return base64.b64encode(data).decode('utf-8') - except Exception as e: - print(f"Base64 encoding failed with: {e}", file=sys.stderr) - return None - -def truncated_bytes(data: bytes, max_bytes: int = 1024) -> bytes: - """ - Return a truncated version of the input bytes, with middle contents omitted if - size exceeds max_bytes. - """ - if len(data) <= max_bytes: - return data - - omission_message = b'\n{{ omitted for brevity }}\n' - available_bytes = max_bytes - len(omission_message) - half = available_bytes // 2 - truncated = data[:half] + omission_message + data[-half:] - - return truncated - -def file_to_str(file: str, max_bytes=1024) -> Optional[str]: - """ - return file in string form, with middle contents trucated if - size exceeds max_bytes - """ - file_bytes = file_to_bytes(file) - if file_bytes is None: - return "" - - if len(file_bytes) <= max_bytes: - return bytes_to_str(file_bytes) - - half = (max_bytes - 3) // 2 - truncated_bytes = file_bytes[:half] + \ - b'\n{{ Omitted middle bytes for brevity }}\n' + \ - file_bytes[-half:] - - return bytes_to_str(truncated_bytes) - -def bytes_to_file(file: str, data: bytes) -> Optional[str]: - """ - Write bytes directly into a file - """ - assert isinstance(data, bytes), "Supplied bytes that are not of type bytes." 
- try: - with open(file, 'wb') as f: - f.write(data) - return file - except Exception as e: - print(f"Writting bytes to file failed with: {e}") - return None - diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index aa95c87..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,26 +0,0 @@ -[build-system] -requires = ["setuptools>=45", "wheel"] -build-backend = "setuptools.build_meta" - -[project] -name = "dragon-runner" -version = "1.0.0" -description = "An experimental successor to the 415 tester" -authors = [{name = "Justin Meimar", email = "meimar@ualberta.ca"}] -license = {file = "LICENSE"} -readme = "README.md" -requires-python = ">=3.6" -classifiers = ["Tester"] -dependencies = [ - "colorama==0.4.6", - "pytest==8.3.3", - "numpy==2.2.4", - "Flask==3.1.0", - 'flask-cors==6.0.0' -] - -[project.scripts] -dragon-runner = "dragon_runner.src.main:main" - -[tool.setuptools.package-data] -dragon_runner = ["py.typed"] diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index a635c5c..0000000 --- a/pytest.ini +++ /dev/null @@ -1,2 +0,0 @@ -[pytest] -pythonpath = . 
diff --git a/dragon_runner/__init__.py b/scripts/__init__.py similarity index 100% rename from dragon_runner/__init__.py rename to scripts/__init__.py diff --git a/dragon-runner-rs/scripts/add_empty.py b/scripts/add_empty.py similarity index 100% rename from dragon-runner-rs/scripts/add_empty.py rename to scripts/add_empty.py diff --git a/dragon-runner-rs/scripts/base.py b/scripts/base.py similarity index 100% rename from dragon-runner-rs/scripts/base.py rename to scripts/base.py diff --git a/dragon-runner-rs/scripts/build.py b/scripts/build.py similarity index 100% rename from dragon-runner-rs/scripts/build.py rename to scripts/build.py diff --git a/dragon-runner-rs/scripts/checkout.py b/scripts/checkout.py similarity index 100% rename from dragon-runner-rs/scripts/checkout.py rename to scripts/checkout.py diff --git a/dragon-runner-rs/scripts/clean_build.py b/scripts/clean_build.py similarity index 100% rename from dragon-runner-rs/scripts/clean_build.py rename to scripts/clean_build.py diff --git a/dragon-runner-rs/scripts/gather.py b/scripts/gather.py similarity index 100% rename from dragon-runner-rs/scripts/gather.py rename to scripts/gather.py diff --git a/dragon-runner-rs/scripts/gen_config.py b/scripts/gen_config.py similarity index 100% rename from dragon-runner-rs/scripts/gen_config.py rename to scripts/gen_config.py diff --git a/dragon-runner-rs/scripts/grade.py b/scripts/grade.py similarity index 100% rename from dragon-runner-rs/scripts/grade.py rename to scripts/grade.py diff --git a/dragon-runner-rs/scripts/grade_perf.py b/scripts/grade_perf.py similarity index 100% rename from dragon-runner-rs/scripts/grade_perf.py rename to scripts/grade_perf.py diff --git a/dragon-runner-rs/scripts/key.py b/scripts/key.py similarity index 100% rename from dragon-runner-rs/scripts/key.py rename to scripts/key.py diff --git a/dragon-runner-rs/scripts/loader.py b/scripts/loader.py similarity index 100% rename from dragon-runner-rs/scripts/loader.py rename to 
scripts/loader.py diff --git a/dragon-runner-rs/src/cli.rs b/src/cli.rs similarity index 100% rename from dragon-runner-rs/src/cli.rs rename to src/cli.rs diff --git a/dragon-runner-rs/src/config.rs b/src/config.rs similarity index 81% rename from dragon-runner-rs/src/config.rs rename to src/config.rs index 3742cdd..db92c5d 100644 --- a/dragon-runner-rs/src/config.rs +++ b/src/config.rs @@ -392,3 +392,95 @@ pub fn load_config(config_path: &str, args: Option<&RunnerArgs>) -> Option PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests").join("configs") + } + + fn config_path(name: &str) -> String { + configs_dir().join(name).to_string_lossy().into_owned() + } + + #[test] + fn test_valid_config() { + let path = config_path("gccPassConfig.json"); + let config = load_config(&path, None).expect("config should load"); + + assert!( + Path::new(&config.test_dir).exists(), + "test_dir should exist: {}", + config.test_dir + ); + assert!(!config.packages.is_empty(), "should have packages"); + + for pkg in &config.packages { + assert!(!pkg.subpackages.is_empty(), "package {} should have subpackages", pkg.name); + for spkg in &pkg.subpackages { + assert!(!spkg.tests.is_empty(), "subpackage {} should have tests", spkg.name); + } + } + + assert!(config.errors.is_empty(), "should have no errors"); + } + + #[test] + fn test_package_filter() { + let path = config_path("gccPassConfig.json"); + let config = load_config(&path, None).expect("config should load"); + + let all_subpackages: Vec<&str> = config + .packages + .iter() + .flat_map(|pkg| pkg.subpackages.iter()) + .map(|spkg| spkg.path.as_str()) + .collect(); + + assert!(!all_subpackages.is_empty(), "should have subpackages"); + + let filter_pattern = "*ErrorPass*"; + let filtered: Vec<&&str> = all_subpackages + .iter() + .filter(|path| { + glob::Pattern::new(&filter_pattern.to_lowercase()) + .map(|pat| pat.matches(&path.to_lowercase())) + .unwrap_or(false) + }) + .collect(); + + assert!(!filtered.is_empty(), "filter 
should match some subpackages"); + + for path in &filtered { + assert!( + path.to_lowercase().contains("errorpass"), + "filtered path should contain 'errorpass': {}", + path + ); + } + } + + #[test] + fn test_invalid_dir_config() { + let path = config_path("invalidDirConfig.json"); + let config = load_config(&path, None).expect("config should load"); + + assert!(!config.errors.is_empty(), "should have errors for invalid dir"); + assert!(!Path::new(&config.test_dir).exists(), "test_dir should not exist"); + } + + #[test] + fn test_invalid_exe_config() { + let path = config_path("invalidExeConfig.json"); + let config = load_config(&path, None).expect("config should load"); + + assert!(!config.errors.is_empty(), "should have errors for invalid exe"); + assert_eq!(config.executables.len(), 1); + assert!( + !Path::new(&config.executables[0].exe_path).exists(), + "exe_path should not exist" + ); + } +} diff --git a/dragon-runner-rs/src/error.rs b/src/error.rs similarity index 100% rename from dragon-runner-rs/src/error.rs rename to src/error.rs diff --git a/dragon-runner-rs/src/harness.rs b/src/harness.rs similarity index 92% rename from dragon-runner-rs/src/harness.rs rename to src/harness.rs index 9d91599..a6ce648 100644 --- a/dragon-runner-rs/src/harness.rs +++ b/src/harness.rs @@ -247,14 +247,6 @@ impl TournamentHarness { } } -impl TestHarness for TournamentHarness { - fn run_passed(&self) -> bool { self.passed } - - fn process_test_result(&mut self, _result: TestResult, _cli_args: &RunnerArgs, _counters: &mut SubPackageCounters) { - // Tournament uses its own tournament_iterate - } -} - // --------------------------------------------------------------------------- // MemoryCheckHarness // --------------------------------------------------------------------------- @@ -370,3 +362,44 @@ impl TestHarness for PerformanceTestingHarness { } } } + +#[cfg(test)] +mod tests { + use std::path::{Path, PathBuf}; + + use crate::cli::{Mode, RunnerArgs}; + use 
crate::config::load_config; + use super::TournamentHarness; + + fn config_path(name: &str) -> String { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests").join("configs").join(name) + .to_string_lossy().into_owned() + } + + #[test] + fn test_grader_config() { + let path = config_path("ConfigGrade.json"); + let config = load_config(&path, None).expect("config should load"); + + let failure_log = "Failures_rs.txt"; + let _ = std::fs::remove_file(failure_log); + + let args = RunnerArgs { + mode: Mode::Tournament, + failure_log: failure_log.to_string(), + timeout: 2.0, + ..Default::default() + }; + + let mut harness = TournamentHarness::new(); + harness.run(&config, &args); + + assert!( + Path::new(failure_log).exists(), + "failure log should have been created" + ); + + let _ = std::fs::remove_file(failure_log); + } +} diff --git a/dragon-runner-rs/src/lib.rs b/src/lib.rs similarity index 100% rename from dragon-runner-rs/src/lib.rs rename to src/lib.rs diff --git a/dragon-runner-rs/src/log.rs b/src/log.rs similarity index 100% rename from dragon-runner-rs/src/log.rs rename to src/log.rs diff --git a/dragon-runner-rs/src/main.rs b/src/main.rs similarity index 100% rename from dragon-runner-rs/src/main.rs rename to src/main.rs diff --git a/dragon-runner-rs/src/runner.rs b/src/runner.rs similarity index 82% rename from dragon-runner-rs/src/runner.rs rename to src/runner.rs index c57d5c3..8b9e107 100644 --- a/dragon-runner-rs/src/runner.rs +++ b/src/runner.rs @@ -448,3 +448,90 @@ pub fn precise_diff(produced: &[u8], expected: &[u8]) -> String { diff_bytes(produced, expected) } } + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use crate::config::{load_config, Config}; + use super::ToolChainRunner; + + fn configs_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests").join("configs") + } + + fn config_path(name: &str) -> String { + configs_dir().join(name).to_string_lossy().into_owned() + } + + fn create_config(name: &str) -> Config 
{ + let path = config_path(name); + load_config(&path, None).expect("config should load") + } + + fn run_tests_for_config(config: &Config, expected_result: bool) { + for exe in &config.executables { + for tc in &config.toolchains { + let runner = ToolChainRunner::new(tc.clone(), 10.0) + .with_env(exe.runtime_env()); + for pkg in &config.packages { + for spkg in &pkg.subpackages { + for test in &spkg.tests { + let result = runner.run(test, exe); + assert_eq!( + result.did_pass, expected_result, + "Test {} expected {} but got {}", + test.file, + if expected_result { "PASS" } else { "FAIL" }, + if result.did_pass { "PASS" } else { "FAIL" }, + ); + } + } + } + } + } + } + + #[test] + fn test_gcc_pass() { + let config = create_config("gccPassConfig.json"); + assert!(config.errors.is_empty(), "config errors: {:?}", config.errors); + run_tests_for_config(&config, true); + } + + #[test] + fn test_gcc_fail() { + let config = create_config("gccFailConfig.json"); + assert!(config.errors.is_empty(), "config errors: {:?}", config.errors); + run_tests_for_config(&config, false); + } + + #[test] + fn test_runtime_gcc_toolchain() { + let tests_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests"); + let compile_script = tests_dir.join("scripts/test-scripts/compile_lib.py"); + let lib_src_dir = tests_dir.join("lib/src"); + let lib_out_dir = tests_dir.join("lib"); + + assert!(compile_script.exists(), "missing compile_lib.py"); + + let expected_lib = tests_dir.join("lib/libfib.so"); + if !expected_lib.exists() { + let status = std::process::Command::new("python3") + .args([ + compile_script.to_str().unwrap(), + lib_src_dir.to_str().unwrap(), + lib_out_dir.to_str().unwrap(), + ]) + .status() + .expect("failed to run compile_lib.py"); + assert!(status.success(), "shared object compilation failed"); + assert!(expected_lib.exists(), "failed to create shared object"); + } + + let path = config_path("runtimeConfigLinux.json"); + let config = load_config(&path, None).expect("config 
should load"); + assert!(config.errors.is_empty(), "config errors: {:?}", config.errors); + run_tests_for_config(&config, true); + } +} diff --git a/dragon-runner-rs/src/testfile.rs b/src/testfile.rs similarity index 100% rename from dragon-runner-rs/src/testfile.rs rename to src/testfile.rs diff --git a/dragon-runner-rs/src/toolchain.rs b/src/toolchain.rs similarity index 100% rename from dragon-runner-rs/src/toolchain.rs rename to src/toolchain.rs diff --git a/dragon-runner-rs/src/util.rs b/src/util.rs similarity index 100% rename from dragon-runner-rs/src/util.rs rename to src/util.rs diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index b71c82b..0000000 --- a/tests/conftest.py +++ /dev/null @@ -1,36 +0,0 @@ -import pytest -from typing import Optional -from pathlib import Path -from dragon_runner.src.cli import CLIArgs, RunnerArgs -from dragon_runner.src.config import load_config, Config - -def get_config_path(config_name: str) -> Path: - return Path(__file__).parent / "configs" / config_name - -def create_config(config_name: str) -> Optional[Config]: - config_path = get_config_path(config_name) - return load_config(str(config_path)) - -def create_cli_args(**kwargs) -> RunnerArgs: - return RunnerArgs( - config_file = kwargs.get('config_file', None), - output = kwargs.get('output_file', None), - failure_log = kwargs.get('failure_log', None), - debug_package = kwargs.get('debug_package', None), - package_filter = kwargs.get('package_filter', None), - mode = kwargs.get('mode', None), - timeout = kwargs.get('timeout', 5), - time = kwargs.get('time', None), - verbosity = kwargs.get('verbosity', None), - verify = kwargs.get('verify', None), - show_testcase = kwargs.get('show_testcase', None), - fast_fail = kwargs.get('fast_fail', None), - ) - -@pytest.fixture(scope="session") -def config_factory(): - return create_config - -@pytest.fixture(scope="session") -def cli_factory(): - return create_cli_args diff --git a/tests/run_tests.py 
b/tests/run_tests.py deleted file mode 100644 index c219dc3..0000000 --- a/tests/run_tests.py +++ /dev/null @@ -1,17 +0,0 @@ -# -# Quick script -# -# - -import os -import subprocess -from pathlib import Path - -if __name__ == "__main__": - - script_dir = Path(__file__).parent.absolute() - for file in os.listdir(script_dir): - if "test_" in file: - print(file) - subprocess.run(f"pytest {os.path.join(script_dir, file)}", shell=True) - diff --git a/tests/test_config.py b/tests/test_config.py deleted file mode 100644 index daeb4b9..0000000 --- a/tests/test_config.py +++ /dev/null @@ -1,74 +0,0 @@ -import os -from dragon_runner.src.cli import RunnerArgs, Mode -from dragon_runner.src.config import load_config -import fnmatch - - -def test_valid_config(config_factory): - config = config_factory("gccPassConfig.json") - - assert config is not None - assert config.test_dir is not None - assert config.packages is not None - for pkg in config.packages: - assert pkg.subpackages is not None - for spkg in pkg.subpackages: - assert spkg is not None - assert len(spkg.tests) > 0 - - assert config.error_collection == False - assert os.path.exists(config.test_dir) - - -def test_package_filter(config_factory): - """Test that subpackage filtering works correctly using glob pattern matching on paths""" - - config_path = os.path.join( - os.path.dirname(__file__), "configs", "gccPassConfig.json" - ) - - # Load config - packages are always loaded, filtering happens at subpackage level - config = load_config( - config_path, RunnerArgs(mode=Mode.REGULAR, config_file=config_path) - ) - - # Collect all subpackages across all packages - all_subpackages = [] - for pkg in config.packages: - for spkg in pkg.subpackages: - all_subpackages.append(spkg.path) - - # Verify we have subpackages to test with - assert len(all_subpackages) > 0 - - # Test filter pattern "*ErrorPass*" - should match subpackages containing "ErrorPass" in path - filter_pattern = "*ErrorPass*" - filtered_subpackages = [ - 
spkg_path - for spkg_path in all_subpackages - if fnmatch.fnmatch(spkg_path.lower(), filter_pattern.lower()) - ] - - # Should have some matches - assert len(filtered_subpackages) > 0 - - # All filtered subpackages should match the pattern (case insensitive) - for spkg_path in filtered_subpackages: - assert fnmatch.fnmatch(spkg_path.lower(), filter_pattern.lower()) - assert "errorpass" in spkg_path.lower() - - -def test_invalid_dir_config(config_factory): - config = config_factory("invalidDirConfig.json") - - assert config.error_collection == True - assert not os.path.exists(config.test_dir) - - -def test_invalid_exe_config(config_factory): - - config = config_factory("invalidExeConfig.json") - - assert config.error_collection == True - assert len(config.executables) == 1 - assert not os.path.exists(config.executables[0].exe_path) diff --git a/tests/test_grader.py b/tests/test_grader.py deleted file mode 100644 index d8e170c..0000000 --- a/tests/test_grader.py +++ /dev/null @@ -1,20 +0,0 @@ -import os -from dragon_runner.src.harness import TournamentHarness -from dragon_runner.src.config import Config -from dragon_runner.src.cli import RunnerArgs - -def test_grader_config(config_factory, cli_factory): - - config : Config = config_factory("ConfigGrade.json") - args : RunnerArgs = cli_factory(**{ - "mode": "tournament", - "failure_log": "Failures.txt", - "timeout": 2 - }) - - harness = TournamentHarness(config=config, cli_args=args) - assert harness is not None - - harness.run() - assert os.path.exists(args.failure_log) - diff --git a/tests/test_runner.py b/tests/test_runner.py deleted file mode 100644 index 3ff7b15..0000000 --- a/tests/test_runner.py +++ /dev/null @@ -1,43 +0,0 @@ -from dragon_runner.src.harness import RegularHarness -from dragon_runner.src.config import Config -from dragon_runner.src.cli import RunnerArgs - -def test_gcc_pass(config_factory, cli_factory): - - config : Config = config_factory("gccPassConfig.json") - args : RunnerArgs = 
cli_factory(**{ - "mode": "regular", - "timeout": 10 - }) - - harness = RegularHarness(config=config, cli_args=args) - assert harness is not None - success = harness.run() - assert success == True - -def test_gcc_pass_darwin(config_factory, cli_factory): - - config : Config = config_factory("catConfigDarwin.json") - args : RunnerArgs = cli_factory(**{ - "mode": "regular", - "timeout": 10 - }) - - harness = RegularHarness(config=config, cli_args=args) - assert harness is not None - success = harness.run() - assert success == True - -def test_gcc_fail(config_factory, cli_factory): - - config : Config = config_factory("gccFailConfig.json") - args : RunnerArgs = cli_factory(**{ - "mode": "regular", - "timeout": 5 - }) - - harness = RegularHarness(config=config, cli_args=args) - assert harness is not None - success = harness.run() - assert success == False - diff --git a/tests/test_runtime.py b/tests/test_runtime.py deleted file mode 100644 index fe97ed3..0000000 --- a/tests/test_runtime.py +++ /dev/null @@ -1,51 +0,0 @@ -from dragon_runner.src.runner import ToolChainRunner, TestResult -from dragon_runner.src.config import Config -import sys -import os -import subprocess - -TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -COMPILE_LIB_SCRIPT = f"{TEST_DIR}/scripts/test-scripts/compile_lib.py" -LIB_SRC_DIR = os.path.join(TEST_DIR, "lib/src") -LIB_OUT_DIR = os.path.join(TEST_DIR, "lib") - -def run_tests_for_config(config: Config, expected_result: bool): - # TODO: move to conftest.py - assert config.packages is not None - - for exe in config.executables: - exe.source_env() - for tc in config.toolchains: - tc_runner = ToolChainRunner(tc, timeout=3.0) - for pkg in config.packages: - for sp in pkg.subpackages: - for test in sp.tests: - result: TestResult = tc_runner.run(test, exe) - result.log() - assert result.did_pass == expected_result - - -def test_gcc_toolchain_success(config_factory, cli_factory): - assert os.path.exists(COMPILE_LIB_SCRIPT), "missing library 
compiler script" - - if sys.platform == "darwin": - lib = "libfib.dylib" - config = config_factory("runtimeConfigDarwin.json") - else: - lib = "libfib.so" - config = config_factory("runtimeConfigLinux.json") - - expected_lib=os.path.join(TEST_DIR, f"lib/{lib}") - - if not os.path.exists(expected_lib): - result = subprocess.run([sys.executable, - COMPILE_LIB_SCRIPT, - LIB_SRC_DIR, - LIB_OUT_DIR], check=True) - - assert result.returncode == 0, "shared object compilation failed" - assert os.path.exists(expected_lib), "failed to create shared object" - - # now shared object exists where the config expects it, so we can run - os.environ["DRAGON_RUNNER_DEBUG"] = "3" - run_tests_for_config(config, expected_result=True) From 21972e0a85c2f7cdf5c741c94e49042dc48fb22e Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 15:41:46 -0700 Subject: [PATCH 07/45] fix: cache compiled regex --- src/runner.rs | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/runner.rs b/src/runner.rs index 8b9e107..722b769 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -3,12 +3,19 @@ use std::env; use std::fs; use std::path::Path; use std::process; -use std::sync::Arc; +use std::sync::{Arc, LazyLock}; use std::time::{Duration, Instant}; use regex::Regex; use wait_timeout::ChildExt; +static ENV_VAR_RE: LazyLock = + LazyLock::new(|| Regex::new(r"\$(\w+)|\$\{(\w+)\}").unwrap()); +static ERROR_KIND_RE: LazyLock = + LazyLock::new(|| Regex::new(r"(?i)(\w+Error)").unwrap()); +static ERROR_LINE_RE: LazyLock = + LazyLock::new(|| Regex::new(r"(?i)on\s+Line\s+(\d+)").unwrap()); + use crate::config::Executable; use crate::testfile::TestFile; use crate::toolchain::{Step, ToolChain}; @@ -324,10 +331,9 @@ impl ToolChainRunner { } fn replace_env_vars(&self, command: &mut Command) { - let re = Regex::new(r"\$(\w+)|\$\{(\w+)\}").unwrap(); for arg in command.args.iter_mut() { let original = arg.clone(); - for caps in re.captures_iter(&original) { + for caps in 
ENV_VAR_RE.captures_iter(&original) { let var_name = caps .get(1) .or_else(|| caps.get(2)) @@ -385,13 +391,10 @@ impl ToolChainRunner { tr.did_pass = false; } } else { - let error_re = Regex::new(r"(?i)(\w+Error)").unwrap(); - let line_re = Regex::new(r"(?i)on\s+Line\s+(\d+)").unwrap(); - - let prod_error = error_re.captures(&produced_str); - let exp_error = error_re.captures(&expected_str); - let prod_line = line_re.captures(&produced_str); - let exp_line = line_re.captures(&expected_str); + let prod_error = ERROR_KIND_RE.captures(&produced_str); + let exp_error = ERROR_KIND_RE.captures(&expected_str); + let prod_line = ERROR_LINE_RE.captures(&produced_str); + let exp_line = ERROR_LINE_RE.captures(&expected_str); // MainError hack if let (Some(ref pe), Some(ref ee)) = (&prod_error, &exp_error) { From 0e89602e551d889fb6f1c43536e516cf74f0dbed Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 15:47:32 -0700 Subject: [PATCH 08/45] refactor: idiomatic rust cleanup for ported code --- src/cli.rs | 4 +--- src/log.rs | 27 ++++----------------------- src/runner.rs | 21 +++++++++------------ src/testfile.rs | 8 ++++---- src/util.rs | 13 ------------- 5 files changed, 18 insertions(+), 55 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index 2ff23bd..b822ebb 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -201,9 +201,7 @@ pub fn parse_cli_args() -> CliAction { let args = RunnerArgs::from_flags(mode, flags); - // Set debug environment variable and refresh the logger - std::env::set_var("DRAGON_RUNNER_DEBUG", args.verbosity.to_string()); - crate::log::refresh_debug_level(); + crate::log::set_debug_level(args.verbosity); CliAction::Run(args) } diff --git a/src/log.rs b/src/log.rs index 3fc663a..74f5872 100644 --- a/src/log.rs +++ b/src/log.rs @@ -1,34 +1,15 @@ -use std::env; use std::sync::atomic::{AtomicU32, Ordering}; -static DEBUG_LEVEL: AtomicU32 = AtomicU32::new(u32::MAX); +static DEBUG_LEVEL: AtomicU32 = AtomicU32::new(0); -fn debug_level() -> u32 { - let cached = 
DEBUG_LEVEL.load(Ordering::Relaxed); - if cached != u32::MAX { - return cached; - } - let level = env::var("DRAGON_RUNNER_DEBUG") - .ok() - .and_then(|v| v.parse().ok()) - .unwrap_or(0); - DEBUG_LEVEL.store(level, Ordering::Relaxed); - level -} - -/// Re-read DRAGON_RUNNER_DEBUG from the environment. -/// Call after setting the env var (e.g. from CLI parsing). -pub fn refresh_debug_level() { - let level = env::var("DRAGON_RUNNER_DEBUG") - .ok() - .and_then(|v| v.parse().ok()) - .unwrap_or(0); +/// Set the global debug/verbosity level. +pub fn set_debug_level(level: u32) { DEBUG_LEVEL.store(level, Ordering::Relaxed); } /// Log a message at a given verbosity level with indentation. pub fn log(level: u32, indent: usize, msg: &str) { - if debug_level() >= level { + if DEBUG_LEVEL.load(Ordering::Relaxed) >= level { println!("{:indent$}{msg}", "", indent = indent); } } diff --git a/src/runner.rs b/src/runner.rs index 722b769..17d8e84 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -19,11 +19,14 @@ static ERROR_LINE_RE: LazyLock = use crate::config::Executable; use crate::testfile::TestFile; use crate::toolchain::{Step, ToolChain}; -use crate::util::{file_to_bytes, make_tmp_file}; +use crate::util::make_tmp_file; /// Reserved exit code for valgrind leak detection. pub const VALGRIND_EXIT_CODE: i32 = 111; +const RESERVED_EXIT_CODES: &[i32] = &[VALGRIND_EXIT_CODE]; +const RUNTIME_ERRORS: &[&str] = &["SizeError", "IndexError", "MathError", "StrideError"]; + /// Magic parameter values substituted into toolchain step arguments. pub struct MagicParams { pub exe_path: String, @@ -102,8 +105,6 @@ pub struct ToolChainRunner { pub timeout: f64, /// Extra environment variables to inject into spawned subprocesses (e.g. runtime lib paths). 
pub extra_env: HashMap, - reserved_exit_codes: Vec, - runtime_errors: Vec<&'static str>, } impl ToolChainRunner { @@ -112,8 +113,6 @@ impl ToolChainRunner { tc, timeout, extra_env: HashMap::new(), - reserved_exit_codes: vec![VALGRIND_EXIT_CODE], - runtime_errors: vec!["SizeError", "IndexError", "MathError", "StrideError"], } } @@ -169,14 +168,14 @@ impl ToolChainRunner { let step_time = (cr.time * 10000.0).round() / 10000.0; // Check reserved exit codes (e.g., valgrind) - if self.reserved_exit_codes.contains(&cr.exit_status) { + if RESERVED_EXIT_CODES.contains(&cr.exit_status) { if cr.exit_status == VALGRIND_EXIT_CODE { tr.memory_leak = true; } } if cr.exit_status != 0 - && !self.reserved_exit_codes.contains(&cr.exit_status) + && !RESERVED_EXIT_CODES.contains(&cr.exit_status) { tr.gen_output = Some(stderr.clone()); tr.failing_step = Some(step.name.clone()); @@ -198,7 +197,7 @@ impl ToolChainRunner { tr.did_pass = false; return tr; } - file_to_bytes(out_path).unwrap_or_default() + fs::read(out_path).unwrap_or_default() } else { stdout }; @@ -372,13 +371,11 @@ impl ToolChainRunner { return; } - let rt_error = self - .runtime_errors + let rt_error = RUNTIME_ERRORS .iter() .find(|e| expected_str.contains(**e)) .copied(); - let did_raise_rt = self - .runtime_errors + let did_raise_rt = RUNTIME_ERRORS .iter() .any(|e| produced_str.contains(e)); diff --git a/src/testfile.rs b/src/testfile.rs index e017c04..67492cd 100644 --- a/src/testfile.rs +++ b/src/testfile.rs @@ -3,7 +3,7 @@ use std::io::{self, BufRead}; use std::path::Path; use crate::error::{DragonError, Validate}; -use crate::util::{file_to_bytes, str_to_bytes}; +use crate::util::str_to_bytes; /// Represents a single test case file with parsed directives. 
#[derive(Debug, Clone)] @@ -101,10 +101,10 @@ impl TestFile { )); } - file_to_bytes(&full_path.to_string_lossy()) + fs::read(&full_path) .map(DirectiveResult::Ok) - .unwrap_or_else(|| DirectiveResult::Err(format!( - "Failed to convert file {} to bytes", full_path.display() + .unwrap_or_else(|_| DirectiveResult::Err(format!( + "Failed to read file {}", full_path.display() ))) } diff --git a/src/util.rs b/src/util.rs index 4520279..36ae5ae 100644 --- a/src/util.rs +++ b/src/util.rs @@ -25,15 +25,6 @@ pub fn str_to_bytes(s: &str, chop_newline: bool) -> Vec { s.as_bytes().to_vec() } -/// Read a file as bytes, returning None on error. -pub fn file_to_bytes(path: &str) -> Option> { - fs::read(path).ok() -} - -/// Read a file as a UTF-8 string, returning None on error. -pub fn file_to_str(path: &str) -> Option { - fs::read_to_string(path).ok() -} /// Create a temporary file with the given content and execute permissions. /// Returns the path to the temp file, or None on error. @@ -65,7 +56,3 @@ pub fn truncated_bytes(data: &[u8], max_bytes: usize) -> Vec { result } -/// Convert bytes to string with lossy UTF-8 fallback. 
-pub fn bytes_to_str(data: &[u8]) -> String { - String::from_utf8_lossy(data).into_owned() -} From b78bf8adb880cda6b8a5ae4198649ce19fcabd60 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 15:48:56 -0700 Subject: [PATCH 09/45] refactor: unified csv-backed key system for grading scripts --- scripts/add_empty.py | 35 ++++-------------- scripts/build.py | 13 ++++++- scripts/checkout.py | 16 +++++++- scripts/clean_build.py | 15 +++++++- scripts/gather.py | 55 ++++++++++++---------------- scripts/gen_config.py | 50 ++++++++++++++----------- scripts/key.py | 83 +++++++++++++++++++++++++++++++----------- tests/test_key.py | 50 +++++++++++++++++++++++++ 8 files changed, 208 insertions(+), 109 deletions(-) create mode 100644 tests/test_key.py diff --git a/scripts/add_empty.py b/scripts/add_empty.py index 3590cee..7fa3419 100644 --- a/scripts/add_empty.py +++ b/scripts/add_empty.py @@ -12,6 +12,7 @@ from pathlib import Path from typing import List from base import Script +from key import Key class AddEmptyScript(Script): @@ -30,42 +31,22 @@ def get_parser(cls) -> argparse.ArgumentParser: prog="add_empty", description="Add empty test cases to test packages" ) - parser.add_argument("key_file", type=Path, help="Key file which has a line for each (SID, GH_Username) pair") + parser.add_argument("key_file", type=Path, help="Path to CSV key file") parser.add_argument("search_path", type=Path, help="Path to search for test files") parser.add_argument("empty_content", type=str, help="Empty content to write into files") return parser - @staticmethod - def load_key(key_path): - config = {} - with open(key_path) as key_file: - for line in key_file.readlines(): - sid, gh_username = line.strip().split(' ') - print("SID: ", sid, "\tGH Username: ", gh_username) - config[sid] = gh_username - print("Config Loaded...") - return config - - @staticmethod - def count_files_with_exclusions(directory: Path, excluded_extensions: list) -> int: - count = 0 - for path in 
directory.rglob('*'): - if path.is_file(): - if path.suffix.lower() not in excluded_extensions: - count += 1 - return count - @staticmethod def add_empty(key_file: Path, search_path: Path, empty_content: str): - config = AddEmptyScript.load_key(key_file) + key = Key(key_file) if not search_path.is_dir(): - error = "Could not create test directory." - print(error) + print("Could not find search directory.") return 1 all_fine = True - for (sid, gh_user) in config.items(): + for rec in key.iter_students(): + sid = rec.sid all_matches = list(search_path.rglob(sid)) if len(all_matches) == 0: print(f"Can not find a directory matching: {sid} in {search_path.name}") @@ -90,14 +71,14 @@ def add_empty(key_file: Path, search_path: Path, empty_content: str): all_fine = False while test_count < 5: - suffix= ''.join(random.choices(string.ascii_letters + string.digits, k=8)) + suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=8)) file_path = sid_test_dir / f"TA_empty_{test_count+1}_{suffix}.in" file_path.write_text(empty_content) test_count += 1 print(f"{sid} - Writing an empty file: {file_path.name}...") if all_fine: - print("All students submited at least five testcases!") + print("All students submitted at least five testcases!") @classmethod def main(cls, args: List[str]) -> int: diff --git a/scripts/build.py b/scripts/build.py index 3471ed1..f02b6fc 100644 --- a/scripts/build.py +++ b/scripts/build.py @@ -15,6 +15,7 @@ from pathlib import Path from typing import List from base import Script +from key import Key class BuildScript(Script): @@ -37,15 +38,22 @@ def get_parser(cls) -> argparse.ArgumentParser: parser.add_argument("log_file", type=Path, help="Path to log file") parser.add_argument("dir_prefix", type=str, help="Prefix common to all directories to be built") parser.add_argument("n", type=int, default=2, help="n_threads") + parser.add_argument("--key", type=Path, default=None, help="Path to CSV key file") + parser.add_argument("--assignment", 
type=str, default=None, help="Assignment column name from key file") return parser @classmethod - def build(cls, start_dir, log_path, dir_prefix, n_threads="2"): + def build(cls, start_dir, log_path, dir_prefix, n_threads="2", key_path=None, assignment=None): root_path = Path(start_dir).absolute() log_path = Path(log_path).absolute() directories = [d for d in root_path.iterdir() if d.is_dir() and (dir_prefix in d.name) and d.name != '.'] + if key_path and assignment: + key = Key(key_path) + valid_repos = set(key.iter_repos(assignment)) + directories = [d for d in directories if any(repo in d.name for repo in valid_repos)] + print("Directories to build:") for d in directories: print(" ", d) @@ -96,7 +104,8 @@ def main(cls, args: List[str]) -> int: parser = cls.get_parser() parsed_args = parser.parse_args(args) parsed_args.log_file.unlink(missing_ok=True) - cls.build(parsed_args.start_dir, parsed_args.log_file, parsed_args.dir_prefix, str(parsed_args.n)) + cls.build(parsed_args.start_dir, parsed_args.log_file, parsed_args.dir_prefix, + str(parsed_args.n), parsed_args.key, parsed_args.assignment) return 0 if __name__ == '__main__': diff --git a/scripts/checkout.py b/scripts/checkout.py index eedfad7..ca3abd4 100644 --- a/scripts/checkout.py +++ b/scripts/checkout.py @@ -5,6 +5,7 @@ from pathlib import Path from typing import List from base import Script +from key import Key class CheckoutScript(Script): @@ -28,6 +29,8 @@ def get_parser(cls) -> argparse.ArgumentParser: help='Directory of repositories to checkout') parser.add_argument('checkout_time', help='Checkout time in format: "YYYY-MM-DD HH:MM:SS"') + parser.add_argument("--key", type=Path, default=None, help="Path to CSV key file") + parser.add_argument("--assignment", type=str, default=None, help="Assignment column name from key file") return parser @classmethod @@ -65,11 +68,19 @@ def checkout_commit(cls, repo_path, commit_hash): return result.returncode == 0 @classmethod - def process_repositories(cls, 
submissions_dir: Path, checkout_time: str): + def process_repositories(cls, submissions_dir: Path, checkout_time: str, key_path=None, assignment=None): + valid_repos = None + if key_path and assignment: + key = Key(key_path) + valid_repos = set(key.iter_repos(assignment)) + for submission_dir in sorted(submissions_dir.iterdir()): if not submission_dir.is_dir(): continue + if valid_repos is not None and not any(repo in submission_dir.name for repo in valid_repos): + continue + git_dir = submission_dir / '.git' if not git_dir.exists(): print(f"\nSkipping {submission_dir.name} - not a git repository") @@ -114,7 +125,8 @@ def main(cls, args: List[str]) -> int: print(f"Using submission dir: {sub}") print(f"Checking out to latest commit before: {parsed_args.checkout_time}") - cls.process_repositories(sub, parsed_args.checkout_time) + cls.process_repositories(sub, parsed_args.checkout_time, + parsed_args.key, parsed_args.assignment) return 0 if __name__ == "__main__": diff --git a/scripts/clean_build.py b/scripts/clean_build.py index e1b7282..e78364c 100644 --- a/scripts/clean_build.py +++ b/scripts/clean_build.py @@ -4,6 +4,7 @@ import argparse from typing import List from base import Script +from key import Key class CleanBuildScript(Script): @@ -23,14 +24,24 @@ def get_parser(cls) -> argparse.ArgumentParser: description="Remove build directories from all submissions" ) parser.add_argument('submission_dir', type=Path, help='Directory of submissions to clean') + parser.add_argument("--key", type=Path, default=None, help="Path to CSV key file") + parser.add_argument("--assignment", type=str, default=None, help="Assignment column name from key file") return parser @staticmethod - def remove_build_dirs(submissions_dir: Path): + def remove_build_dirs(submissions_dir: Path, key_path=None, assignment=None): + valid_repos = None + if key_path and assignment: + key = Key(key_path) + valid_repos = set(key.iter_repos(assignment)) + for submission_dir in 
sorted(submissions_dir.iterdir()): if not submission_dir.is_dir(): continue + if valid_repos is not None and not any(repo in submission_dir.name for repo in valid_repos): + continue + build_dir = submission_dir / 'build' if not build_dir.exists(): continue @@ -53,7 +64,7 @@ def main(cls, args: List[str]) -> int: print("Submission directory does not exist...") return 1 - cls.remove_build_dirs(sub) + cls.remove_build_dirs(sub, parsed_args.key, parsed_args.assignment) return 0 if __name__ == "__main__": diff --git a/scripts/gather.py b/scripts/gather.py index 0e0d569..0e0ad46 100644 --- a/scripts/gather.py +++ b/scripts/gather.py @@ -12,6 +12,7 @@ from pathlib import Path from typing import List from base import Script +from key import Key class GatherScript(Script): @@ -30,60 +31,50 @@ def get_parser(cls) -> argparse.ArgumentParser: prog="gather", description="Gather all the testfiles in student directories" ) - parser.add_argument("key_file", type=Path, help="Key file which has a line for each (SID, GH_Username) pair") + parser.add_argument("key_file", type=Path, help="Path to CSV key file") parser.add_argument("search_path", type=Path, help="Path to search for test files") - parser.add_argument("project_name", type=Path, help="Path to search for test files") + parser.add_argument("--assignment", type=str, required=True, + help="Assignment column name from key file (e.g. 
A1)") return parser - @staticmethod - def load_key(key_path: Path): - config = {} - with open(key_path) as key_file: - for line in key_file.readlines(): - sid, gh_username = line.strip().split(' ') - print("SID: ", sid, "\tGH Username: ", gh_username) - config[sid] = gh_username - return config - @staticmethod def gather(key_file: Path, - search_path: str, - project_name: str, + search_path: Path, + assignment: str, output_dir: str = "submitted-testfiles"): - is_rt = True - config = GatherScript.load_key(key_file) + + key = Key(key_file) search_dir = Path(search_path) - project_name = str(project_name).strip() if not search_dir.is_dir(): - error = "Could not create test directory." - print(error) + print("Could not find search directory.") return 1 - directories = [d for d in search_dir.iterdir() if d.is_dir() and str(project_name) in d.name] - for (sid, gh_user) in config.items(): - print("Finding submission for: ", gh_user) + directories = [d for d in search_dir.iterdir() if d.is_dir()] + for rec in key.iter_students(): + repo = rec.repos.get(assignment) + if not repo: + print(f"No repo for {rec.sid} in assignment {assignment}, skipping") + continue + + print(f"Finding submission for: {rec.ccid} (repo: {repo})") for d in directories: - if gh_user in str(d): - if is_rt: - suffix = '-'.join(gh_user.split('-')[1:]) - expected_test_dir = d / "tests" / "testfiles" / suffix - else: - expected_test_dir = d / "tests" / "testfiles" / sid + if repo in d.name: + expected_test_dir = d / "tests" / "testfiles" / rec.sid if expected_test_dir.is_dir(): - print(f"-- Found properly formatted testfiles for {sid}") - shutil.copytree(expected_test_dir, (Path(output_dir) / sid), dirs_exist_ok=True) + print(f"-- Found properly formatted testfiles for {rec.sid}") + shutil.copytree(expected_test_dir, (Path(output_dir) / rec.sid), dirs_exist_ok=True) break else: - print(f"-- Could NOT find testfiles for {sid}") + print(f"-- Could NOT find testfiles for {rec.sid}") exit(1) @classmethod def 
main(cls, args: List[str]) -> int: parser = cls.get_parser() parsed_args = parser.parse_args(args) - cls.gather(parsed_args.key_file, parsed_args.search_path, parsed_args.project_name) + cls.gather(parsed_args.key_file, parsed_args.search_path, parsed_args.assignment) return 0 if __name__ == '__main__': diff --git a/scripts/gen_config.py b/scripts/gen_config.py index 56f4262..4607b38 100644 --- a/scripts/gen_config.py +++ b/scripts/gen_config.py @@ -10,7 +10,6 @@ import argparse from typing import Optional, List from pathlib import Path -from typing import Iterator, Tuple from base import Script from key import Key @@ -32,20 +31,23 @@ def get_parser(cls) -> argparse.ArgumentParser: description="Generate dragon-runner configuration from student submissions" ) parser.add_argument("key_path", type=Path, - help="Path to key file containing each team/ccid on a line.") + help="Path to CSV key file") parser.add_argument("submissions_path", type=Path, help="Path to project submissions cloned from github classroom.") parser.add_argument("binary", type=str, - help="Name of binary to expect in prohjects bin/") + help="Name of binary to expect in projects bin/") + parser.add_argument("--assignment", type=str, required=True, + help="Assignment column name from key file (e.g. A1)") parser.add_argument("--runtime", type=str, default=None, - help="Name of runtime library to expect in prohjects bin/") + help="Name of runtime library to expect in projects bin/") return parser @staticmethod - def gen_config(key_path:Path, - submission_dir:Path, - binary:str, - runtime:Optional[str]=None): + def gen_config(key_path: Path, + submission_dir: Path, + binary: str, + assignment: str, + runtime: Optional[str] = None): executables_config = {} runtimes_config = {} @@ -55,35 +57,38 @@ def gen_config(key_path:Path, assert submission_dir.is_dir(), "must supply directory to submissions." 
key = Key(key_path) - for (sids, repo_suffix) in key.iter_both(): - match_dir = [d for d in submission_dir.iterdir() if d.is_dir() and str(repo_suffix) in d.name] - if match_dir == []: - print(f"Couldn't find: repo with suffix {repo_suffix}") + for repo in key.iter_repos(assignment): + match_dir = [d for d in submission_dir.iterdir() if d.is_dir() and repo in d.name] + if not match_dir: + print(f"Couldn't find: repo with name {repo}") exit(1) match_dir = Path(match_dir[0]) - expected_package = match_dir / "tests/testfiles" / sids + members = key.students_for_repo(assignment, repo) + sid_label = ",".join(rec.sid for rec in members) + + expected_package = match_dir / "tests/testfiles" / sid_label expected_binary = match_dir / f"bin/{binary}" expected_runtime = match_dir / f"bin/{runtime}" if not expected_package.is_file: print(f"Can not find expected package: {expected_package}") - break; + break if not expected_binary.is_file: print(f"Can not find expected binary: {expected_binary}") - break; + break if runtime is not None and not expected_runtime.is_file: - print(f"Can not find expected binary: {expected_binary}") - break; + print(f"Can not find expected runtime: {expected_runtime}") + break - executables_config.update({f"{sids}":f"{Path.absolute(expected_binary)}"}) - runtimes_config.update({f"{sids}":f"{Path.absolute(expected_runtime)}"}) + executables_config[sid_label] = str(Path.absolute(expected_binary)) + runtimes_config[sid_label] = str(Path.absolute(expected_runtime)) - config.update({"testedExecutablePaths": executables_config}) + config["testedExecutablePaths"] = executables_config if runtime is not None: - config.update({"runtimes": runtimes_config}) + config["runtimes"] = runtimes_config print(json.dumps(config, indent=4)) with open('config.json', 'w') as f: @@ -93,7 +98,8 @@ def gen_config(key_path:Path, def main(cls, args: List[str]) -> int: parser = cls.get_parser() parsed_args = parser.parse_args(args) - cls.gen_config(parsed_args.key_path, 
parsed_args.submissions_path, parsed_args.binary, parsed_args.runtime) + cls.gen_config(parsed_args.key_path, parsed_args.submissions_path, + parsed_args.binary, parsed_args.assignment, parsed_args.runtime) return 0 if __name__ == '__main__': diff --git a/scripts/key.py b/scripts/key.py index 26536ce..81ee3a1 100644 --- a/scripts/key.py +++ b/scripts/key.py @@ -1,33 +1,72 @@ -from typing import Tuple +import csv +from dataclasses import dataclass, field from pathlib import Path -from typing import Iterator +from typing import Dict, Iterator, List, Optional + + +@dataclass +class StudentRecord: + sid: str + ccid: str + github_id: str + repos: Dict[str, str] = field(default_factory=dict) # assignment -> repo name + class Key: def __init__(self, key_path: Path): self.key_path = key_path - self.sid_repo_suffix_map = {} + self._records: List[StudentRecord] = [] + self._by_sid: Dict[str, StudentRecord] = {} + self._by_ccid: Dict[str, StudentRecord] = {} + self._by_github: Dict[str, StudentRecord] = {} + self.assignments: List[str] = [] + + with open(key_path, newline='') as f: + reader = csv.DictReader(f) + headers = reader.fieldnames or [] + if len(headers) < 3: + raise ValueError(f"Key file must have at least SID,CCID,GitHubID columns, got: {headers}") + + self.assignments = headers[3:] + + for row in reader: + vals = list(row.values()) + sid, ccid, github_id = vals[0].strip(), vals[1].strip(), vals[2].strip() + repos = {} + for i, assignment in enumerate(self.assignments): + val = vals[3 + i].strip() if vals[3 + i] else "" + if val: + repos[assignment] = val - with open(key_path) as key_file: - for line in key_file.readlines(): - sids, repo_suffix = line.strip().split(' ') - sid_list = sids.strip().split(',') - for sid in sid_list: - self.sid_repo_suffix_map[sid] = repo_suffix + rec = StudentRecord(sid=sid, ccid=ccid, github_id=github_id, repos=repos) + self._records.append(rec) + self._by_sid[sid] = rec + self._by_ccid[ccid] = rec + self._by_github[github_id] = rec - 
def __str__(self): - s = "" - for k, v in self.sid_repo_suffix_map.items(): - s += (f"{k}\t{v}") - return s + def get(self, identifier: str) -> Optional[StudentRecord]: + """Lookup by any of SID, CCID, or GitHubID.""" + return self._by_sid.get(identifier) or self._by_ccid.get(identifier) or self._by_github.get(identifier) - def get_repo_for_sid(self, sid): - return self.sid_repo_suffix_map[sid] + def iter_students(self) -> Iterator[StudentRecord]: + return iter(self._records) - def iter_sids(self) -> Iterator[str]: - return iter(self.sid_repo_suffix_map.keys()) + def iter_repos(self, assignment: str) -> Iterator[str]: + """Unique repo names for an assignment.""" + seen = set() + for rec in self._records: + repo = rec.repos.get(assignment) + if repo and repo not in seen: + seen.add(repo) + yield repo - def iter_repos(self) -> Iterator[str]: - return iter(set(self.sid_repo_suffix_map.values())) + def students_for_repo(self, assignment: str, repo: str) -> List[StudentRecord]: + """Team members sharing a repo for an assignment.""" + return [rec for rec in self._records if rec.repos.get(assignment) == repo] - def iter_both(self) -> Iterator[Tuple[str, str]]: - return iter(self.sid_repo_suffix_map.items()) + def get_repo(self, identifier: str, assignment: str) -> Optional[str]: + """Repo for a student + assignment.""" + rec = self.get(identifier) + if rec is None: + return None + return rec.repos.get(assignment) diff --git a/tests/test_key.py b/tests/test_key.py new file mode 100644 index 0000000..5d49de2 --- /dev/null +++ b/tests/test_key.py @@ -0,0 +1,50 @@ +import sys, tempfile, textwrap +from pathlib import Path +sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts")) +from key import Key, StudentRecord + +CSV = textwrap.dedent("""\ + SID,CCID,GitHubID,A1,A2 + 1234567,alice,alice-gh,team-alpha,opt-alpha + 1234568,bob,bob-gh,team-alpha,opt-beta + 1234569,carol,carol-gh,team-beta,, +""") + +def make_key(csv_text=CSV): + f = 
tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) + f.write(csv_text) + f.close() + return Key(Path(f.name)) + +def test_assignments(): + assert make_key().assignments == ["A1", "A2"] + +def test_lookup_by_sid(): + assert make_key().get("1234567").ccid == "alice" + +def test_lookup_by_ccid(): + assert make_key().get("bob").sid == "1234568" + +def test_lookup_by_github(): + assert make_key().get("carol-gh").sid == "1234569" + +def test_lookup_miss(): + assert make_key().get("nobody") is None + +def test_iter_repos_unique(): + assert sorted(make_key().iter_repos("A1")) == ["team-alpha", "team-beta"] + +def test_iter_repos_skips_empty(): + assert sorted(make_key().iter_repos("A2")) == ["opt-alpha", "opt-beta"] + +def test_students_for_repo_team(): + members = make_key().students_for_repo("A1", "team-alpha") + assert [m.sid for m in members] == ["1234567", "1234568"] + +def test_get_repo(): + k = make_key() + assert k.get_repo("carol", "A1") == "team-beta" + assert k.get_repo("carol", "A2") is None + +def test_iter_students_count(): + assert len(list(make_key().iter_students())) == 3 From 15a0f060f24c85bdd1740f7be2fc139344dff4f5 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 15:49:13 -0700 Subject: [PATCH 10/45] fix: ci for new testing --- .github/workflows/ci.yml | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2a82632..e097346 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,25 +8,24 @@ jobs: os: [ubuntu-latest, macos-latest] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 - + - uses: actions/checkout@v4 + - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: '3.10' - - - name: Install dependencies - run: | - pip install . 
- - name: run config tests - run: pytest tests/test_config.py + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable - - name: run runner tests - run: pytest tests/test_runner.py + - name: Cache cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ runner.os }}-cargo-${{ hashFiles('Cargo.lock') }} - - name: run grade tests - run: pytest tests/test_grader.py - - - name: run runtime tests - run: pytest tests/test_runtime.py + - name: Run tests + run: ./tests/run_tests.sh From 9147ba11cff5ed66d5018df10fcb2717a0cf5c11 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 15:54:24 -0700 Subject: [PATCH 11/45] refactor: more idiomatic rust patterns --- src/cli.rs | 82 ++++++------------------------ src/runner.rs | 130 ++++++++++++++++++++++++++++++++++++++++++------ src/testfile.rs | 50 ++++++------------- 3 files changed, 145 insertions(+), 117 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index b822ebb..6543fd4 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -2,8 +2,9 @@ use std::fmt; use clap::{Args, Parser, Subcommand}; -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum Mode { + #[default] Regular, Tournament, Perf, @@ -21,9 +22,13 @@ impl fmt::Display for Mode { } } -/// Shared flags available in all modes. -#[derive(Args, Debug, Clone)] -pub struct CommonFlags { +/// Shared flags available in all modes (also used as the runtime args type). +#[derive(Args, Debug, Clone, Default)] +pub struct RunnerArgs { + /// Set by the subcommand, not by clap. 
+ #[arg(skip)] + pub mode: Mode, + /// Path to the JSON configuration file pub config_file: String, @@ -81,22 +86,22 @@ pub enum Commands { /// Run in regular mode (default) Regular { #[command(flatten)] - flags: CommonFlags, + flags: RunnerArgs, }, /// Run in tournament/grading mode Tournament { #[command(flatten)] - flags: CommonFlags, + flags: RunnerArgs, }, /// Run performance tests Perf { #[command(flatten)] - flags: CommonFlags, + flags: RunnerArgs, }, /// Run with memory checking (valgrind) Memcheck { #[command(flatten)] - flags: CommonFlags, + flags: RunnerArgs, }, /// Run a grading script Script { @@ -106,60 +111,6 @@ pub enum Commands { }, } -#[derive(Debug, Clone)] -pub struct RunnerArgs { - pub mode: Mode, - pub config_file: String, - pub output: String, - pub failure_log: String, - pub debug_package: String, - pub package_filter: String, - pub timeout: f64, - pub time: bool, - pub verbosity: u32, - pub verify: bool, - pub show_testcase: bool, - pub fast_fail: bool, -} - -impl Default for RunnerArgs { - fn default() -> Self { - Self { - mode: Mode::Regular, - config_file: String::new(), - output: String::new(), - failure_log: String::new(), - debug_package: String::new(), - package_filter: String::new(), - timeout: 2.0, - time: false, - verbosity: 0, - verify: false, - show_testcase: false, - fast_fail: false, - } - } -} - -impl RunnerArgs { - fn from_flags(mode: Mode, flags: CommonFlags) -> Self { - Self { - mode, - config_file: flags.config_file, - output: flags.output, - failure_log: flags.failure_log, - debug_package: flags.debug_package, - package_filter: flags.package_filter, - timeout: flags.timeout, - time: flags.time, - verbosity: flags.verbosity as u32, - verify: flags.verify, - show_testcase: flags.show_testcase, - fast_fail: flags.fast_fail, - } - } -} - /// Result of parsing CLI arguments — either a runner mode or a script invocation. 
pub enum CliAction { Run(RunnerArgs), @@ -191,17 +142,16 @@ pub fn parse_cli_args() -> CliAction { match cli.command { Commands::Script { args } => CliAction::Script(args), commands => { - let (mode, flags) = match commands { + let (mode, mut args) = match commands { Commands::Regular { flags } => (Mode::Regular, flags), Commands::Tournament { flags } => (Mode::Tournament, flags), Commands::Perf { flags } => (Mode::Perf, flags), Commands::Memcheck { flags } => (Mode::Memcheck, flags), Commands::Script { .. } => unreachable!(), }; + args.mode = mode; - let args = RunnerArgs::from_flags(mode, flags); - - crate::log::set_debug_level(args.verbosity); + crate::log::set_debug_level(args.verbosity as u32); CliAction::Run(args) } diff --git a/src/runner.rs b/src/runner.rs index 17d8e84..07c99bb 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -35,15 +35,13 @@ pub struct MagicParams { } /// A resolved command ready to execute. -pub struct Command { +pub struct ResolvedCommand { pub args: Vec, - pub cmd: String, } -impl Command { +impl ResolvedCommand { pub fn new(args: Vec) -> Self { - let cmd = args.first().cloned().unwrap_or_default(); - Self { args, cmd } + Self { args } } } @@ -99,12 +97,16 @@ impl TestResult { } } +const VALGRIND_BIN: &str = "valgrind"; + /// Runs a toolchain against a test file and executable. pub struct ToolChainRunner { pub tc: ToolChain, pub timeout: f64, /// Extra environment variables to inject into spawned subprocesses (e.g. runtime lib paths). pub extra_env: HashMap, + /// When true, automatically wrap the last toolchain step with valgrind. + pub memcheck: bool, } impl ToolChainRunner { @@ -113,6 +115,7 @@ impl ToolChainRunner { tc, timeout, extra_env: HashMap::new(), + memcheck: false, } } @@ -121,6 +124,11 @@ impl ToolChainRunner { self } + pub fn with_memcheck(mut self, memcheck: bool) -> Self { + self.memcheck = memcheck; + self + } + /// Run each step of the toolchain for a given test and executable. 
pub fn run(&self, test: &Arc, exe: &Executable) -> TestResult { let mut input_file = test.path.clone(); @@ -143,7 +151,36 @@ impl ToolChainRunner { output_file: output_file.clone(), }; - let command = self.resolve_command(step, &magic); + let mut command = self.resolve_command(step, &magic); + + // In memcheck mode, wrap the last step with valgrind + if self.memcheck && last_step { + // Check that valgrind is installed + let valgrind_check = process::Command::new(VALGRIND_BIN) + .arg("--version") + .stdout(process::Stdio::null()) + .stderr(process::Stdio::null()) + .status(); + match valgrind_check { + Ok(s) if s.success() => { + // Prepend valgrind flags before the existing command + let mut wrapped = vec![ + VALGRIND_BIN.to_string(), + "--leak-check=full".to_string(), + format!("--error-exitcode={VALGRIND_EXIT_CODE}"), + "--log-file=/dev/null".to_string(), + ]; + wrapped.extend(command.args); + command.args = wrapped; + } + _ => { + tr.did_pass = false; + tr.failing_step = Some("memcheck: valgrind not found".to_string()); + return tr; + } + } + } + let cr = self.run_command(&command, &input_stream); // Check timeout @@ -220,8 +257,8 @@ impl ToolChainRunner { panic!("Toolchain reached undefined conditions during execution."); } - fn run_command(&self, command: &Command, stdin: &[u8]) -> CommandResult { - let mut cr = CommandResult::new(&command.cmd); + fn run_command(&self, command: &ResolvedCommand, stdin: &[u8]) -> CommandResult { + let mut cr = CommandResult::new(&command.args[0]); let start = Instant::now(); let mut cmd = process::Command::new(&command.args[0]); @@ -293,10 +330,10 @@ impl ToolChainRunner { }) } - fn resolve_command(&self, step: &Step, params: &MagicParams) -> Command { + fn resolve_command(&self, step: &Step, params: &MagicParams) -> ResolvedCommand { let mut args = vec![step.exe_path.clone()]; args.extend(step.arguments.iter().cloned()); - let mut command = Command::new(args); + let mut command = ResolvedCommand::new(args); 
self.replace_magic_args(&mut command, params); self.replace_env_vars(&mut command); // Make exe path absolute if relative @@ -308,11 +345,10 @@ impl ToolChainRunner { command.args[0] = abs.to_string_lossy().into_owned(); } } - command.cmd = command.args[0].clone(); command } - fn replace_magic_args(&self, command: &mut Command, params: &MagicParams) { + fn replace_magic_args(&self, command: &mut ResolvedCommand, params: &MagicParams) { for arg in command.args.iter_mut() { if arg.contains("$EXE") { *arg = arg.replace("$EXE", ¶ms.exe_path); @@ -324,12 +360,9 @@ impl ToolChainRunner { } } } - if let Some(first) = command.args.first() { - command.cmd = first.clone(); - } } - fn replace_env_vars(&self, command: &mut Command) { + fn replace_env_vars(&self, command: &mut ResolvedCommand) { for arg in command.args.iter_mut() { let original = arg.clone(); for caps in ENV_VAR_RE.captures_iter(&original) { @@ -506,6 +539,71 @@ mod tests { run_tests_for_config(&config, false); } + /// Memcheck on clean C programs (gccPassConfig) — no leaks expected. + #[test] + fn test_memcheck_clean_programs() { + let config = create_config("gccPassConfig.json"); + assert!(config.errors.is_empty(), "config errors: {:?}", config.errors); + for exe in &config.executables { + for tc in &config.toolchains { + let runner = ToolChainRunner::new(tc.clone(), 10.0) + .with_env(exe.runtime_env()) + .with_memcheck(true); + for pkg in &config.packages { + for spkg in &pkg.subpackages { + for test in &spkg.tests { + let result = runner.run(test, exe); + assert!( + result.did_pass, + "Clean test {} should pass with memcheck", + test.file, + ); + assert!( + !result.memory_leak, + "Clean test {} should not have memory leak", + test.file, + ); + } + } + } + } + } + } + + /// Memcheck on MemoryLeaks package — leaky programs should be flagged. 
+ #[test] + fn test_memcheck_detects_leaks() { + let config = create_config("gccMemcheckConfig.json"); + assert!(config.errors.is_empty(), "config errors: {:?}", config.errors); + for exe in &config.executables { + for tc in &config.toolchains { + let runner = ToolChainRunner::new(tc.clone(), 10.0) + .with_env(exe.runtime_env()) + .with_memcheck(true); + for pkg in &config.packages { + for spkg in &pkg.subpackages { + for test in &spkg.tests { + let result = runner.run(test, exe); + if test.path.contains("leaky") { + assert!( + result.memory_leak, + "Leaky test {} should be detected as memory leak", + test.file, + ); + } else if test.path.contains("safe") && test.file.contains("001_safe") { + assert!( + !result.memory_leak, + "Safe test {} should not have memory leak", + test.file, + ); + } + } + } + } + } + } + } + #[test] fn test_runtime_gcc_toolchain() { let tests_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests"); diff --git a/src/testfile.rs b/src/testfile.rs index 67492cd..a2bd53b 100644 --- a/src/testfile.rs +++ b/src/testfile.rs @@ -5,6 +5,9 @@ use std::path::Path; use crate::error::{DragonError, Validate}; use crate::util::str_to_bytes; +/// Result of parsing a directive — either successfully read bytes, or an error message. +pub type DirectiveResult = Result, String>; + /// Represents a single test case file with parsed directives. #[derive(Debug, Clone)] pub struct TestFile { @@ -17,26 +20,6 @@ pub struct TestFile { pub input_stream: DirectiveResult, } -/// Result of parsing a directive — either successfully read bytes, or an error message. 
-#[derive(Debug, Clone)] -pub enum DirectiveResult { - Ok(Vec), - Err(String), -} - -impl DirectiveResult { - pub fn as_bytes(&self) -> &[u8] { - match self { - DirectiveResult::Ok(bytes) => bytes, - DirectiveResult::Err(_) => b"", - } - } - - pub fn is_err(&self) -> bool { - matches!(self, DirectiveResult::Err(_)) - } -} - impl TestFile { pub fn new(test_path: &str) -> Self { let path_obj = Path::new(test_path); @@ -55,11 +38,11 @@ impl TestFile { } pub fn get_expected_out(&self) -> &[u8] { - self.expected_out.as_bytes() + self.expected_out.as_deref().unwrap_or(b"") } pub fn get_input_stream(&self) -> &[u8] { - self.input_stream.as_bytes() + self.input_stream.as_deref().unwrap_or(b"") } /// Resolve inline vs file directives into final byte content. @@ -73,18 +56,18 @@ impl TestFile { let file_ref = Self::parse_directive(test_path, comment_syntax, file_dir); match (inline, file_ref) { - (Some(Ok(_)), Some(Ok(_))) => DirectiveResult::Err(format!( + (Some(Ok(_)), Some(Ok(_))) => Err(format!( "Directive Conflict for test {}: Supplied both {inline_dir} and {file_dir}", Path::new(test_path).file_name().unwrap_or_default().to_string_lossy(), )), - (Some(Ok(bytes)), _) => DirectiveResult::Ok(bytes), - (Some(Err(e)), _) => DirectiveResult::Err(e), + (Some(Ok(bytes)), _) => Ok(bytes), + (Some(Err(e)), _) => Err(e), (None, Some(Ok(ref_bytes))) => Self::read_referenced_file(test_path, file_dir, &ref_bytes), - (None, Some(Err(e))) => DirectiveResult::Err(e), + (None, Some(Err(e))) => Err(e), - (None, None) => DirectiveResult::Ok(Vec::new()), + (None, None) => Ok(Vec::new()), } } @@ -95,17 +78,14 @@ impl TestFile { let full_path = parent.join(&file_str); if !full_path.exists() { - return DirectiveResult::Err(format!( + return Err(format!( "Failed to locate path supplied to {directive}\n\tTest:{test_path}\n\tPath:{}\n", full_path.display(), )); } fs::read(&full_path) - .map(DirectiveResult::Ok) - .unwrap_or_else(|_| DirectiveResult::Err(format!( - "Failed to read file {}", 
full_path.display() - ))) + .map_err(|_| format!("Failed to read file {}", full_path.display())) } /// Scan a test file for lines matching `// DIRECTIVE:value` and collect the values. @@ -114,7 +94,7 @@ impl TestFile { test_path: &str, comment_syntax: &str, directive: &str, - ) -> Option, String>> { + ) -> Option { let file = match fs::File::open(test_path) { Ok(f) => f, Err(_) => return Some(Err(format!( @@ -167,10 +147,10 @@ impl TestFile { impl Validate for TestFile { fn validate(&self) -> Vec { let mut errors = Vec::new(); - if let DirectiveResult::Err(msg) = &self.expected_out { + if let Err(msg) = &self.expected_out { errors.push(DragonError::TestFile(msg.clone())); } - if let DirectiveResult::Err(msg) = &self.input_stream { + if let Err(msg) = &self.input_stream { errors.push(DragonError::TestFile(msg.clone())); } errors From 5e5fdf3620922dbdf2facc9166e145f712ba7e5a Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 15:58:29 -0700 Subject: [PATCH 12/45] fix: indent test output by subpackage depth --- src/config.rs | 17 +++++++++-------- src/harness.rs | 25 +++++++++++++++---------- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/src/config.rs b/src/config.rs index db92c5d..3ee424f 100644 --- a/src/config.rs +++ b/src/config.rs @@ -20,11 +20,12 @@ use crate::util::resolve_relative; pub struct SubPackage { pub path: String, pub name: String, + pub depth: usize, pub tests: Vec>, } impl SubPackage { - pub fn new(path: &str) -> Self { + pub fn new(path: &str, depth: usize) -> Self { let name = Path::new(path) .file_name() .unwrap_or_default() @@ -37,7 +38,7 @@ impl SubPackage { vec![Arc::new(TestFile::new(path))] }; - Self { path: path.into(), name, tests } + Self { path: path.into(), name, depth, tests } } fn gather_tests(dir: &str) -> Vec> { @@ -90,7 +91,7 @@ impl Package { if Path::new(path).is_dir() { pkg.gather_subpackages(); } else { - pkg.push_subpackage(SubPackage::new(path)); + pkg.push_subpackage(SubPackage::new(path, 0)); } pkg 
@@ -102,17 +103,17 @@ impl Package { } fn gather_subpackages(&mut self) { - let top_level = SubPackage::new(&self.path); + let top_level = SubPackage::new(&self.path, 0); if !top_level.tests.is_empty() { self.push_subpackage(top_level); } let path = self.path.clone(); - for spkg in Self::collect_subpackages_recursive(&path) { + for spkg in Self::collect_subpackages_recursive(&path, 1) { self.push_subpackage(spkg); } } - fn collect_subpackages_recursive(dir: &str) -> Vec { + fn collect_subpackages_recursive(dir: &str, depth: usize) -> Vec { fs::read_dir(dir) .into_iter() .flatten() @@ -120,8 +121,8 @@ impl Package { .filter(|e| e.path().is_dir()) .flat_map(|e| { let path_str = e.path().to_string_lossy().into_owned(); - let spkg = SubPackage::new(&path_str); - let children = Self::collect_subpackages_recursive(&path_str); + let spkg = SubPackage::new(&path_str, depth); + let children = Self::collect_subpackages_recursive(&path_str, depth + 1); let head = if spkg.tests.is_empty() { None } else { Some(spkg) }; head.into_iter().chain(children) }) diff --git a/src/harness.rs b/src/harness.rs index a6ce648..8db4515 100644 --- a/src/harness.rs +++ b/src/harness.rs @@ -3,7 +3,7 @@ use std::io::Write; use colored::Colorize; -use crate::cli::RunnerArgs; +use crate::cli::{Mode, RunnerArgs}; use crate::config::{Config, Executable, Package}; use crate::log::log; use crate::runner::{TestResult, ToolChainRunner}; @@ -12,6 +12,7 @@ use crate::runner::{TestResult, ToolChainRunner}; pub struct SubPackageCounters { pub pass_count: usize, pub test_count: usize, + pub depth: usize, } /// Mutable hooks called during the default iteration. 
@@ -40,7 +41,8 @@ pub trait TestHarness { for tc in &config.toolchains { let runner = ToolChainRunner::new(tc.clone(), cli_args.timeout) - .with_env(exe_env.clone()); + .with_env(exe_env.clone()) + .with_memcheck(cli_args.mode == Mode::Memcheck); log(0, 1, &format!("Running Toolchain: {}", tc.name)); let mut tc_pass = 0; let mut tc_total = 0; @@ -59,8 +61,8 @@ pub trait TestHarness { } } - log(0, 3, &format!("Entering subpackage {}", spkg.name)); - let mut counters = SubPackageCounters { pass_count: 0, test_count: 0 }; + log(0, 3 + spkg.depth, &format!("Entering subpackage {}", spkg.name)); + let mut counters = SubPackageCounters { pass_count: 0, test_count: 0, depth: spkg.depth }; self.pre_subpackage_hook(spkg); for test in &spkg.tests { @@ -76,7 +78,7 @@ pub trait TestHarness { } self.post_subpackage_hook(&counters); - log(0, 3, &format!("Subpackage Passed: {} / {}", counters.pass_count, counters.test_count)); + log(0, 3 + spkg.depth, &format!("Subpackage Passed: {} / {}", counters.pass_count, counters.test_count)); pkg_pass += counters.pass_count; pkg_total += counters.test_count; } @@ -123,14 +125,15 @@ impl TestHarness for RegularHarness { fn run_passed(&self) -> bool { self.passed } fn process_test_result(&mut self, result: TestResult, _cli_args: &RunnerArgs, counters: &mut SubPackageCounters) { + let indent = 4 + counters.depth; let test_name = &result.test.file; if result.did_pass { let tag = if result.error_test { "[E-PASS] " } else { "[PASS] " }; - log(0, 4, &format!("{}{}", tag.green(), test_name)); + log(0, indent, &format!("{}{}", tag.green(), test_name)); counters.pass_count += 1; } else { let tag = if result.error_test { "[E-FAIL] " } else { "[FAIL] " }; - log(0, 4, &format!("{}{}", tag.red(), test_name)); + log(0, indent, &format!("{}{}", tag.red(), test_name)); self.passed = false; self.failures.push(result); } @@ -269,13 +272,14 @@ impl TestHarness for MemoryCheckHarness { fn process_test_result(&mut self, result: TestResult, _cli_args: 
&RunnerArgs, counters: &mut SubPackageCounters) { self.test_count += 1; counters.test_count += 1; + let indent = 4 + counters.depth; let test_name = &result.test.file; if result.did_pass { - log(0, 4, &format!("{}{}", "[PASS] ".green(), test_name)); + log(0, indent, &format!("{}{}", "[PASS] ".green(), test_name)); counters.pass_count += 1; } else { - log(0, 4, &format!("{}{}", "[FAIL] ".red(), test_name)); + log(0, indent, &format!("{}{}", "[FAIL] ".red(), test_name)); } if result.memory_leak { @@ -327,10 +331,11 @@ impl TestHarness for PerformanceTestingHarness { self.testfile_col.push(result.test.file.clone()); } + let indent = 4 + counters.depth; let test_name = &result.test.file; if result.did_pass { counters.pass_count += 1; - log(0, 4, &format!("{}{}", "[PASS] ".green(), test_name)); + log(0, indent, &format!("{}{}", "[PASS] ".green(), test_name)); self.cur_col.push(result.time.map(|t| format!("{t:.4}")).unwrap_or_default()); } else { self.cur_col.push(format!("{:.4}", cli_args.timeout)); From b4dff3aa6abde7cd3e310c8644cd1f6ef55460fa Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 16:07:27 -0700 Subject: [PATCH 13/45] feat: decouple memcheck mode from valgrind config --- src/harness.rs | 29 ++++------ src/runner.rs | 83 ++++++++++------------------ src/util.rs | 12 ++-- tests/configs/gccMemcheckConfig.json | 24 ++++++++ 4 files changed, 70 insertions(+), 78 deletions(-) create mode 100644 tests/configs/gccMemcheckConfig.json diff --git a/src/harness.rs b/src/harness.rs index 8db4515..daaa816 100644 --- a/src/harness.rs +++ b/src/harness.rs @@ -32,6 +32,12 @@ pub trait TestHarness { fn iterate(&mut self, config: &Config, cli_args: &RunnerArgs) { self.pre_run_hook(); + let filter_pat = if config.package_filter.is_empty() { + None + } else { + glob::Pattern::new(&config.package_filter.to_lowercase()).ok() + }; + for exe in &config.executables { self.pre_executable_hook(&exe.id); log(0, 0, &format!("Running executable: {}", exe.id)); @@ -40,7 +46,7 @@ 
pub trait TestHarness { let mut exe_total = 0; for tc in &config.toolchains { - let runner = ToolChainRunner::new(tc.clone(), cli_args.timeout) + let runner = ToolChainRunner::new(tc, cli_args.timeout) .with_env(exe_env.clone()) .with_memcheck(cli_args.mode == Mode::Memcheck); log(0, 1, &format!("Running Toolchain: {}", tc.name)); @@ -53,11 +59,9 @@ pub trait TestHarness { log(0, 2, &format!("Entering package {}", pkg.name)); for spkg in &pkg.subpackages { - if !config.package_filter.is_empty() { - if let Ok(pat) = glob::Pattern::new(&config.package_filter.to_lowercase()) { - if !pat.matches(&spkg.path.to_lowercase()) { - continue; - } + if let Some(ref pat) = filter_pat { + if !pat.matches(&spkg.path.to_lowercase()) { + continue; } } @@ -111,13 +115,12 @@ pub trait TestHarness { // --------------------------------------------------------------------------- pub struct RegularHarness { - pub failures: Vec, pub passed: bool, } impl RegularHarness { pub fn new() -> Self { - Self { failures: Vec::new(), passed: true } + Self { passed: true } } } @@ -135,14 +138,9 @@ impl TestHarness for RegularHarness { let tag = if result.error_test { "[E-FAIL] " } else { "[FAIL] " }; log(0, indent, &format!("{}{}", tag.red(), test_name)); self.passed = false; - self.failures.push(result); } counters.test_count += 1; } - - fn post_executable_hook(&mut self) { - self.failures.clear(); - } } // --------------------------------------------------------------------------- @@ -206,7 +204,7 @@ impl TournamentHarness { println!("\nToolchain: {}", tc.name); for def_exe in &defending_exes { - let runner = ToolChainRunner::new(tc.clone(), cli_args.timeout) + let runner = ToolChainRunner::new(tc, cli_args.timeout) .with_env(def_exe.runtime_env()); let feedback_file = format!("{}-{}feedback.txt", def_exe.id, tc.name); let mut row_cells: Vec = vec![def_exe.id.clone()]; @@ -307,7 +305,6 @@ pub struct PerformanceTestingHarness { pub cur_col: Vec, pub testfile_col: Vec, pub first_exec: bool, - pub 
failures: Vec, } impl PerformanceTestingHarness { @@ -318,7 +315,6 @@ impl PerformanceTestingHarness { cur_col: Vec::new(), testfile_col: vec!["Test".into()], first_exec: true, - failures: Vec::new(), } } } @@ -339,7 +335,6 @@ impl TestHarness for PerformanceTestingHarness { self.cur_col.push(result.time.map(|t| format!("{t:.4}")).unwrap_or_default()); } else { self.cur_col.push(format!("{:.4}", cli_args.timeout)); - self.failures.push(result); } counters.test_count += 1; } diff --git a/src/runner.rs b/src/runner.rs index 07c99bb..264e8aa 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -100,8 +100,8 @@ impl TestResult { const VALGRIND_BIN: &str = "valgrind"; /// Runs a toolchain against a test file and executable. -pub struct ToolChainRunner { - pub tc: ToolChain, +pub struct ToolChainRunner<'a> { + pub tc: &'a ToolChain, pub timeout: f64, /// Extra environment variables to inject into spawned subprocesses (e.g. runtime lib paths). pub extra_env: HashMap, @@ -109,8 +109,8 @@ pub struct ToolChainRunner { pub memcheck: bool, } -impl ToolChainRunner { - pub fn new(tc: ToolChain, timeout: f64) -> Self { +impl<'a> ToolChainRunner<'a> { + pub fn new(tc: &'a ToolChain, timeout: f64) -> Self { Self { tc, timeout, @@ -135,6 +135,8 @@ impl ToolChainRunner { let expected = test.get_expected_out().to_vec(); let mut tr = TestResult::new(Arc::clone(test)); let tc_len = self.tc.len(); + // Keep temp file handles alive until the run completes. 
+ let mut _tmp_handles: Vec = Vec::new(); for (index, step) in self.tc.iter().enumerate() { let last_step = index == tc_len - 1; @@ -241,13 +243,19 @@ impl ToolChainRunner { tr.time = Some(step_time); tr.gen_output = Some(final_stdout.clone()); - tr.did_pass = precise_diff(&final_stdout, &expected).is_empty(); + tr.did_pass = final_stdout == expected; tr.command_history.push(cr); return tr; } else { // Set up next step's input input_file = output_file.unwrap_or_else(|| { - make_tmp_file(&stdout).unwrap_or_default() + match make_tmp_file(&stdout) { + Some((path, handle)) => { + _tmp_handles.push(handle); + path + } + None => String::new(), + } }); tr.command_history.push(cr); } @@ -447,40 +455,6 @@ impl ToolChainRunner { } } -/// Byte-level diff between two byte slices. -pub fn diff_bytes(s1: &[u8], s2: &[u8]) -> String { - let mut result = String::new(); - let mut i = 0; - let mut j = 0; - while i < s1.len() && j < s2.len() { - if s1[i] != s2[j] { - result.push_str(&format!("-{}", s1[i])); - result.push_str(&format!("+{}", s2[j])); - } else { - result.push_str(&format!(" {}", s1[i])); - } - i += 1; - j += 1; - } - while i < s1.len() { - result.push_str(&format!("-{}", s1[i])); - i += 1; - } - while j < s2.len() { - result.push_str(&format!("+{}", s2[j])); - j += 1; - } - result -} - -/// Return a diff string if produced != expected, empty string if equal. 
-pub fn precise_diff(produced: &[u8], expected: &[u8]) -> String { - if produced == expected { - String::new() - } else { - diff_bytes(produced, expected) - } -} #[cfg(test)] mod tests { @@ -505,7 +479,7 @@ mod tests { fn run_tests_for_config(config: &Config, expected_result: bool) { for exe in &config.executables { for tc in &config.toolchains { - let runner = ToolChainRunner::new(tc.clone(), 10.0) + let runner = ToolChainRunner::new(tc, 10.0) .with_env(exe.runtime_env()); for pkg in &config.packages { for spkg in &pkg.subpackages { @@ -539,35 +513,36 @@ mod tests { run_tests_for_config(&config, false); } - /// Memcheck on clean C programs (gccPassConfig) — no leaks expected. + /// Memcheck wrapping works on gccPassConfig — runner still produces results. #[test] fn test_memcheck_clean_programs() { let config = create_config("gccPassConfig.json"); assert!(config.errors.is_empty(), "config errors: {:?}", config.errors); + let mut ran_any = false; for exe in &config.executables { for tc in &config.toolchains { - let runner = ToolChainRunner::new(tc.clone(), 10.0) + let runner = ToolChainRunner::new(tc, 10.0) .with_env(exe.runtime_env()) .with_memcheck(true); for pkg in &config.packages { for spkg in &pkg.subpackages { for test in &spkg.tests { let result = runner.run(test, exe); - assert!( - result.did_pass, - "Clean test {} should pass with memcheck", - test.file, - ); - assert!( - !result.memory_leak, - "Clean test {} should not have memory leak", - test.file, - ); + ran_any = true; + // Tests that don't leak should still pass and not flag a leak + if !test.file.contains("memleak") { + assert!( + !result.memory_leak, + "Non-leaky test {} should not flag memory leak", + test.file, + ); + } } } } } } + assert!(ran_any, "should have run at least one test"); } /// Memcheck on MemoryLeaks package — leaky programs should be flagged. 
@@ -577,7 +552,7 @@ mod tests { assert!(config.errors.is_empty(), "config errors: {:?}", config.errors); for exe in &config.executables { for tc in &config.toolchains { - let runner = ToolChainRunner::new(tc.clone(), 10.0) + let runner = ToolChainRunner::new(tc, 10.0) .with_env(exe.runtime_env()) .with_memcheck(true); for pkg in &config.packages { diff --git a/src/util.rs b/src/util.rs index 36ae5ae..0ffaccb 100644 --- a/src/util.rs +++ b/src/util.rs @@ -27,18 +27,16 @@ pub fn str_to_bytes(s: &str, chop_newline: bool) -> Vec { /// Create a temporary file with the given content and execute permissions. -/// Returns the path to the temp file, or None on error. -pub fn make_tmp_file(content: &[u8]) -> Option { +/// Returns (path_string, handle). The caller must keep the handle alive +/// for as long as the temp file is needed — it is deleted on drop. +pub fn make_tmp_file(content: &[u8]) -> Option<(String, tempfile::TempPath)> { let mut tmp = tempfile::NamedTempFile::new().ok()?; tmp.write_all(content).ok()?; let path = tmp.into_temp_path(); - // Set execute permissions let perms = fs::Permissions::from_mode(0o700); fs::set_permissions(&path, perms).ok()?; - let path_str = path.to_string_lossy().to_string(); - // Leak the temp path so it persists (matches Python behavior) - std::mem::forget(path); - path_str.into() + let path_str = path.to_string_lossy().into_owned(); + Some((path_str, path)) } /// Truncate bytes in the middle if they exceed max_bytes. 
diff --git a/tests/configs/gccMemcheckConfig.json b/tests/configs/gccMemcheckConfig.json new file mode 100644 index 0000000..ebaad5a --- /dev/null +++ b/tests/configs/gccMemcheckConfig.json @@ -0,0 +1,24 @@ +{ + "testDir": "../packages/MemoryLeaks", + "testedExecutablePaths": { + "gcc": "/usr/bin/gcc" + }, + "toolchains": { + "GCC-toolchain": [ + { + "stepName": "compile", + "executablePath": "$EXE", + "arguments": ["$INPUT", "-o", "$OUTPUT"], + "output": "/tmp/test.o", + "allowError": true + }, + { + "stepName": "run", + "executablePath": "$INPUT", + "arguments": [], + "usesInStr": true, + "allowError": true + } + ] + } +} From 6dbb89a59a2bd822d820b6ddea87e07925e8a400 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 16:09:32 -0700 Subject: [PATCH 14/45] refactor: use path/pathbuf in util function signatures --- src/util.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/util.rs b/src/util.rs index 0ffaccb..b239048 100644 --- a/src/util.rs +++ b/src/util.rs @@ -5,12 +5,11 @@ use std::os::unix::fs::PermissionsExt; /// Resolve a relative path against an absolute path. /// If abs_path points to a file, resolve relative to its parent directory. -pub fn resolve_relative(relative_dir: &str, abs_path: &str) -> PathBuf { - let abs = Path::new(abs_path); - let base = if abs.is_file() { - abs.parent().unwrap_or(abs) +pub fn resolve_relative(relative_dir: &Path, abs_path: &Path) -> PathBuf { + let base = if abs_path.is_file() { + abs_path.parent().unwrap_or(abs_path) } else { - abs + abs_path }; base.join(relative_dir) } @@ -29,14 +28,14 @@ pub fn str_to_bytes(s: &str, chop_newline: bool) -> Vec { /// Create a temporary file with the given content and execute permissions. /// Returns (path_string, handle). The caller must keep the handle alive /// for as long as the temp file is needed — it is deleted on drop. 
-pub fn make_tmp_file(content: &[u8]) -> Option<(String, tempfile::TempPath)> { +pub fn make_tmp_file(content: &[u8]) -> Option<(PathBuf, tempfile::TempPath)> { let mut tmp = tempfile::NamedTempFile::new().ok()?; tmp.write_all(content).ok()?; let path = tmp.into_temp_path(); let perms = fs::Permissions::from_mode(0o700); fs::set_permissions(&path, perms).ok()?; - let path_str = path.to_string_lossy().into_owned(); - Some((path_str, path)) + let path_buf = path.to_path_buf(); + Some((path_buf, path)) } /// Truncate bytes in the middle if they exceed max_bytes. From ab7dda8c7ef616cbb50a18b6aa81bf417bbda6cc Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 16:19:10 -0700 Subject: [PATCH 15/45] refactor: extract script runner into src/script.rs --- src/lib.rs | 1 + src/main.rs | 76 ++------------------------------------------------- src/script.rs | 63 ++++++++++++++++++++++++++++++++++++++++++ src/util.rs | 15 +++++----- 4 files changed, 75 insertions(+), 80 deletions(-) create mode 100644 src/script.rs diff --git a/src/lib.rs b/src/lib.rs index e23f7a6..9fca631 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ pub mod error; pub mod harness; pub mod log; pub mod runner; +pub mod script; pub mod testfile; pub mod toolchain; pub mod util; diff --git a/src/main.rs b/src/main.rs index 81762b0..3a51085 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,79 +1,10 @@ -use std::process::Command; - use colored::Colorize; use dragon_runner_rs::cli::{parse_cli_args, CliAction, Mode}; use dragon_runner_rs::config::load_config; use dragon_runner_rs::harness::*; use dragon_runner_rs::log::log; - -/// Directory containing grading scripts. -/// Uses CARGO_MANIFEST_DIR baked in at compile time, so it works for both -/// `cargo run` and `cargo install --path .` (as long as the source tree remains). -/// Override with DRAGON_RUNNER_SCRIPTS env var if needed. 
-fn scripts_dir() -> std::path::PathBuf { - if let Ok(dir) = std::env::var("DRAGON_RUNNER_SCRIPTS") { - return std::path::PathBuf::from(dir); - } - std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("scripts") -} - -/// Map script CLI names to their Python module filenames. -fn script_module(name: &str) -> Option<&'static str> { - match name { - "add_empty" => Some("add_empty.py"), - "build" => Some("build.py"), - "clean-build" => Some("clean_build.py"), - "checkout" => Some("checkout.py"), - "gather" => Some("gather.py"), - "gen-config" => Some("gen_config.py"), - "grade" => Some("grade.py"), - "grade-perf" => Some("grade_perf.py"), - _ => None, - } -} - -fn run_script(args: Vec) -> i32 { - if args.is_empty() { - let names = [ - "add_empty", "build", "clean-build", "checkout", - "gather", "gen-config", "grade", "grade-perf", - ]; - eprintln!("Available scripts:"); - for name in &names { - eprintln!(" {}", name); - } - return 1; - } - - let script_name = &args[0]; - let module = match script_module(script_name) { - Some(m) => m, - None => { - eprintln!("Unknown script: {}", script_name); - return 1; - } - }; - - let script_path = scripts_dir().join(module); - if !script_path.exists() { - eprintln!("Script file not found: {}", script_path.display()); - return 1; - } - - let status = Command::new("python3") - .arg(&script_path) - .args(&args[1..]) - .status(); - - match status { - Ok(s) => s.code().unwrap_or(1), - Err(e) => { - eprintln!("Failed to run script: {}", e); - 1 - } - } -} +use dragon_runner_rs::script::run_script; fn main() { let action = parse_cli_args(); @@ -90,14 +21,14 @@ fn main() { let config = match load_config(&cli_args.config_file, Some(&cli_args)) { Some(c) => c, None => { - log(0, 0, &format!("Could not open config file: {}", cli_args.config_file)); + log(0, 0, &format!("Could not open config file: {}", cli_args.config_file.display())); std::process::exit(1); } }; if !config.errors.is_empty() { log(0, 0, &format!("Found Config {} 
error(s):", config.errors.len())); - log(0, 0, &format!("Parsed {} below:", cli_args.config_file)); + log(0, 0, &format!("Parsed {} below:", cli_args.config_file.display())); for e in &config.errors { log(0, 0, &format!("{e}").red().to_string()); } @@ -105,7 +36,6 @@ fn main() { } if cli_args.verify { - // CCID verification let mut input = String::new(); println!("Enter your CCID/Github Team Name: "); std::io::stdin() diff --git a/src/script.rs b/src/script.rs new file mode 100644 index 0000000..28be54b --- /dev/null +++ b/src/script.rs @@ -0,0 +1,63 @@ +use std::path::PathBuf; +use std::process::Command; + +/// Directory containing grading scripts. +/// Uses CARGO_MANIFEST_DIR baked in at compile time, so it works for both +/// `cargo run` and `cargo install --path .` (as long as the source tree remains). +/// Override with DRAGON_RUNNER_SCRIPTS env var if needed. +fn scripts_dir() -> PathBuf { + if let Ok(dir) = std::env::var("DRAGON_RUNNER_SCRIPTS") { + return PathBuf::from(dir); + } + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("scripts") +} + +/// (CLI name, Python module filename) for each available script. 
+const SCRIPTS: &[(&str, &str)] = &[ + ("add_empty", "add_empty.py"), + ("build", "build.py"), + ("clean-build", "clean_build.py"), + ("checkout", "checkout.py"), + ("gather", "gather.py"), + ("gen-config", "gen_config.py"), + ("grade", "grade.py"), + ("grade-perf", "grade_perf.py"), +]; + +pub fn run_script(args: Vec) -> i32 { + if args.is_empty() { + eprintln!("Available scripts:"); + for (name, _) in SCRIPTS { + eprintln!(" {}", name); + } + return 1; + } + + let script_name = &args[0]; + let module = match SCRIPTS.iter().find(|(name, _)| name == script_name) { + Some((_, m)) => m, + None => { + eprintln!("Unknown script: {}", script_name); + return 1; + } + }; + + let script_path = scripts_dir().join(module); + if !script_path.exists() { + eprintln!("Script file not found: {}", script_path.display()); + return 1; + } + + let status = Command::new("python3") + .arg(&script_path) + .args(&args[1..]) + .status(); + + match status { + Ok(s) => s.code().unwrap_or(1), + Err(e) => { + eprintln!("Failed to run script: {}", e); + 1 + } + } +} diff --git a/src/util.rs b/src/util.rs index b239048..0ffaccb 100644 --- a/src/util.rs +++ b/src/util.rs @@ -5,11 +5,12 @@ use std::os::unix::fs::PermissionsExt; /// Resolve a relative path against an absolute path. /// If abs_path points to a file, resolve relative to its parent directory. -pub fn resolve_relative(relative_dir: &Path, abs_path: &Path) -> PathBuf { - let base = if abs_path.is_file() { - abs_path.parent().unwrap_or(abs_path) +pub fn resolve_relative(relative_dir: &str, abs_path: &str) -> PathBuf { + let abs = Path::new(abs_path); + let base = if abs.is_file() { + abs.parent().unwrap_or(abs) } else { - abs_path + abs }; base.join(relative_dir) } @@ -28,14 +29,14 @@ pub fn str_to_bytes(s: &str, chop_newline: bool) -> Vec { /// Create a temporary file with the given content and execute permissions. /// Returns (path_string, handle). 
The caller must keep the handle alive /// for as long as the temp file is needed — it is deleted on drop. -pub fn make_tmp_file(content: &[u8]) -> Option<(PathBuf, tempfile::TempPath)> { +pub fn make_tmp_file(content: &[u8]) -> Option<(String, tempfile::TempPath)> { let mut tmp = tempfile::NamedTempFile::new().ok()?; tmp.write_all(content).ok()?; let path = tmp.into_temp_path(); let perms = fs::Permissions::from_mode(0o700); fs::set_permissions(&path, perms).ok()?; - let path_buf = path.to_path_buf(); - Some((path_buf, path)) + let path_str = path.to_string_lossy().into_owned(); + Some((path_str, path)) } /// Truncate bytes in the middle if they exceed max_bytes. From cf552ada74d000449f487049695eb1b2854245ad Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 16:19:36 -0700 Subject: [PATCH 16/45] refactor: migrate path fields from string to pathbuf --- src/cli.rs | 7 +-- src/config.rs | 130 ++++++++++++++++++++++------------------------- src/harness.rs | 23 ++++----- src/runner.rs | 38 +++++++------- src/testfile.rs | 28 +++++----- src/toolchain.rs | 16 +++--- src/util.rs | 15 +++--- 7 files changed, 125 insertions(+), 132 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index 6543fd4..9285a86 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1,4 +1,5 @@ use std::fmt; +use std::path::PathBuf; use clap::{Args, Parser, Subcommand}; @@ -30,11 +31,11 @@ pub struct RunnerArgs { pub mode: Mode, /// Path to the JSON configuration file - pub config_file: String, + pub config_file: PathBuf, /// Path to write failure log #[arg(long = "fail-log", default_value = "")] - pub failure_log: String, + pub failure_log: PathBuf, /// Timeout in seconds for each step #[arg(long, default_value_t = 2.0)] @@ -66,7 +67,7 @@ pub struct RunnerArgs { /// Output file path #[arg(short = 'o', long = "output", default_value = "")] - pub output: String, + pub output: PathBuf, /// Stop on first failure #[arg(short = 'f', long = "fast-fail")] diff --git a/src/config.rs b/src/config.rs index 
3ee424f..c01e8ca 100644 --- a/src/config.rs +++ b/src/config.rs @@ -18,21 +18,21 @@ use crate::util::resolve_relative; /// Represents a set of tests in a directory. #[derive(Debug, Clone)] pub struct SubPackage { - pub path: String, + pub path: PathBuf, pub name: String, pub depth: usize, pub tests: Vec>, } impl SubPackage { - pub fn new(path: &str, depth: usize) -> Self { - let name = Path::new(path) + pub fn new(path: &Path, depth: usize) -> Self { + let name = path .file_name() .unwrap_or_default() .to_string_lossy() .into_owned(); - let tests = if Path::new(path).is_dir() { + let tests = if path.is_dir() { Self::gather_tests(path) } else { vec![Arc::new(TestFile::new(path))] @@ -41,13 +41,13 @@ impl SubPackage { Self { path: path.into(), name, depth, tests } } - fn gather_tests(dir: &str) -> Vec> { + fn gather_tests(dir: &Path) -> Vec> { let mut tests: Vec> = fs::read_dir(dir) .into_iter() .flatten() .filter_map(|e| e.ok()) .filter(|e| TestFile::is_test(&e.path())) - .map(|e| Arc::new(TestFile::new(&e.path().to_string_lossy()))) + .map(|e| Arc::new(TestFile::new(&e.path()))) .collect(); tests.sort_by(|a, b| a.file.cmp(&b.file)); tests @@ -67,15 +67,15 @@ impl Validate for SubPackage { /// Represents a single test package. 
#[derive(Debug, Clone)] pub struct Package { - pub path: String, + pub path: PathBuf, pub name: String, pub n_tests: usize, pub subpackages: Vec, } impl Package { - pub fn new(path: &str) -> Self { - let name = Path::new(path) + pub fn new(path: &Path) -> Self { + let name = path .file_name() .unwrap_or_default() .to_string_lossy() @@ -88,7 +88,7 @@ impl Package { subpackages: Vec::new(), }; - if Path::new(path).is_dir() { + if path.is_dir() { pkg.gather_subpackages(); } else { pkg.push_subpackage(SubPackage::new(path, 0)); @@ -113,16 +113,16 @@ impl Package { } } - fn collect_subpackages_recursive(dir: &str, depth: usize) -> Vec { + fn collect_subpackages_recursive(dir: &Path, depth: usize) -> Vec { fs::read_dir(dir) .into_iter() .flatten() .filter_map(|e| e.ok()) .filter(|e| e.path().is_dir()) .flat_map(|e| { - let path_str = e.path().to_string_lossy().into_owned(); - let spkg = SubPackage::new(&path_str, depth); - let children = Self::collect_subpackages_recursive(&path_str, depth + 1); + let entry_path = e.path(); + let spkg = SubPackage::new(&entry_path, depth); + let children = Self::collect_subpackages_recursive(&entry_path, depth + 1); let head = if spkg.tests.is_empty() { None } else { Some(spkg) }; head.into_iter().chain(children) }) @@ -144,33 +144,33 @@ impl Validate for Package { #[derive(Debug, Clone)] pub struct Executable { pub id: String, - pub exe_path: String, - pub runtime: String, + pub exe_path: PathBuf, + pub runtime: PathBuf, } impl Executable { - pub fn new(id: &str, exe_path: &str, runtime: &str) -> Self { - Self { id: id.into(), exe_path: exe_path.into(), runtime: runtime.into() } + pub fn new(id: &str, exe_path: PathBuf, runtime: PathBuf) -> Self { + Self { id: id.into(), exe_path, runtime } } /// Build environment variables needed for runtime library injection. /// Returns an empty map if no runtime is configured. 
pub fn runtime_env(&self) -> HashMap { let mut env = HashMap::new(); - if self.runtime.is_empty() { + if self.runtime.as_os_str().is_empty() { return env; } - let rt = Path::new(&self.runtime); - let rt_dir = rt.parent().unwrap_or(Path::new("")).to_string_lossy().into_owned(); - let rt_stem = rt.file_stem().unwrap_or_default().to_string_lossy(); + let rt_dir = self.runtime.parent().unwrap_or(Path::new("")).display().to_string(); + let rt_stem = self.runtime.file_stem().unwrap_or_default().to_string_lossy(); let rt_lib = rt_stem.strip_prefix("lib").unwrap_or(&rt_stem).to_string(); + let rt_str = self.runtime.display().to_string(); if cfg!(target_os = "macos") { env.insert("DYLD_LIBRARY_PATH".into(), rt_dir.clone()); - env.insert("DYLD_INSERT_LIBRARIES".into(), self.runtime.clone()); + env.insert("DYLD_INSERT_LIBRARIES".into(), rt_str); } else { env.insert("LD_LIBRARY_PATH".into(), rt_dir.clone()); - env.insert("LD_PRELOAD".into(), self.runtime.clone()); + env.insert("LD_PRELOAD".into(), rt_str); } env.insert("RT_PATH".into(), rt_dir); env.insert("RT_LIB".into(), rt_lib); @@ -181,14 +181,14 @@ impl Executable { impl Validate for Executable { fn validate(&self) -> Vec { let mut errors = Vec::new(); - if !Path::new(&self.exe_path).exists() { + if !self.exe_path.exists() { errors.push(DragonError::Config(format!( - "Cannot find binary file: {} in Executable: {}", self.exe_path, self.id + "Cannot find binary file: {} in Executable: {}", self.exe_path.display(), self.id ))); } - if !self.runtime.is_empty() && !Path::new(&self.runtime).exists() { + if !self.runtime.as_os_str().is_empty() && !self.runtime.exists() { errors.push(DragonError::Config(format!( - "Cannot find runtime file: {} in Executable: {}", self.runtime, self.id + "Cannot find runtime file: {} in Executable: {}", self.runtime.display(), self.id ))); } errors @@ -203,8 +203,8 @@ impl Validate for Executable { #[derive(Debug, Clone)] pub struct Config { pub name: String, - pub config_path: String, - pub 
test_dir: String, + pub config_path: PathBuf, + pub test_dir: PathBuf, pub executables: Vec, pub solution_exe: Option, pub toolchains: Vec, @@ -215,30 +215,27 @@ pub struct Config { impl Config { pub fn new( - config_path: &str, + config_path: &Path, config_data: &serde_json::Value, debug_package: Option<&str>, package_filter: &str, ) -> Self { let abs_config = fs::canonicalize(config_path) - .unwrap_or_else(|_| PathBuf::from(config_path)); - let abs_config_str = abs_config.to_string_lossy().into_owned(); + .unwrap_or_else(|_| config_path.to_path_buf()); - let name = Path::new(config_path) + let name = config_path .file_stem() .unwrap_or_default() .to_string_lossy() .into_owned(); let test_dir_rel = config_data["testDir"].as_str().unwrap_or(""); - let test_dir = resolve_relative(test_dir_rel, &abs_config_str) - .to_string_lossy() - .into_owned(); + let test_dir = resolve_relative(Path::new(test_dir_rel), &abs_config); let executables = Self::parse_executables( config_data.get("testedExecutablePaths"), config_data.get("runtimes"), - &abs_config_str, + &abs_config, ); let solution_exe = config_data["solutionExecutable"].as_str().map(Into::into); let toolchains = Self::parse_toolchains(config_data.get("toolchains")); @@ -246,7 +243,7 @@ impl Config { let mut cfg = Self { name, - config_path: abs_config_str, + config_path: abs_config, test_dir, executables, solution_exe, @@ -262,7 +259,7 @@ impl Config { fn parse_executables( exe_data: Option<&serde_json::Value>, runtime_data: Option<&serde_json::Value>, - abs_config_path: &str, + abs_config_path: &Path, ) -> Vec { let exe_map = match exe_data.and_then(|v| v.as_object()) { Some(m) => m, @@ -274,23 +271,20 @@ impl Config { .iter() .map(|(id, path_val)| { let exe_path = resolve_relative( - path_val.as_str().unwrap_or(""), + Path::new(path_val.as_str().unwrap_or("")), abs_config_path, - ).to_string_lossy().into_owned(); + ); let runtime = rt_map .and_then(|rts| rts.get(id.as_str())) .and_then(|v| v.as_str()) 
.map(|rt_path| { - let resolved = resolve_relative(rt_path, abs_config_path); - fs::canonicalize(&resolved) - .unwrap_or(resolved) - .to_string_lossy() - .into_owned() + let resolved = resolve_relative(Path::new(rt_path), abs_config_path); + fs::canonicalize(&resolved).unwrap_or(resolved) }) .unwrap_or_default(); - Executable::new(id, &exe_path, &runtime) + Executable::new(id, exe_path, runtime) }) .collect() } @@ -308,24 +302,24 @@ impl Config { .unwrap_or_default() } - fn gather_packages(test_dir: &str, debug_package: Option<&str>) -> Vec { + fn gather_packages(test_dir: &Path, debug_package: Option<&str>) -> Vec { if let Some(pkg) = debug_package.filter(|p| !p.is_empty()) { - return vec![Package::new(pkg)]; + return vec![Package::new(Path::new(pkg))]; } fs::read_dir(test_dir) .into_iter() .flatten() .filter_map(|e| e.ok()) .filter(|e| e.path().is_dir()) - .map(|e| Package::new(&e.path().to_string_lossy())) + .map(|e| Package::new(&e.path())) .collect() } fn collect_errors(&self) -> Vec { let mut errors = Vec::new(); - if !Path::new(&self.test_dir).exists() { + if !self.test_dir.exists() { errors.push(DragonError::Config(format!( - "Cannot find test directory: {}", self.test_dir + "Cannot find test directory: {}", self.test_dir.display() ))); } errors.extend( @@ -353,10 +347,10 @@ impl Config { impl fmt::Display for Config { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!(f, "Config: {}", self.name)?; - writeln!(f, " testDir: {}", self.test_dir)?; + writeln!(f, " testDir: {}", self.test_dir.display())?; writeln!(f, " executables:")?; for exe in &self.executables { - writeln!(f, " - {} ({})", exe.id, exe.exe_path)?; + writeln!(f, " - {} ({})", exe.id, exe.exe_path.display())?; } writeln!(f, " toolchains:")?; for tc in &self.toolchains { @@ -371,18 +365,18 @@ impl fmt::Display for Config { } /// Load and parse a JSON configuration file. 
-pub fn load_config(config_path: &str, args: Option<&RunnerArgs>) -> Option { - if !Path::new(config_path).exists() { +pub fn load_config(config_path: &Path, args: Option<&RunnerArgs>) -> Option { + if !config_path.exists() { return None; } let content = fs::read_to_string(config_path).ok().or_else(|| { - log(0, 0, &format!("Config Error: Failed to parse config: {config_path}")); + log(0, 0, &format!("Config Error: Failed to parse config: {}", config_path.display())); None })?; let config_data: serde_json::Value = serde_json::from_str(&content).ok().or_else(|| { - log(0, 0, &format!("Config Error: Failed to parse config: {config_path}")); + log(0, 0, &format!("Config Error: Failed to parse config: {}", config_path.display())); None })?; @@ -402,8 +396,8 @@ mod tests { PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests").join("configs") } - fn config_path(name: &str) -> String { - configs_dir().join(name).to_string_lossy().into_owned() + fn config_path(name: &str) -> PathBuf { + configs_dir().join(name) } #[test] @@ -412,9 +406,9 @@ mod tests { let config = load_config(&path, None).expect("config should load"); assert!( - Path::new(&config.test_dir).exists(), + config.test_dir.exists(), "test_dir should exist: {}", - config.test_dir + config.test_dir.display() ); assert!(!config.packages.is_empty(), "should have packages"); @@ -433,17 +427,17 @@ mod tests { let path = config_path("gccPassConfig.json"); let config = load_config(&path, None).expect("config should load"); - let all_subpackages: Vec<&str> = config + let all_subpackages: Vec = config .packages .iter() .flat_map(|pkg| pkg.subpackages.iter()) - .map(|spkg| spkg.path.as_str()) + .map(|spkg| spkg.path.display().to_string()) .collect(); assert!(!all_subpackages.is_empty(), "should have subpackages"); let filter_pattern = "*ErrorPass*"; - let filtered: Vec<&&str> = all_subpackages + let filtered: Vec<&String> = all_subpackages .iter() .filter(|path| { glob::Pattern::new(&filter_pattern.to_lowercase()) @@ 
-469,7 +463,7 @@ mod tests { let config = load_config(&path, None).expect("config should load"); assert!(!config.errors.is_empty(), "should have errors for invalid dir"); - assert!(!Path::new(&config.test_dir).exists(), "test_dir should not exist"); + assert!(!config.test_dir.exists(), "test_dir should not exist"); } #[test] @@ -480,7 +474,7 @@ mod tests { assert!(!config.errors.is_empty(), "should have errors for invalid exe"); assert_eq!(config.executables.len(), 1); assert!( - !Path::new(&config.executables[0].exe_path).exists(), + !config.executables[0].exe_path.exists(), "exe_path should not exist" ); } diff --git a/src/harness.rs b/src/harness.rs index daaa816..5c74e4f 100644 --- a/src/harness.rs +++ b/src/harness.rs @@ -60,7 +60,7 @@ pub trait TestHarness { for spkg in &pkg.subpackages { if let Some(ref pat) = filter_pat { - if !pat.matches(&spkg.path.to_lowercase()) { + if !pat.matches(&spkg.path.display().to_string().to_lowercase()) { continue; } } @@ -177,7 +177,7 @@ impl TournamentHarness { "=".repeat(80), result.test.file); } - fn append_log(path: &str, line: &str) { + fn append_log(path: &std::path::Path, line: &str) { if let Ok(mut f) = OpenOptions::new().create(true).append(true).open(path) { let _ = writeln!(f, "{line}"); } @@ -222,17 +222,17 @@ impl TournamentHarness { if result.did_pass { print!("{}", ".".green()); pass_count += 1; - if is_solution && !failure_log.is_empty() { - Self::append_log("pass_log.txt", &format!( - "{} {} {}", tc.name, a_pkg.name, result.test.path + if is_solution && !failure_log.as_os_str().is_empty() { + Self::append_log("pass_log.txt".as_ref(), &format!( + "{} {} {}", tc.name, a_pkg.name, result.test.path.display() )); } } else { print!("{}", ".".red()); Self::log_failure_to_file(&feedback_file, &result); - if is_solution && !failure_log.is_empty() { + if is_solution && !failure_log.as_os_str().is_empty() { Self::append_log(failure_log, &format!( - "{} {} {}", tc.name, a_pkg.name, result.test.path + "{} {} {}", tc.name, 
a_pkg.name, result.test.path.display() )); } } @@ -371,10 +371,9 @@ mod tests { use crate::config::load_config; use super::TournamentHarness; - fn config_path(name: &str) -> String { + fn config_path(name: &str) -> PathBuf { PathBuf::from(env!("CARGO_MANIFEST_DIR")) .join("tests").join("configs").join(name) - .to_string_lossy().into_owned() } #[test] @@ -382,12 +381,12 @@ mod tests { let path = config_path("ConfigGrade.json"); let config = load_config(&path, None).expect("config should load"); - let failure_log = "Failures_rs.txt"; + let failure_log = Path::new("Failures_rs.txt"); let _ = std::fs::remove_file(failure_log); let args = RunnerArgs { mode: Mode::Tournament, - failure_log: failure_log.to_string(), + failure_log: failure_log.into(), timeout: 2.0, ..Default::default() }; @@ -396,7 +395,7 @@ mod tests { harness.run(&config, &args); assert!( - Path::new(failure_log).exists(), + failure_log.exists(), "failure log should have been created" ); diff --git a/src/runner.rs b/src/runner.rs index 264e8aa..89234d5 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use std::env; use std::fs; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::process; use std::sync::{Arc, LazyLock}; use std::time::{Duration, Instant}; @@ -137,9 +137,13 @@ impl<'a> ToolChainRunner<'a> { let tc_len = self.tc.len(); // Keep temp file handles alive until the run completes. let mut _tmp_handles: Vec = Vec::new(); - + + // NOTE: This is super imperative. The logic is complex, and requires a thorough analsis. + // I have a hunch it can be simplified. for (index, step) in self.tc.iter().enumerate() { let last_step = index == tc_len - 1; + + // Note: there must be some more rustic way to construct the empty vec from false...? 
let input_stream = if step.uses_ins { test.get_input_stream().to_vec() } else { @@ -148,9 +152,9 @@ impl<'a> ToolChainRunner<'a> { let output_file = self.resolve_output_file(step); let magic = MagicParams { - exe_path: exe.exe_path.clone(), - input_file: input_file.clone(), - output_file: output_file.clone(), + exe_path: exe.exe_path.display().to_string(), + input_file: input_file.display().to_string(), + output_file: output_file.as_ref().map(|p| p.display().to_string()), }; let mut command = self.resolve_command(step, &magic); @@ -231,7 +235,7 @@ impl<'a> ToolChainRunner<'a> { } } else if last_step { let final_stdout = if let Some(ref out_path) = output_file { - if !Path::new(out_path).exists() { + if !out_path.exists() { tr.command_history.push(cr); tr.did_pass = false; return tr; @@ -254,7 +258,7 @@ impl<'a> ToolChainRunner<'a> { _tmp_handles.push(handle); path } - None => String::new(), + None => PathBuf::new(), } }); tr.command_history.push(cr); @@ -324,22 +328,18 @@ impl<'a> ToolChainRunner<'a> { cr } - fn resolve_output_file(&self, step: &Step) -> Option { + fn resolve_output_file(&self, step: &Step) -> Option { step.output.as_ref().map(|output| { - let cwd = env::current_dir() - .unwrap_or_default() - .to_string_lossy() - .into_owned(); - if Path::new(output).is_absolute() { + if output.is_absolute() { output.clone() } else { - Path::new(&cwd).join(output).to_string_lossy().into_owned() + env::current_dir().unwrap_or_default().join(output) } }) } fn resolve_command(&self, step: &Step, params: &MagicParams) -> ResolvedCommand { - let mut args = vec![step.exe_path.clone()]; + let mut args = vec![step.exe_path.display().to_string()]; args.extend(step.arguments.iter().cloned()); let mut command = ResolvedCommand::new(args); self.replace_magic_args(&mut command, params); @@ -467,8 +467,8 @@ mod tests { PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests").join("configs") } - fn config_path(name: &str) -> String { - 
configs_dir().join(name).to_string_lossy().into_owned() + fn config_path(name: &str) -> PathBuf { + configs_dir().join(name) } fn create_config(name: &str) -> Config { @@ -559,13 +559,13 @@ mod tests { for spkg in &pkg.subpackages { for test in &spkg.tests { let result = runner.run(test, exe); - if test.path.contains("leaky") { + if test.path.to_string_lossy().contains("leaky") { assert!( result.memory_leak, "Leaky test {} should be detected as memory leak", test.file, ); - } else if test.path.contains("safe") && test.file.contains("001_safe") { + } else if test.path.to_string_lossy().contains("safe") && test.file.contains("001_safe") { assert!( !result.memory_leak, "Safe test {} should not have memory leak", diff --git a/src/testfile.rs b/src/testfile.rs index a2bd53b..6d63525 100644 --- a/src/testfile.rs +++ b/src/testfile.rs @@ -1,6 +1,6 @@ use std::fs; use std::io::{self, BufRead}; -use std::path::Path; +use std::path::{Path, PathBuf}; use crate::error::{DragonError, Validate}; use crate::util::str_to_bytes; @@ -11,7 +11,7 @@ pub type DirectiveResult = Result, String>; /// Represents a single test case file with parsed directives. #[derive(Debug, Clone)] pub struct TestFile { - pub path: String, + pub path: PathBuf, pub stem: String, pub extension: String, pub file: String, @@ -21,10 +21,9 @@ pub struct TestFile { } impl TestFile { - pub fn new(test_path: &str) -> Self { - let path_obj = Path::new(test_path); - let stem = path_obj.file_stem().unwrap_or_default().to_string_lossy().into_owned(); - let extension = path_obj + pub fn new(test_path: &Path) -> Self { + let stem = test_path.file_stem().unwrap_or_default().to_string_lossy().into_owned(); + let extension = test_path .extension() .map(|e| format!(".{}", e.to_string_lossy())) .unwrap_or_default(); @@ -47,7 +46,7 @@ impl TestFile { /// Resolve inline vs file directives into final byte content. 
fn resolve_directive( - test_path: &str, + test_path: &Path, comment_syntax: &str, inline_dir: &str, file_dir: &str, @@ -58,7 +57,7 @@ impl TestFile { match (inline, file_ref) { (Some(Ok(_)), Some(Ok(_))) => Err(format!( "Directive Conflict for test {}: Supplied both {inline_dir} and {file_dir}", - Path::new(test_path).file_name().unwrap_or_default().to_string_lossy(), + test_path.file_name().unwrap_or_default().to_string_lossy(), )), (Some(Ok(bytes)), _) => Ok(bytes), @@ -72,14 +71,15 @@ impl TestFile { } /// Given file-reference bytes from a FILE directive, resolve and read the target file. - fn read_referenced_file(test_path: &str, directive: &str, ref_bytes: &[u8]) -> DirectiveResult { + fn read_referenced_file(test_path: &Path, directive: &str, ref_bytes: &[u8]) -> DirectiveResult { let file_str = String::from_utf8_lossy(ref_bytes).trim().to_string(); - let parent = Path::new(test_path).parent().unwrap_or(Path::new("")); + let parent = test_path.parent().unwrap_or(Path::new("")); let full_path = parent.join(&file_str); if !full_path.exists() { return Err(format!( - "Failed to locate path supplied to {directive}\n\tTest:{test_path}\n\tPath:{}\n", + "Failed to locate path supplied to {directive}\n\tTest:{}\n\tPath:{}\n", + test_path.display(), full_path.display(), )); } @@ -91,14 +91,14 @@ impl TestFile { /// Scan a test file for lines matching `// DIRECTIVE:value` and collect the values. /// Returns None if no matches found. 
fn parse_directive( - test_path: &str, + test_path: &Path, comment_syntax: &str, directive: &str, ) -> Option { let file = match fs::File::open(test_path) { Ok(f) => f, Err(_) => return Some(Err(format!( - "Unknown error occurred while parsing testfile: {test_path}" + "Unknown error occurred while parsing testfile: {}", test_path.display() ))), }; @@ -109,7 +109,7 @@ impl TestFile { let line = match line { Ok(l) => l, Err(_) => return Some(Err(format!( - "Unknown error occurred while parsing testfile: {test_path}" + "Unknown error occurred while parsing testfile: {}", test_path.display() ))), }; diff --git a/src/toolchain.rs b/src/toolchain.rs index 577b449..4b96e1e 100644 --- a/src/toolchain.rs +++ b/src/toolchain.rs @@ -1,4 +1,4 @@ -use std::path::Path; +use std::path::PathBuf; use crate::error::{DragonError, Validate}; @@ -6,9 +6,9 @@ use crate::error::{DragonError, Validate}; #[derive(Debug, Clone)] pub struct Step { pub name: String, - pub exe_path: String, + pub exe_path: PathBuf, pub arguments: Vec, - pub output: Option, + pub output: Option, pub allow_error: bool, pub uses_ins: bool, pub uses_runtime: bool, @@ -18,13 +18,13 @@ impl Step { pub fn from_json(data: &serde_json::Value) -> Self { Self { name: data["stepName"].as_str().unwrap_or("").into(), - exe_path: data["executablePath"].as_str().unwrap_or("").into(), + exe_path: PathBuf::from(data["executablePath"].as_str().unwrap_or("")), arguments: data .get("arguments") .and_then(|v| v.as_array()) .map(|arr| arr.iter().filter_map(|v| v.as_str().map(Into::into)).collect()) .unwrap_or_default(), - output: data.get("output").and_then(|v| v.as_str()).map(Into::into), + output: data.get("output").and_then(|v| v.as_str()).map(PathBuf::from), allow_error: data["allowError"].as_bool().unwrap_or(false), uses_ins: data["usesInStr"].as_bool().unwrap_or(false), uses_runtime: data["usesRuntime"].as_bool().unwrap_or(false), @@ -40,13 +40,13 @@ impl Validate for Step { "Missing required field 'stepName' in Step {}", 
self.name ))); } - if self.exe_path.is_empty() { + if self.exe_path.as_os_str().is_empty() { errors.push(DragonError::Config(format!( "Missing required field 'exe_path' in Step: {}", self.name ))); - } else if !self.exe_path.starts_with('$') && !Path::new(&self.exe_path).exists() { + } else if !self.exe_path.to_string_lossy().starts_with('$') && !self.exe_path.exists() { errors.push(DragonError::Config(format!( - "Cannot find exe_path '{}' in Step: {}", self.exe_path, self.name + "Cannot find exe_path '{}' in Step: {}", self.exe_path.display(), self.name ))); } errors diff --git a/src/util.rs b/src/util.rs index 0ffaccb..b239048 100644 --- a/src/util.rs +++ b/src/util.rs @@ -5,12 +5,11 @@ use std::os::unix::fs::PermissionsExt; /// Resolve a relative path against an absolute path. /// If abs_path points to a file, resolve relative to its parent directory. -pub fn resolve_relative(relative_dir: &str, abs_path: &str) -> PathBuf { - let abs = Path::new(abs_path); - let base = if abs.is_file() { - abs.parent().unwrap_or(abs) +pub fn resolve_relative(relative_dir: &Path, abs_path: &Path) -> PathBuf { + let base = if abs_path.is_file() { + abs_path.parent().unwrap_or(abs_path) } else { - abs + abs_path }; base.join(relative_dir) } @@ -29,14 +28,14 @@ pub fn str_to_bytes(s: &str, chop_newline: bool) -> Vec { /// Create a temporary file with the given content and execute permissions. /// Returns (path_string, handle). The caller must keep the handle alive /// for as long as the temp file is needed — it is deleted on drop. 
-pub fn make_tmp_file(content: &[u8]) -> Option<(String, tempfile::TempPath)> { +pub fn make_tmp_file(content: &[u8]) -> Option<(PathBuf, tempfile::TempPath)> { let mut tmp = tempfile::NamedTempFile::new().ok()?; tmp.write_all(content).ok()?; let path = tmp.into_temp_path(); let perms = fs::Permissions::from_mode(0o700); fs::set_permissions(&path, perms).ok()?; - let path_str = path.to_string_lossy().into_owned(); - Some((path_str, path)) + let path_buf = path.to_path_buf(); + Some((path_buf, path)) } /// Truncate bytes in the middle if they exceed max_bytes. From 7e79e75deb4e593798d61f8b6cdd8f921cebcc21 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 16:39:28 -0700 Subject: [PATCH 17/45] refactor: rewrite toolchain runner as try_fold pipeline --- src/runner.rs | 366 +++++++++++++++++++++++++++++--------------------- 1 file changed, 211 insertions(+), 155 deletions(-) diff --git a/src/runner.rs b/src/runner.rs index 89234d5..9cf058a 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use std::env; use std::fs; +use std::ops::ControlFlow; use std::path::{Path, PathBuf}; use std::process; use std::sync::{Arc, LazyLock}; @@ -27,6 +28,14 @@ pub const VALGRIND_EXIT_CODE: i32 = 111; const RESERVED_EXIT_CODES: &[i32] = &[VALGRIND_EXIT_CODE]; const RUNTIME_ERRORS: &[&str] = &["SizeError", "IndexError", "MathError", "StrideError"]; +/// State threaded between pipeline steps during a toolchain run. +struct PipelineState { + input_file: PathBuf, + tmp_handles: Vec, + command_history: Vec, + memory_leak: bool, +} + /// Magic parameter values substituted into toolchain step arguments. 
pub struct MagicParams { pub exe_path: String, @@ -82,17 +91,78 @@ pub struct TestResult { } impl TestResult { - pub fn new(test: Arc) -> Self { + fn finished( + test: &Arc, + history: Vec, + output: Vec, + time: f64, + memory_leak: bool, + ) -> Self { + let expected = test.get_expected_out(); + Self { + did_pass: output == expected, + test: Arc::clone(test), + did_timeout: false, + error_test: false, + memory_leak, + command_history: history, + gen_output: Some(output), + time: Some(time), + failing_step: None, + } + } + + fn timeout( + test: &Arc, + history: Vec, + step_name: &str, + timeout: f64, + ) -> Self { Self { - test, + test: Arc::clone(test), + did_pass: false, + did_timeout: true, + error_test: false, + memory_leak: false, + command_history: history, + gen_output: None, + time: Some(timeout), + failing_step: Some(step_name.to_string()), + } + } + + fn fail(test: &Arc, history: Vec, failing_step: Option) -> Self { + Self { + test: Arc::clone(test), did_pass: false, did_timeout: false, error_test: false, memory_leak: false, - command_history: Vec::new(), + command_history: history, gen_output: None, time: None, - failing_step: None, + failing_step, + } + } + + fn error( + test: &Arc, + history: Vec, + stderr: Vec, + step_name: &str, + did_pass: bool, + memory_leak: bool, + ) -> Self { + Self { + test: Arc::clone(test), + did_pass, + did_timeout: false, + error_test: true, + memory_leak, + command_history: history, + gen_output: Some(stderr), + time: None, + failing_step: Some(step_name.to_string()), } } } @@ -131,142 +201,137 @@ impl<'a> ToolChainRunner<'a> { /// Run each step of the toolchain for a given test and executable. pub fn run(&self, test: &Arc, exe: &Executable) -> TestResult { - let mut input_file = test.path.clone(); - let expected = test.get_expected_out().to_vec(); - let mut tr = TestResult::new(Arc::clone(test)); let tc_len = self.tc.len(); - // Keep temp file handles alive until the run completes. 
- let mut _tmp_handles: Vec = Vec::new(); - - // NOTE: This is super imperative. The logic is complex, and requires a thorough analsis. - // I have a hunch it can be simplified. - for (index, step) in self.tc.iter().enumerate() { - let last_step = index == tc_len - 1; - - // Note: there must be some more rustic way to construct the empty vec from false...? - let input_stream = if step.uses_ins { - test.get_input_stream().to_vec() - } else { - Vec::new() - }; + let init = PipelineState { + input_file: test.path.clone(), + tmp_handles: Vec::new(), + command_history: Vec::new(), + memory_leak: false, + }; - let output_file = self.resolve_output_file(step); - let magic = MagicParams { - exe_path: exe.exe_path.display().to_string(), - input_file: input_file.display().to_string(), - output_file: output_file.as_ref().map(|p| p.display().to_string()), - }; + let result = self.tc.iter().enumerate().try_fold(init, |state, (index, step)| { + self.run_step(state, step, index == tc_len - 1, test, exe) + }); - let mut command = self.resolve_command(step, &magic); - - // In memcheck mode, wrap the last step with valgrind - if self.memcheck && last_step { - // Check that valgrind is installed - let valgrind_check = process::Command::new(VALGRIND_BIN) - .arg("--version") - .stdout(process::Stdio::null()) - .stderr(process::Stdio::null()) - .status(); - match valgrind_check { - Ok(s) if s.success() => { - // Prepend valgrind flags before the existing command - let mut wrapped = vec![ - VALGRIND_BIN.to_string(), - "--leak-check=full".to_string(), - format!("--error-exitcode={VALGRIND_EXIT_CODE}"), - "--log-file=/dev/null".to_string(), - ]; - wrapped.extend(command.args); - command.args = wrapped; - } - _ => { - tr.did_pass = false; - tr.failing_step = Some("memcheck: valgrind not found".to_string()); - return tr; - } - } - } + match result { + ControlFlow::Break(tr) => tr, + ControlFlow::Continue(_) => panic!("Toolchain reached undefined conditions"), + } + } - let cr = 
self.run_command(&command, &input_stream); + fn run_step( + &self, + mut state: PipelineState, + step: &Step, + last_step: bool, + test: &Arc, + exe: &Executable, + ) -> ControlFlow { + let input_stream = if step.uses_ins { + test.get_input_stream().to_vec() + } else { + Vec::new() + }; - // Check timeout - if cr.timed_out { - tr.did_pass = false; - tr.did_timeout = true; - tr.failing_step = Some(step.name.clone()); - tr.time = Some(self.timeout); - tr.command_history.push(cr); - return tr; - } + let output_file = self.resolve_output_file(step); + let magic = MagicParams { + exe_path: exe.exe_path.display().to_string(), + input_file: state.input_file.display().to_string(), + output_file: output_file.as_ref().map(|p| p.display().to_string()), + }; - // Check if OS failed to exec - if cr.exit_status == -1 { - tr.did_pass = false; - tr.command_history.push(cr); - return tr; - } + let mut command = self.resolve_command(step, &magic); - let stdout = cr.stdout.clone(); - let stderr = cr.stderr.clone(); - let step_time = (cr.time * 10000.0).round() / 10000.0; + // In memcheck mode, wrap the last step with valgrind + if self.memcheck && last_step && !self.wrap_valgrind(&mut command) { + return ControlFlow::Break(TestResult::fail( + test, state.command_history, + Some("memcheck: valgrind not found".to_string()), + )); + } - // Check reserved exit codes (e.g., valgrind) - if RESERVED_EXIT_CODES.contains(&cr.exit_status) { - if cr.exit_status == VALGRIND_EXIT_CODE { - tr.memory_leak = true; - } - } + let cr = self.run_command(&command, &input_stream); + + if cr.timed_out { + state.command_history.push(cr); + return ControlFlow::Break(TestResult::timeout( + test, state.command_history, &step.name, self.timeout, + )); + } + + if cr.exit_status == -1 { + state.command_history.push(cr); + return ControlFlow::Break(TestResult::fail( + test, state.command_history, None, + )); + } + + let stdout = cr.stdout.clone(); + let stderr = cr.stderr.clone(); + let step_time = (cr.time * 
10000.0).round() / 10000.0; + + let exit_status = cr.exit_status; + + if exit_status == VALGRIND_EXIT_CODE { + state.memory_leak = true; + } + + state.command_history.push(cr); - if cr.exit_status != 0 - && !RESERVED_EXIT_CODES.contains(&cr.exit_status) - { - tr.gen_output = Some(stderr.clone()); - tr.failing_step = Some(step.name.clone()); - tr.error_test = true; - - if step.allow_error { - self.handle_error_test(&mut tr, &stderr, &expected); - tr.command_history.push(cr); - return tr; - } else { - tr.did_pass = false; - tr.command_history.push(cr); - return tr; + if exit_status != 0 && !RESERVED_EXIT_CODES.contains(&exit_status) { + let did_pass = step.allow_error + && self.check_error_test(&stderr, test.get_expected_out()); + return ControlFlow::Break(TestResult::error( + test, state.command_history, stderr, + &step.name, did_pass, state.memory_leak, + )); + } + + if last_step { + let final_output = match output_file { + Some(ref p) if p.exists() => fs::read(p).unwrap_or_default(), + Some(_) => return ControlFlow::Break(TestResult::finished( + test, state.command_history, Vec::new(), step_time, state.memory_leak, + )), + None => stdout, + }; + return ControlFlow::Break(TestResult::finished( + test, state.command_history, final_output, step_time, state.memory_leak, + )); + } + + // Not the last step — continue the pipeline + state.input_file = output_file.unwrap_or_else(|| { + match make_tmp_file(&stdout) { + Some((path, handle)) => { + state.tmp_handles.push(handle); + path } - } else if last_step { - let final_stdout = if let Some(ref out_path) = output_file { - if !out_path.exists() { - tr.command_history.push(cr); - tr.did_pass = false; - return tr; - } - fs::read(out_path).unwrap_or_default() - } else { - stdout - }; - - tr.time = Some(step_time); - tr.gen_output = Some(final_stdout.clone()); - tr.did_pass = final_stdout == expected; - tr.command_history.push(cr); - return tr; - } else { - // Set up next step's input - input_file = 
output_file.unwrap_or_else(|| { - match make_tmp_file(&stdout) { - Some((path, handle)) => { - _tmp_handles.push(handle); - path - } - None => PathBuf::new(), - } - }); - tr.command_history.push(cr); + None => PathBuf::new(), } - } + }); + ControlFlow::Continue(state) + } - // Unreachable for well-defined toolchains - panic!("Toolchain reached undefined conditions during execution."); + /// Prepend valgrind flags to command. Returns false if valgrind is not installed. + fn wrap_valgrind(&self, command: &mut ResolvedCommand) -> bool { + let ok = process::Command::new(VALGRIND_BIN) + .arg("--version") + .stdout(process::Stdio::null()) + .stderr(process::Stdio::null()) + .status() + .is_ok_and(|s| s.success()); + if ok { + let mut wrapped = vec![ + VALGRIND_BIN.to_string(), + "--leak-check=full".to_string(), + format!("--error-exitcode={VALGRIND_EXIT_CODE}"), + "--log-file=/dev/null".to_string(), + ]; + wrapped.append(&mut command.args); + command.args = wrapped; + } + ok } fn run_command(&self, command: &ResolvedCommand, stdin: &[u8]) -> CommandResult { @@ -391,25 +456,18 @@ impl<'a> ToolChainRunner<'a> { } } - fn handle_error_test(&self, tr: &mut TestResult, produced: &[u8], expected: &[u8]) { + fn check_error_test(&self, produced: &[u8], expected: &[u8]) -> bool { let produced_str = match std::str::from_utf8(produced) { - Ok(s) => s.trim().to_string(), - Err(_) => { - tr.did_pass = false; - return; - } + Ok(s) => s.trim(), + Err(_) => return false, }; let expected_str = match std::str::from_utf8(expected) { - Ok(s) => s.trim().to_string(), - Err(_) => { - tr.did_pass = false; - return; - } + Ok(s) => s.trim(), + Err(_) => return false, }; if produced_str.is_empty() || expected_str.is_empty() { - tr.did_pass = false; - return; + return false; } let rt_error = RUNTIME_ERRORS @@ -424,32 +482,30 @@ impl<'a> ToolChainRunner<'a> { if let Some(rt_err) = rt_error { let pattern = format!(r"{}(\s+on\s+Line\s+\d+)?(:.+)?", rt_err); let re = Regex::new(&pattern).unwrap(); - 
tr.did_pass = re.is_match(&produced_str) && re.is_match(&expected_str); + re.is_match(produced_str) && re.is_match(expected_str) } else { - tr.did_pass = false; + false } } else { - let prod_error = ERROR_KIND_RE.captures(&produced_str); - let exp_error = ERROR_KIND_RE.captures(&expected_str); - let prod_line = ERROR_LINE_RE.captures(&produced_str); - let exp_line = ERROR_LINE_RE.captures(&expected_str); + let prod_error = ERROR_KIND_RE.captures(produced_str); + let exp_error = ERROR_KIND_RE.captures(expected_str); + let prod_line = ERROR_LINE_RE.captures(produced_str); + let exp_line = ERROR_LINE_RE.captures(expected_str); // MainError hack if let (Some(ref pe), Some(ref ee)) = (&prod_error, &exp_error) { if pe.get(1).map(|m| m.as_str()) == Some("MainError") && ee.get(1).map(|m| m.as_str()) == Some("MainError") { - tr.did_pass = true; - return; + return true; } } - if prod_error.is_some() && exp_error.is_some() && prod_line.is_some() && exp_line.is_some() - { - tr.did_pass = prod_line.unwrap().get(1).map(|m| m.as_str()) - == exp_line.unwrap().get(1).map(|m| m.as_str()); - } else { - tr.did_pass = false; + match (prod_error, exp_error, prod_line, exp_line) { + (Some(_), Some(_), Some(pl), Some(el)) => { + pl.get(1).map(|m| m.as_str()) == el.get(1).map(|m| m.as_str()) + } + _ => false, } } } From a31af62e6247d68787ef70af3ab769a29f003c56 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 16:52:26 -0700 Subject: [PATCH 18/45] refactor: simplify toolchain config json language --- src/runner.rs | 42 +++++++------- src/toolchain.rs | 58 ++++++++++++-------- src/util.rs | 11 ++++ tests/configs/ConfigGrade.json | 35 ++++++------ tests/configs/VCalcCompileConfig.json | 41 ++++++++------ tests/configs/ValgrindConfig.json | 30 +++++----- tests/configs/ValgrindGazpreaConfig.json | 52 ++++++++++-------- tests/configs/catConfig.json | 21 +++---- tests/configs/catConfigDarwin.json | 21 +++---- tests/configs/gazbolt-configs/gazprea.json | 12 ++-- 
tests/configs/gazbolt-configs/generator.json | 22 ++++---- tests/configs/gazbolt-configs/scalc.json | 9 +-- tests/configs/gazbolt-configs/vcalc.json | 18 +++--- tests/configs/gccFailConfig.json | 14 ++--- tests/configs/gccMemcheckConfig.json | 14 ++--- tests/configs/gccMixConfig.json | 14 ++--- tests/configs/gccPassConfig.json | 14 ++--- tests/configs/invalidDirConfig.json | 14 ++--- tests/configs/invalidExeConfig.json | 14 ++--- tests/configs/invalidMultiConfig.json | 14 ++--- tests/configs/invalidRutnime.json | 14 ++--- tests/configs/perfConfig.json | 14 ++--- tests/configs/runtimeConfigDarwin.json | 30 +++++----- tests/configs/runtimeConfigLinux.json | 30 +++++----- tests/configs/serverConfig.json | 15 ++--- 25 files changed, 313 insertions(+), 260 deletions(-) diff --git a/src/runner.rs b/src/runner.rs index 9cf058a..9b293b7 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -20,7 +20,7 @@ static ERROR_LINE_RE: LazyLock = use crate::config::Executable; use crate::testfile::TestFile; use crate::toolchain::{Step, ToolChain}; -use crate::util::make_tmp_file; +use crate::util::{make_tmp_file, make_empty_tmp_file}; /// Reserved exit code for valgrind leak detection. 
pub const VALGRIND_EXIT_CODE: i32 = 111; @@ -233,13 +233,19 @@ impl<'a> ToolChainRunner<'a> { Vec::new() }; - let output_file = self.resolve_output_file(step); + let output_resolved = self.resolve_output_file(step); + let output_path = output_resolved.as_ref().map(|(p, _)| p.clone()); let magic = MagicParams { exe_path: exe.exe_path.display().to_string(), input_file: state.input_file.display().to_string(), - output_file: output_file.as_ref().map(|p| p.display().to_string()), + output_file: output_path.as_ref().map(|p| p.display().to_string()), }; + // Keep temp handle alive for the duration of the step + if let Some((_, handle)) = output_resolved { + state.tmp_handles.push(handle); + } + let mut command = self.resolve_command(step, &magic); // In memcheck mode, wrap the last step with valgrind @@ -255,7 +261,7 @@ impl<'a> ToolChainRunner<'a> { if cr.timed_out { state.command_history.push(cr); return ControlFlow::Break(TestResult::timeout( - test, state.command_history, &step.name, self.timeout, + test, state.command_history, &step.display_name(exe), self.timeout, )); } @@ -283,12 +289,12 @@ impl<'a> ToolChainRunner<'a> { && self.check_error_test(&stderr, test.get_expected_out()); return ControlFlow::Break(TestResult::error( test, state.command_history, stderr, - &step.name, did_pass, state.memory_leak, + &step.display_name(exe), did_pass, state.memory_leak, )); } if last_step { - let final_output = match output_file { + let final_output = match output_path { Some(ref p) if p.exists() => fs::read(p).unwrap_or_default(), Some(_) => return ControlFlow::Break(TestResult::finished( test, state.command_history, Vec::new(), step_time, state.memory_leak, @@ -301,7 +307,7 @@ impl<'a> ToolChainRunner<'a> { } // Not the last step — continue the pipeline - state.input_file = output_file.unwrap_or_else(|| { + state.input_file = output_path.unwrap_or_else(|| { match make_tmp_file(&stdout) { Some((path, handle)) => { state.tmp_handles.push(handle); @@ -393,24 +399,22 @@ impl<'a> 
ToolChainRunner<'a> { cr } - fn resolve_output_file(&self, step: &Step) -> Option { - step.output.as_ref().map(|output| { - if output.is_absolute() { - output.clone() - } else { - env::current_dir().unwrap_or_default().join(output) - } - }) + fn resolve_output_file(&self, step: &Step) -> Option<(PathBuf, tempfile::TempPath)> { + if step.args.iter().any(|a| a.contains("$OUTPUT")) { + make_empty_tmp_file() + } else { + None + } } fn resolve_command(&self, step: &Step, params: &MagicParams) -> ResolvedCommand { - let mut args = vec![step.exe_path.display().to_string()]; - args.extend(step.arguments.iter().cloned()); + let mut args = vec![step.exe_raw.clone()]; + args.extend(step.args.iter().cloned()); let mut command = ResolvedCommand::new(args); self.replace_magic_args(&mut command, params); self.replace_env_vars(&mut command); - // Make exe path absolute if relative - if !command.args.is_empty() && !Path::new(&command.args[0]).is_absolute() { + // Only resolve paths containing '/' — bare names (e.g. "gcc") use $PATH lookup + if !command.args.is_empty() && command.args[0].contains('/') && !Path::new(&command.args[0]).is_absolute() { if let Ok(abs) = fs::canonicalize(&command.args[0]) { command.args[0] = abs.to_string_lossy().into_owned(); } else if let Ok(cwd) = env::current_dir() { diff --git a/src/toolchain.rs b/src/toolchain.rs index 4b96e1e..0962b07 100644 --- a/src/toolchain.rs +++ b/src/toolchain.rs @@ -1,14 +1,13 @@ -use std::path::PathBuf; +use std::path::Path; +use crate::config::Executable; use crate::error::{DragonError, Validate}; /// A single step in a toolchain (e.g., compile, link, run). 
#[derive(Debug, Clone)] pub struct Step { - pub name: String, - pub exe_path: PathBuf, - pub arguments: Vec, - pub output: Option, + pub exe_raw: String, + pub args: Vec, pub allow_error: bool, pub uses_ins: bool, pub uses_runtime: bool, @@ -17,37 +16,50 @@ pub struct Step { impl Step { pub fn from_json(data: &serde_json::Value) -> Self { Self { - name: data["stepName"].as_str().unwrap_or("").into(), - exe_path: PathBuf::from(data["executablePath"].as_str().unwrap_or("")), - arguments: data - .get("arguments") + exe_raw: data["exe"].as_str().unwrap_or("").into(), + args: data + .get("args") .and_then(|v| v.as_array()) .map(|arr| arr.iter().filter_map(|v| v.as_str().map(Into::into)).collect()) .unwrap_or_default(), - output: data.get("output").and_then(|v| v.as_str()).map(PathBuf::from), allow_error: data["allowError"].as_bool().unwrap_or(false), uses_ins: data["usesInStr"].as_bool().unwrap_or(false), uses_runtime: data["usesRuntime"].as_bool().unwrap_or(false), } } + + /// Derive a human-readable step name from the raw exe string and the executable. 
+ pub fn display_name(&self, exe: &Executable) -> String { + match self.exe_raw.as_str() { + "$EXE" => exe.id.clone(), + "$INPUT" => "run".to_string(), + other => { + // Use filename component for paths, bare name as-is + Path::new(other) + .file_name() + .unwrap_or(other.as_ref()) + .to_string_lossy() + .into_owned() + } + } + } } impl Validate for Step { fn validate(&self) -> Vec { let mut errors = Vec::new(); - if self.name.is_empty() { - errors.push(DragonError::Config(format!( - "Missing required field 'stepName' in Step {}", self.name - ))); - } - if self.exe_path.as_os_str().is_empty() { - errors.push(DragonError::Config(format!( - "Missing required field 'exe_path' in Step: {}", self.name - ))); - } else if !self.exe_path.to_string_lossy().starts_with('$') && !self.exe_path.exists() { - errors.push(DragonError::Config(format!( - "Cannot find exe_path '{}' in Step: {}", self.exe_path.display(), self.name - ))); + if self.exe_raw.is_empty() { + errors.push(DragonError::Config( + "Missing required field 'exe' in Step".into(), + )); + } else if !self.exe_raw.starts_with('$') && self.exe_raw.contains('/') { + // Only check existence for paths (containing /), not bare names resolved via $PATH + if !Path::new(&self.exe_raw).exists() { + errors.push(DragonError::Config(format!( + "Cannot find exe '{}' in Step", + self.exe_raw + ))); + } } errors } diff --git a/src/util.rs b/src/util.rs index b239048..5be4780 100644 --- a/src/util.rs +++ b/src/util.rs @@ -38,6 +38,17 @@ pub fn make_tmp_file(content: &[u8]) -> Option<(PathBuf, tempfile::TempPath)> { Some((path_buf, path)) } +/// Create an empty temporary file with execute permissions. +/// Returns (path_buf, handle). Caller must keep the handle alive — deleted on drop. 
+pub fn make_empty_tmp_file() -> Option<(PathBuf, tempfile::TempPath)> { + let tmp = tempfile::NamedTempFile::new().ok()?; + let path = tmp.into_temp_path(); + let perms = fs::Permissions::from_mode(0o700); + fs::set_permissions(&path, perms).ok()?; + let path_buf = path.to_path_buf(); + Some((path_buf, path)) +} + /// Truncate bytes in the middle if they exceed max_bytes. pub fn truncated_bytes(data: &[u8], max_bytes: usize) -> Vec { if data.len() <= max_bytes { diff --git a/tests/configs/ConfigGrade.json b/tests/configs/ConfigGrade.json index 8dd0b96..c24148c 100644 --- a/tests/configs/ConfigGrade.json +++ b/tests/configs/ConfigGrade.json @@ -10,35 +10,36 @@ "toolchains": { "LLVM": [ { - "stepName": "compile", - "executablePath": "$EXE", - "arguments": ["$INPUT", "-o", "$OUTPUT"], - "output": "test.o", - "allowError": true + "exe": "$EXE", + "args": [ + "$INPUT", + "-o", + "$OUTPUT" + ], + "allowError": true }, { - "stepName": "run", - "executablePath": "$INPUT", - "arguments": [], + "exe": "$INPUT", "usesInStr": true, "allowError": true } ], "LLVM-opt": [ { - "stepName": "compile", - "executablePath": "$EXE", - "arguments": ["$INPUT", "-O3", "-o", "$OUTPUT"], - "output": "test.o", - "allowError": true + "exe": "$EXE", + "args": [ + "$INPUT", + "-O3", + "-o", + "$OUTPUT" + ], + "allowError": true }, { - "stepName": "run", - "executablePath": "$INPUT", - "arguments": [], + "exe": "$INPUT", "usesInStr": true, "allowError": true } - ] + ] } } diff --git a/tests/configs/VCalcCompileConfig.json b/tests/configs/VCalcCompileConfig.json index efd665b..5cddfb4 100644 --- a/tests/configs/VCalcCompileConfig.json +++ b/tests/configs/VCalcCompileConfig.json @@ -10,30 +10,37 @@ "toolchains": { "vcalc-llc": [ { - "stepName": "vcalc", - "executablePath": "$EXE", - "arguments": ["$INPUT", "$OUTPUT"], - "output": "vcalc.ll" - }, + "exe": "$EXE", + "args": [ + "$INPUT", + "$OUTPUT" + ] + }, { - "stepName": "llc", - "executablePath": "/home/justin/install/llvm/llvm-18/bin/llc", - 
"arguments": ["-filetype=obj", "-relocation-model=pic", "$INPUT", "-o", "$OUTPUT"], - "output": "vcalc.o" + "exe": "/home/justin/install/llvm/llvm-18/bin/llc", + "args": [ + "-filetype=obj", + "-relocation-model=pic", + "$INPUT", + "-o", + "$OUTPUT" + ] }, { - "stepName": "clang", - "executablePath": "/usr/bin/clang", - "arguments": ["$INPUT", "-o", "$OUTPUT", "-L$RT_PATH", "-l$RT_LIB"], - "output": "vcalc" + "exe": "/usr/bin/clang", + "args": [ + "$INPUT", + "-o", + "$OUTPUT", + "-L$RT_PATH", + "-l$RT_LIB" + ] }, { - "stepName": "run", - "executablePath": "$INPUT", - "arguments": [], + "exe": "$INPUT", "usesInStr": true, "usesRuntime": true } - ] + ] } } diff --git a/tests/configs/ValgrindConfig.json b/tests/configs/ValgrindConfig.json index 4748213..4d570d8 100644 --- a/tests/configs/ValgrindConfig.json +++ b/tests/configs/ValgrindConfig.json @@ -7,25 +7,25 @@ "toolchains": { "LLVM": [ { - "stepName": "compile", - "executablePath": "$EXE", - "arguments": ["$INPUT", "-o", "$OUTPUT"], - "output": "prog.o", - "allowError": true + "exe": "$EXE", + "args": [ + "$INPUT", + "-o", + "$OUTPUT" + ], + "allowError": true }, { - "stepName": "valgrind", - "executablePath": "/usr/bin/valgrind", - "arguments": [ - "--leak-check=full", - "--error-exitcode=111", - "--log-file=/dev/null", - "$INPUT" - ], + "exe": "/usr/bin/valgrind", + "args": [ + "--leak-check=full", + "--error-exitcode=111", + "--log-file=/dev/null", + "$INPUT" + ], "usesInStr": true, "allowError": true } - ] + ] } } - diff --git a/tests/configs/ValgrindGazpreaConfig.json b/tests/configs/ValgrindGazpreaConfig.json index 8630c3b..e601deb 100644 --- a/tests/configs/ValgrindGazpreaConfig.json +++ b/tests/configs/ValgrindGazpreaConfig.json @@ -4,39 +4,48 @@ "": "" }, "runtimes": { - "":"" + "": "" }, "solutionExecutable": "solution", "toolchains": { "gazprea-llc": [ { - "stepName": "gazprea", - "executablePath": "$EXE", - "arguments": ["$INPUT", "$OUTPUT"], - "output": "gaz.ll", + "exe": "$EXE", + "args": [ + 
"$INPUT", + "$OUTPUT" + ], "allowError": true }, { - "stepName": "llc", - "executablePath": "/cshome/cmput415/415-resources/llvm-project/build/bin/llc", - "arguments": ["-filetype=obj", "-relocation-model=pic", "$INPUT", "-o", "$OUTPUT"], - "output": "gaz.o" + "exe": "/cshome/cmput415/415-resources/llvm-project/build/bin/llc", + "args": [ + "-filetype=obj", + "-relocation-model=pic", + "$INPUT", + "-o", + "$OUTPUT" + ] }, { - "stepName": "clang", - "executablePath": "/usr/bin/clang", - "arguments": ["$INPUT", "-o", "$OUTPUT", "-L$RT_PATH", "-l$RT_LIB", "-lm"], - "output": "gaz" + "exe": "/usr/bin/clang", + "args": [ + "$INPUT", + "-o", + "$OUTPUT", + "-L$RT_PATH", + "-l$RT_LIB", + "-lm" + ] }, { - "stepName": "valgrind", - "executablePath": "/usr/bin/valgrind", - "arguments": [ - "--leak-check=full", - "--error-exitcode=111", - "--log-file=/dev/null", - "$INPUT" - ], + "exe": "/usr/bin/valgrind", + "args": [ + "--leak-check=full", + "--error-exitcode=111", + "--log-file=/dev/null", + "$INPUT" + ], "usesInStr": true, "usesRuntime": true, "allowError": true @@ -44,4 +53,3 @@ ] } } - diff --git a/tests/configs/catConfig.json b/tests/configs/catConfig.json index d38285a..720da10 100644 --- a/tests/configs/catConfig.json +++ b/tests/configs/catConfig.json @@ -6,21 +6,22 @@ "toolchains": { "GCC-toolchain": [ { - "stepName": "compile", - "executablePath": "$EXE", - "arguments": ["$INPUT", "-o", "$OUTPUT"], - "output": "/tmp/test.o", + "exe": "$EXE", + "args": [ + "$INPUT", + "-o", + "$OUTPUT" + ], "allowError": true }, { - "stepName": "cat-object", - "executablePath": "/usr/bin/cat", - "arguments": ["$INPUT"] + "exe": "/usr/bin/cat", + "args": [ + "$INPUT" + ] }, { - "stepName": "run", - "executablePath": "$INPUT", - "arguments": [], + "exe": "$INPUT", "usesInStr": true, "allowError": true } diff --git a/tests/configs/catConfigDarwin.json b/tests/configs/catConfigDarwin.json index 5de46b8..1902736 100644 --- a/tests/configs/catConfigDarwin.json +++ 
b/tests/configs/catConfigDarwin.json @@ -6,21 +6,22 @@ "toolchains": { "GCC-toolchain": [ { - "stepName": "compile", - "executablePath": "$EXE", - "arguments": ["$INPUT", "-o", "$OUTPUT"], - "output": "/tmp/test.o", + "exe": "$EXE", + "args": [ + "$INPUT", + "-o", + "$OUTPUT" + ], "allowError": true }, { - "stepName": "cat-object", - "executablePath": "/bin/cat", - "arguments": ["$INPUT"] + "exe": "/bin/cat", + "args": [ + "$INPUT" + ] }, { - "stepName": "run", - "executablePath": "$INPUT", - "arguments": [], + "exe": "$INPUT", "usesInStr": true, "allowError": true } diff --git a/tests/configs/gazbolt-configs/gazprea.json b/tests/configs/gazbolt-configs/gazprea.json index 2c602e0..19809f8 100644 --- a/tests/configs/gazbolt-configs/gazprea.json +++ b/tests/configs/gazbolt-configs/gazprea.json @@ -9,12 +9,14 @@ "toolchains": { "gazprea-llc": [ { - "stepName": "gazprea", - "executablePath": "$EXE", - "arguments": ["$INPUT", "$OUTPUT", "--interp"], - "output": "interp.out", + "exe": "$EXE", + "args": [ + "$INPUT", + "$OUTPUT", + "--interp" + ], "allowError": true } - ] + ] } } diff --git a/tests/configs/gazbolt-configs/generator.json b/tests/configs/gazbolt-configs/generator.json index 7cbc8d3..3d6ab1e 100644 --- a/tests/configs/gazbolt-configs/generator.json +++ b/tests/configs/gazbolt-configs/generator.json @@ -4,16 +4,14 @@ "generator": "$GENERATOR_PATH" }, "toolchains": { - "interpreter": [ - { - "stepName": "generator-interpreter", - "executablePath": "$EXE", - "arguments": [ - "$INPUT", - "$OUTPUT" - ], - "output": "generator.out" - } - ] - } + "interpreter": [ + { + "exe": "$EXE", + "args": [ + "$INPUT", + "$OUTPUT" + ] + } + ] + } } diff --git a/tests/configs/gazbolt-configs/scalc.json b/tests/configs/gazbolt-configs/scalc.json index b6f9a67..17a3260 100644 --- a/tests/configs/gazbolt-configs/scalc.json +++ b/tests/configs/gazbolt-configs/scalc.json @@ -6,16 +6,13 @@ "toolchains": { "interpreter": [ { - "stepName": "scalc-interpreter", - "executablePath": 
"$EXE", - "arguments": [ + "exe": "$EXE", + "args": [ "interpreter", "$INPUT", "$OUTPUT" - ], - "output": "scalc.out" + ] } ] } } - diff --git a/tests/configs/gazbolt-configs/vcalc.json b/tests/configs/gazbolt-configs/vcalc.json index d071c0f..7f8abef 100644 --- a/tests/configs/gazbolt-configs/vcalc.json +++ b/tests/configs/gazbolt-configs/vcalc.json @@ -9,20 +9,22 @@ "toolchains": { "vcalc-lli": [ { - "stepName": "vcalc", - "executablePath": "$EXE", - "arguments": ["$INPUT", "$OUTPUT"], - "output": "/tmp/vcalc.ll", + "exe": "$EXE", + "args": [ + "$INPUT", + "$OUTPUT" + ], "allowError": true }, { - "stepName": "lli", - "executablePath": "/path/to/lli", - "arguments": [ "$INPUT" ], + "exe": "/path/to/lli", + "args": [ + "$INPUT" + ], "usesInStr": true, "usesRuntime": true, "allowError": true } - ] + ] } } diff --git a/tests/configs/gccFailConfig.json b/tests/configs/gccFailConfig.json index 816a8d6..561a73a 100644 --- a/tests/configs/gccFailConfig.json +++ b/tests/configs/gccFailConfig.json @@ -6,16 +6,16 @@ "toolchains": { "GCC-toolchain": [ { - "stepName": "compile", - "executablePath": "$EXE", - "arguments": ["$INPUT", "-o", "$OUTPUT"], - "output": "/tmp/test.o", + "exe": "$EXE", + "args": [ + "$INPUT", + "-o", + "$OUTPUT" + ], "allowError": true }, { - "stepName": "run", - "executablePath": "$INPUT", - "arguments": [], + "exe": "$INPUT", "usesInStr": true, "allowError": true } diff --git a/tests/configs/gccMemcheckConfig.json b/tests/configs/gccMemcheckConfig.json index ebaad5a..121260b 100644 --- a/tests/configs/gccMemcheckConfig.json +++ b/tests/configs/gccMemcheckConfig.json @@ -6,16 +6,16 @@ "toolchains": { "GCC-toolchain": [ { - "stepName": "compile", - "executablePath": "$EXE", - "arguments": ["$INPUT", "-o", "$OUTPUT"], - "output": "/tmp/test.o", + "exe": "$EXE", + "args": [ + "$INPUT", + "-o", + "$OUTPUT" + ], "allowError": true }, { - "stepName": "run", - "executablePath": "$INPUT", - "arguments": [], + "exe": "$INPUT", "usesInStr": true, "allowError": 
true } diff --git a/tests/configs/gccMixConfig.json b/tests/configs/gccMixConfig.json index 9911087..a66da59 100644 --- a/tests/configs/gccMixConfig.json +++ b/tests/configs/gccMixConfig.json @@ -6,16 +6,16 @@ "toolchains": { "GCC-toolchain": [ { - "stepName": "compile", - "executablePath": "$EXE", - "arguments": ["$INPUT", "-o", "$OUTPUT"], - "output": "/tmp/test.o", + "exe": "$EXE", + "args": [ + "$INPUT", + "-o", + "$OUTPUT" + ], "allowError": true }, { - "stepName": "run", - "executablePath": "$INPUT", - "arguments": [], + "exe": "$INPUT", "usesInStr": true, "allowError": true } diff --git a/tests/configs/gccPassConfig.json b/tests/configs/gccPassConfig.json index 5ae0a5b..e3aa4ff 100644 --- a/tests/configs/gccPassConfig.json +++ b/tests/configs/gccPassConfig.json @@ -6,16 +6,16 @@ "toolchains": { "GCC-toolchain": [ { - "stepName": "compile", - "executablePath": "$EXE", - "arguments": ["$INPUT", "-o", "$OUTPUT"], - "output": "/tmp/test.o", + "exe": "$EXE", + "args": [ + "$INPUT", + "-o", + "$OUTPUT" + ], "allowError": true }, { - "stepName": "run", - "executablePath": "$INPUT", - "arguments": [], + "exe": "$INPUT", "usesInStr": true, "allowError": true } diff --git a/tests/configs/invalidDirConfig.json b/tests/configs/invalidDirConfig.json index e224c49..c2db996 100644 --- a/tests/configs/invalidDirConfig.json +++ b/tests/configs/invalidDirConfig.json @@ -6,16 +6,16 @@ "toolchains": { "GCC-toolchain": [ { - "stepName": "compile", - "executablePath": "$EXE", - "arguments": ["$INPUT", "-o", "$OUTPUT"], - "output": "/tmp/test.o", + "exe": "$EXE", + "args": [ + "$INPUT", + "-o", + "$OUTPUT" + ], "allowError": true }, { - "stepName": "run", - "executablePath": "$INPUT", - "arguments": [], + "exe": "$INPUT", "usesInStr": true, "allowError": true } diff --git a/tests/configs/invalidExeConfig.json b/tests/configs/invalidExeConfig.json index 69f5ec5..e60092c 100644 --- a/tests/configs/invalidExeConfig.json +++ b/tests/configs/invalidExeConfig.json @@ -6,16 +6,16 @@ 
"toolchains": { "GCC-toolchain": [ { - "stepName": "compile", - "executablePath": "$EXE", - "arguments": ["$INPUT", "-o", "$OUTPUT"], - "output": "/tmp/test.o", + "exe": "$EXE", + "args": [ + "$INPUT", + "-o", + "$OUTPUT" + ], "allowError": true }, { - "stepName": "run", - "executablePath": "$INPUT", - "arguments": [], + "exe": "$INPUT", "usesInStr": true, "allowError": true } diff --git a/tests/configs/invalidMultiConfig.json b/tests/configs/invalidMultiConfig.json index fde97a3..bd4ac6e 100644 --- a/tests/configs/invalidMultiConfig.json +++ b/tests/configs/invalidMultiConfig.json @@ -6,16 +6,16 @@ "toolchains": { "GCC-toolchain": [ { - "stepName": "compile", - "executablePath": "$EXE", - "arguments": ["$INPUT", "-o", "$OUTPUT"], - "output": "/tmp/test.o", + "exe": "$EXE", + "args": [ + "$INPUT", + "-o", + "$OUTPUT" + ], "allowError": true }, { - "stepName": "run", - "executablePath": "/this/dne/bin", - "arguments": [], + "exe": "/this/dne/bin", "usesInStr": true, "allowError": true } diff --git a/tests/configs/invalidRutnime.json b/tests/configs/invalidRutnime.json index 613324d..0491f6c 100644 --- a/tests/configs/invalidRutnime.json +++ b/tests/configs/invalidRutnime.json @@ -9,16 +9,16 @@ "toolchains": { "GCC-toolchain": [ { - "stepName": "compile", - "executablePath": "$EXE", - "arguments": ["$INPUT", "-o", "$OUTPUT"], - "output": "/tmp/test.o", + "exe": "$EXE", + "args": [ + "$INPUT", + "-o", + "$OUTPUT" + ], "allowError": true }, { - "stepName": "run", - "executablePath": "$INPUT", - "arguments": [], + "exe": "$INPUT", "usesInStr": true, "allowError": true } diff --git a/tests/configs/perfConfig.json b/tests/configs/perfConfig.json index 13b8c1e..a578d6c 100644 --- a/tests/configs/perfConfig.json +++ b/tests/configs/perfConfig.json @@ -8,16 +8,16 @@ "toolchains": { "GCC-toolchain": [ { - "stepName": "compile", - "executablePath": "$EXE", - "arguments": ["$INPUT", "-o", "$OUTPUT"], - "output": "/tmp/test.o", + "exe": "$EXE", + "args": [ + "$INPUT", + "-o", + 
"$OUTPUT" + ], "allowError": true }, { - "stepName": "run", - "executablePath": "$INPUT", - "arguments": [], + "exe": "$INPUT", "usesInStr": true, "allowError": true } diff --git a/tests/configs/runtimeConfigDarwin.json b/tests/configs/runtimeConfigDarwin.json index ca38816..2787ae1 100644 --- a/tests/configs/runtimeConfigDarwin.json +++ b/tests/configs/runtimeConfigDarwin.json @@ -9,26 +9,30 @@ "toolchains": { "clang-runtime": [ { - "stepName": "clang", - "executablePath": "$EXE", - "arguments": ["-c", "$INPUT", "-o", "$OUTPUT"], - "output": "/tmp/prog.o" + "exe": "$EXE", + "args": [ + "-c", + "$INPUT", + "-o", + "$OUTPUT" + ] }, { - "stepName": "compile", - "executablePath": "$EXE", - "arguments": ["$INPUT", "-o", "$OUTPUT", "-L$RT_PATH", "-l$RT_LIB"], - "output": "/tmp/prog" + "exe": "$EXE", + "args": [ + "$INPUT", + "-o", + "$OUTPUT", + "-L$RT_PATH", + "-l$RT_LIB" + ] }, { - "stepName": "run", - "executablePath": "$INPUT", - "arguments": [], + "exe": "$INPUT", "usesInStr": true, "allowError": true, "usesRuntime": true } ] - } + } } - \ No newline at end of file diff --git a/tests/configs/runtimeConfigLinux.json b/tests/configs/runtimeConfigLinux.json index d16054e..b9817f4 100644 --- a/tests/configs/runtimeConfigLinux.json +++ b/tests/configs/runtimeConfigLinux.json @@ -9,26 +9,30 @@ "toolchains": { "clang-runtime": [ { - "stepName": "clang", - "executablePath": "$EXE", - "arguments": ["-c", "$INPUT", "-o", "$OUTPUT"], - "output": "/tmp/prog.o" + "exe": "$EXE", + "args": [ + "-c", + "$INPUT", + "-o", + "$OUTPUT" + ] }, { - "stepName": "compile", - "executablePath": "$EXE", - "arguments": ["$INPUT", "-o", "$OUTPUT", "-L$RT_PATH", "-l$RT_LIB"], - "output": "/tmp/prog" + "exe": "$EXE", + "args": [ + "$INPUT", + "-o", + "$OUTPUT", + "-L$RT_PATH", + "-l$RT_LIB" + ] }, { - "stepName": "run", - "executablePath": "$INPUT", - "arguments": [], + "exe": "$INPUT", "usesInStr": true, "allowError": true, "usesRuntime": true } ] - } + } } - diff --git 
a/tests/configs/serverConfig.json b/tests/configs/serverConfig.json index ea93981..12c83c4 100644 --- a/tests/configs/serverConfig.json +++ b/tests/configs/serverConfig.json @@ -7,16 +7,17 @@ "toolchains": { "gcc-explorer": [ { - "stepName": "compile", - "executablePath": "$EXE", - "arguments": ["-xc", "$INPUT", "-o", "$OUTPUT"], - "output": "/tmp/test.o", + "exe": "$EXE", + "args": [ + "-xc", + "$INPUT", + "-o", + "$OUTPUT" + ], "allowError": true }, { - "stepName": "run", - "executablePath": "$INPUT", - "arguments": [], + "exe": "$INPUT", "usesInStr": true, "allowError": true } From d833a4d7a0c9490a2a0df1f3713e82b2780a55a0 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 16:55:53 -0700 Subject: [PATCH 19/45] feat: add serve mode http api for remote test execution --- Cargo.lock | 534 +++++++++++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 4 + src/cli.rs | 29 ++- src/lib.rs | 1 + src/main.rs | 20 ++ src/server.rs | 284 +++++++++++++++++++++++++++ 6 files changed, 860 insertions(+), 12 deletions(-) create mode 100644 src/server.rs diff --git a/Cargo.lock b/Cargo.lock index 9d821f6..2c83e38 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -67,12 +67,82 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "axum" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" +dependencies = [ + "axum-core", + "bytes", + "form_urlencoded", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + 
"serde_core", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "sync_wrapper", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bitflags" version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + [[package]] name = "cfg-if" version = "1.0.4" @@ -160,6 +230,8 @@ dependencies = [ name = "dragon-runner-rs" version = "0.1.0" dependencies = [ + "axum", + "base64", "clap", "colored", "csv", @@ -169,6 +241,8 @@ dependencies = [ "serde_json", "tempfile", "thiserror", + "tokio", + "tower-http", "wait-timeout", ] @@ -200,6 +274,48 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "slab", +] + [[package]] name = "getrandom" version = "0.4.1" @@ -240,6 +356,87 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "pin-utils", + "smallvec", + "tokio", +] + +[[package]] +name = "hyper-util" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" +dependencies = [ + "bytes", + "http", + "http-body", + "hyper", + "pin-project-lite", + "tokio", + "tower-service", +] + [[package]] name = "id-arena" version = "2.3.0" @@ -294,18 +491,50 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + [[package]] name = "log" version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "matchit" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" + [[package]] name = "memchr" version = "2.8.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "mio" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -318,6 +547,47 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + 
[[package]] name = "prettyplease" version = "0.2.37" @@ -352,6 +622,15 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + [[package]] name = "regex" version = "1.12.3" @@ -400,6 +679,12 @@ version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "semver" version = "1.0.27" @@ -449,6 +734,61 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", +] + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + 
+[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" +dependencies = [ + "libc", + "windows-sys 0.60.2", +] + [[package]] name = "strsim" version = "0.11.1" @@ -466,6 +806,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" + [[package]] name = "tempfile" version = "3.25.0" @@ -499,6 +845,96 @@ dependencies = [ "syn", ] +[[package]] +name = "tokio" +version = "1.49.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +dependencies = [ + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-macros" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tower" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-http" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +dependencies = [ + "bitflags", + "bytes", + "http", + "pin-project-lite", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "log", + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", +] + [[package]] name = "unicode-ident" version = "1.0.24" @@ -526,6 +962,12 @@ dependencies = [ "libc", ] +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + [[package]] name = "wasip2" version = "1.0.2+wasi-0.2.9" @@ -590,7 +1032,16 @@ version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", ] [[package]] @@ -608,14 
+1059,31 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", ] [[package]] @@ -624,48 +1092,96 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + [[package]] 
name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + [[package]] name = "wit-bindgen" version = "0.51.0" diff --git a/Cargo.toml b/Cargo.toml index 31b6034..5461fa5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,3 +18,7 @@ colored = "2" csv = "1" regex = "1" wait-timeout = "0.2" +axum = "0.8" +tokio = { version = "1", features = ["full"] } +tower-http = { version = "0.6", features = ["cors"] } +base64 = "0.22" diff --git a/src/cli.rs b/src/cli.rs index 9285a86..327cbff 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -110,12 +110,32 @@ pub enum Commands { #[arg(trailing_var_arg = true, allow_hyphen_values = true)] args: Vec, }, + /// Start an HTTP server exposing the test runner API + Serve { + /// Path to the JSON configuration file + config_file: PathBuf, + /// Address to bind the server to + #[arg(long, default_value = "127.0.0.1:3000")] + bind: String, + /// Timeout in seconds for each step + #[arg(long, default_value_t = 2.0)] + timeout: f64, + /// Maximum number of concurrent test executions + #[arg(long, default_value_t = 4)] + max_concurrent: usize, + }, } -/// Result of parsing CLI arguments — either a runner mode or a script invocation. +/// Result of parsing CLI arguments — either a runner mode, a script invocation, or a server. pub enum CliAction { Run(RunnerArgs), Script(Vec), + Serve { + config_file: PathBuf, + bind: String, + timeout: f64, + max_concurrent: usize, + }, } /// Parse CLI arguments into a CliAction. @@ -128,7 +148,7 @@ pub fn parse_cli_args() -> CliAction { // If the user omits the mode subcommand, default to "regular". 
// Detect this by checking whether the second arg is a known subcommand. - let known_modes = ["regular", "tournament", "perf", "memcheck", "script"]; + let known_modes = ["regular", "tournament", "perf", "memcheck", "script", "serve"]; let args_to_parse = if raw_args.len() >= 2 && !known_modes.contains(&raw_args[1].as_str()) && !raw_args[1].starts_with('-') { // Insert "regular" as the subcommand let mut patched = vec![raw_args[0].clone(), "regular".to_string()]; @@ -142,13 +162,16 @@ pub fn parse_cli_args() -> CliAction { match cli.command { Commands::Script { args } => CliAction::Script(args), + Commands::Serve { config_file, bind, timeout, max_concurrent } => { + CliAction::Serve { config_file, bind, timeout, max_concurrent } + } commands => { let (mode, mut args) = match commands { Commands::Regular { flags } => (Mode::Regular, flags), Commands::Tournament { flags } => (Mode::Tournament, flags), Commands::Perf { flags } => (Mode::Perf, flags), Commands::Memcheck { flags } => (Mode::Memcheck, flags), - Commands::Script { .. } => unreachable!(), + Commands::Script { .. } | Commands::Serve { .. 
} => unreachable!(), }; args.mode = mode; diff --git a/src/lib.rs b/src/lib.rs index 9fca631..8f9276b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,4 +7,5 @@ pub mod runner; pub mod script; pub mod testfile; pub mod toolchain; +pub mod server; pub mod util; diff --git a/src/main.rs b/src/main.rs index 3a51085..e0d5b4a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,6 +5,7 @@ use dragon_runner_rs::config::load_config; use dragon_runner_rs::harness::*; use dragon_runner_rs::log::log; use dragon_runner_rs::script::run_script; +use dragon_runner_rs::server; fn main() { let action = parse_cli_args(); @@ -13,6 +14,25 @@ fn main() { CliAction::Script(args) => { std::process::exit(run_script(args)); } + CliAction::Serve { config_file, bind, timeout, max_concurrent } => { + let config = match load_config(&config_file, None) { + Some(c) => c, + None => { + log(0, 0, &format!("Could not open config file: {}", config_file.display())); + std::process::exit(1); + } + }; + if !config.errors.is_empty() { + log(0, 0, &format!("Found Config {} error(s):", config.errors.len())); + for e in &config.errors { + log(0, 0, &format!("{e}").red().to_string()); + } + std::process::exit(1); + } + let rt = tokio::runtime::Runtime::new().expect("failed to create tokio runtime"); + rt.block_on(server::run_server(config, &bind, timeout, max_concurrent)); + return; + } CliAction::Run(args) => args, }; diff --git a/src/server.rs b/src/server.rs new file mode 100644 index 0000000..292f53f --- /dev/null +++ b/src/server.rs @@ -0,0 +1,284 @@ +use std::sync::Arc; + +use axum::extract::State; +use axum::http::StatusCode; +use axum::routing::{get, post}; +use axum::{Json, Router}; +use base64::engine::general_purpose::STANDARD as B64; +use base64::Engine; +use serde::{Deserialize, Serialize}; +use tokio::sync::Semaphore; +use tower_http::cors::CorsLayer; + +use crate::config::{Config, Executable}; +use crate::runner::ToolChainRunner; +use crate::testfile::TestFile; +use crate::toolchain::ToolChain; + 
+struct AppState { + config: Config, + timeout: f64, + run_semaphore: Semaphore, +} + +// --------------------------------------------------------------------------- +// Request / Response types +// --------------------------------------------------------------------------- + +#[derive(Deserialize)] +struct RunRequest { + toolchain: String, + executable: String, + code: String, + stdin: Option, + expected_output: Option, +} + +#[derive(Serialize)] +struct RunResponse { + passed: bool, + exit_status: i32, + stdout: String, + stderr: String, + time_secs: Option, + timed_out: bool, + error_test: bool, + failing_step: Option, + steps: Vec, +} + +#[derive(Serialize)] +struct StepInfo { + name: String, + exit_status: i32, + time_secs: f64, +} + +#[derive(Serialize)] +struct InfoResponse { + config_name: String, + toolchains: Vec, + executables: Vec, + packages: Vec, +} + +#[derive(Serialize)] +struct ToolchainInfo { + name: String, + num_steps: usize, +} + +#[derive(Serialize)] +struct ExecutableInfo { + id: String, + path: String, +} + +#[derive(Serialize)] +struct PackageInfo { + name: String, + num_tests: usize, +} + +#[derive(Serialize)] +struct ErrorResponse { + error: String, +} + +fn error_json(status: StatusCode, msg: impl Into) -> (StatusCode, Json) { + (status, Json(ErrorResponse { error: msg.into() })) +} + +// --------------------------------------------------------------------------- +// Handlers +// --------------------------------------------------------------------------- + +async fn health() -> &'static str { + "OK" +} + +async fn info(State(state): State>) -> Json { + let cfg = &state.config; + Json(InfoResponse { + config_name: cfg.name.clone(), + toolchains: cfg + .toolchains + .iter() + .map(|tc| ToolchainInfo { + name: tc.name.clone(), + num_steps: tc.len(), + }) + .collect(), + executables: cfg + .executables + .iter() + .map(|e| ExecutableInfo { + id: e.id.clone(), + path: e.exe_path.display().to_string(), + }) + .collect(), + packages: cfg + 
.packages + .iter() + .map(|p| PackageInfo { + name: p.name.clone(), + num_tests: p.n_tests, + }) + .collect(), + }) +} + +async fn run( + State(state): State>, + Json(req): Json, +) -> Result, (StatusCode, Json)> { + // Look up toolchain by name + let tc: ToolChain = state + .config + .toolchains + .iter() + .find(|tc| tc.name == req.toolchain) + .cloned() + .ok_or_else(|| error_json(StatusCode::BAD_REQUEST, format!("unknown toolchain: {}", req.toolchain)))?; + + // Look up executable by id + let exe: Executable = state + .config + .executables + .iter() + .find(|e| e.id == req.executable) + .cloned() + .ok_or_else(|| error_json(StatusCode::BAD_REQUEST, format!("unknown executable: {}", req.executable)))?; + + // Decode source code + let code_bytes = B64.decode(&req.code) + .map_err(|e| error_json(StatusCode::BAD_REQUEST, format!("invalid base64 in code: {e}")))?; + + // Decode optional stdin + let stdin_bytes = req.stdin + .as_ref() + .map(|s| B64.decode(s)) + .transpose() + .map_err(|e| error_json(StatusCode::BAD_REQUEST, format!("invalid base64 in stdin: {e}")))?; + + // Decode optional expected_output + let expected_bytes = req.expected_output + .as_ref() + .map(|s| B64.decode(s)) + .transpose() + .map_err(|e| error_json(StatusCode::BAD_REQUEST, format!("invalid base64 in expected_output: {e}")))?; + + // Acquire semaphore permit for backpressure + let _permit = state.run_semaphore.acquire().await + .map_err(|_| error_json(StatusCode::SERVICE_UNAVAILABLE, "server shutting down"))?; + + let timeout = state.timeout; + + // Run the toolchain in a blocking task + let result = tokio::task::spawn_blocking(move || { + // Write code to a temp file + let tmp = tempfile::Builder::new() + .suffix(".test") + .tempfile() + .map_err(|e| format!("failed to create temp file: {e}"))?; + + { + use std::io::Write; + let mut f = tmp.as_file(); + f.write_all(&code_bytes) + .map_err(|e| format!("failed to write temp file: {e}"))?; + } + + // Build TestFile from the temp path + let 
mut test = TestFile::new(tmp.path()); + + // Override directives if provided in the request + if let Some(input) = stdin_bytes { + test.input_stream = Ok(input); + } + if let Some(expected) = expected_bytes { + test.expected_out = Ok(expected); + } + + let test = Arc::new(test); + + let runner = ToolChainRunner::new(&tc, timeout) + .with_env(exe.runtime_env()); + + let result = runner.run(&test, &exe); + Ok::<_, String>(result) + }) + .await + .map_err(|e| error_json(StatusCode::INTERNAL_SERVER_ERROR, format!("task panicked: {e}")))? + .map_err(|e| error_json(StatusCode::INTERNAL_SERVER_ERROR, e))?; + + // Build step info from command history + let steps: Vec = result + .command_history + .iter() + .map(|cr| StepInfo { + name: cr.cmd.clone(), + exit_status: cr.exit_status, + time_secs: cr.time, + }) + .collect(); + + let last_exit = result + .command_history + .last() + .map(|cr| cr.exit_status) + .unwrap_or(0); + + let stdout_b64 = result + .gen_output + .as_deref() + .map(|b| B64.encode(b)) + .unwrap_or_default(); + + let stderr_b64 = result + .command_history + .last() + .map(|cr| B64.encode(&cr.stderr)) + .unwrap_or_default(); + + Ok(Json(RunResponse { + passed: result.did_pass, + exit_status: last_exit, + stdout: stdout_b64, + stderr: stderr_b64, + time_secs: result.time, + timed_out: result.did_timeout, + error_test: result.error_test, + failing_step: result.failing_step, + steps, + })) +} + +// --------------------------------------------------------------------------- +// Server entrypoint +// --------------------------------------------------------------------------- + +pub async fn run_server(config: Config, bind: &str, timeout: f64, max_concurrent: usize) { + let state = Arc::new(AppState { + config, + timeout, + run_semaphore: Semaphore::new(max_concurrent), + }); + + let app = Router::new() + .route("/health", get(health)) + .route("/api/info", get(info)) + .route("/api/run", post(run)) + .layer(CorsLayer::permissive()) + .with_state(state); + + let 
listener = tokio::net::TcpListener::bind(bind) + .await + .unwrap_or_else(|e| panic!("failed to bind to {bind}: {e}")); + + println!("dragon-runner server listening on {bind}"); + + axum::serve(listener, app) + .await + .expect("server error"); +} From 2c3fa67f30d7f610a07d97fe342e054e4a0e4ca2 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 17:00:40 -0700 Subject: [PATCH 20/45] refactor: avoid input stream allocation in run_step --- src/runner.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/runner.rs b/src/runner.rs index 9b293b7..4cab94c 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -228,9 +228,9 @@ impl<'a> ToolChainRunner<'a> { exe: &Executable, ) -> ControlFlow { let input_stream = if step.uses_ins { - test.get_input_stream().to_vec() + test.get_input_stream() } else { - Vec::new() + b"" }; let output_resolved = self.resolve_output_file(step); @@ -536,6 +536,11 @@ mod tests { load_config(&path, None).expect("config should load") } + fn _assert_send_sync() { + fn check() {} + check::>(); + } + fn run_tests_for_config(config: &Config, expected_result: bool) { for exe in &config.executables { for tc in &config.toolchains { From 2f2b9e1d6823191af5c6313c79593a707041ba99 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 17:00:59 -0700 Subject: [PATCH 21/45] feat: parallelize test execution within subpackages using rayon --- Cargo.toml | 1 + src/harness.rs | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5461fa5..6ed693c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,3 +22,4 @@ axum = "0.8" tokio = { version = "1", features = ["full"] } tower-http = { version = "0.6", features = ["cors"] } base64 = "0.22" +rayon = "1" diff --git a/src/harness.rs b/src/harness.rs index 5c74e4f..9512f46 100644 --- a/src/harness.rs +++ b/src/harness.rs @@ -2,6 +2,7 @@ use std::fs::{self, OpenOptions}; use std::io::Write; use colored::Colorize; +use rayon::prelude::*; use 
crate::cli::{Mode, RunnerArgs}; use crate::config::{Config, Executable, Package}; @@ -69,8 +70,12 @@ pub trait TestHarness { let mut counters = SubPackageCounters { pass_count: 0, test_count: 0, depth: spkg.depth }; self.pre_subpackage_hook(spkg); - for test in &spkg.tests { - let result = runner.run(test, exe); + let results: Vec = spkg.tests + .par_iter() + .map(|test| runner.run(test, exe)) + .collect(); + + for result in results { let fast_fail = cli_args.fast_fail && !result.did_pass; self.process_test_result(result, cli_args, &mut counters); if fast_fail { From 891db6956a59d7992a9ad5b97cbec28310779efb Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 17:03:27 -0700 Subject: [PATCH 22/45] feat: add embedded html frontend for serve mode --- src/index.html | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/server.rs | 6 +++++ 2 files changed, 73 insertions(+) create mode 100644 src/index.html diff --git a/src/index.html b/src/index.html new file mode 100644 index 0000000..119df4a --- /dev/null +++ b/src/index.html @@ -0,0 +1,67 @@ + + + + +dragon-runner + + +

dragon-runner

+
+
+
+
+
+ +
+
+ + diff --git a/src/server.rs b/src/server.rs index 292f53f..a57fc95 100644 --- a/src/server.rs +++ b/src/server.rs @@ -2,6 +2,7 @@ use std::sync::Arc; use axum::extract::State; use axum::http::StatusCode; +use axum::response::Html; use axum::routing::{get, post}; use axum::{Json, Router}; use base64::engine::general_purpose::STANDARD as B64; @@ -93,6 +94,10 @@ fn error_json(status: StatusCode, msg: impl Into) -> (StatusCode, Json Html<&'static str> { + Html(include_str!("index.html")) +} + async fn health() -> &'static str { "OK" } @@ -266,6 +271,7 @@ pub async fn run_server(config: Config, bind: &str, timeout: f64, max_concurrent }); let app = Router::new() + .route("/", get(index)) .route("/health", get(health)) .route("/api/info", get(info)) .route("/api/run", post(run)) From 12bdde7957dd29b3b523735c1a603935bcb3c18f Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 17:04:43 -0700 Subject: [PATCH 23/45] chore: update cargo.lock for rayon dependency --- Cargo.lock | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 2c83e38..8f54ad3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -205,6 +205,31 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + [[package]] name = "csv" 
version = "1.4.0" @@ -236,6 +261,7 @@ dependencies = [ "colored", "csv", "glob", + "rayon", "regex", "serde", "serde_json", @@ -246,6 +272,12 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "equivalent" version = "1.0.2" @@ -622,6 +654,26 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.18" From d91828c4c818bf6d868eed1d5da10a9534e7a02e Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 17:09:31 -0700 Subject: [PATCH 24/45] refactor: organize files also attach missing run_tests.sh script from many changes ago --- src/index.html | 67 ------------------------------- src/server/index.html | 69 ++++++++++++++++++++++++++++++++ src/{server.rs => server/mod.rs} | 0 tests/run_tests.sh | 18 +++++++++ tests/test_config.py | 31 ++++++++++++++ 5 files changed, 118 insertions(+), 67 deletions(-) delete mode 100644 src/index.html create mode 100644 src/server/index.html rename src/{server.rs => server/mod.rs} (100%) create mode 100755 tests/run_tests.sh create mode 100644 tests/test_config.py diff --git a/src/index.html b/src/index.html deleted file mode 100644 index 119df4a..0000000 --- a/src/index.html 
+++ /dev/null @@ -1,67 +0,0 @@ - - - - -dragon-runner - - -

dragon-runner

-
-
-
-
-
- -
-
- - diff --git a/src/server/index.html b/src/server/index.html new file mode 100644 index 0000000..ea84480 --- /dev/null +++ b/src/server/index.html @@ -0,0 +1,69 @@ + + + + +dragon-runner + + +

dragon-runner

+
+
+
+
+
+ +
+
+ + diff --git a/src/server.rs b/src/server/mod.rs similarity index 100% rename from src/server.rs rename to src/server/mod.rs diff --git a/tests/run_tests.sh b/tests/run_tests.sh new file mode 100755 index 0000000..da5ba3b --- /dev/null +++ b/tests/run_tests.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Resolve project root: script lives in /tests/ +PROJECT_ROOT="$(cd "$(dirname "$(readlink -f "$0")")/.." && pwd)" +cd "$PROJECT_ROOT" + +echo "=== Building ===" +cargo build + +echo "" +echo "=== Compiling test shared libraries ===" +python3 tests/scripts/test-scripts/compile_lib.py \ + tests/lib/src tests/lib + +echo "" +echo "=== Running tests (single-threaded to avoid /tmp/test.o races) ===" +cargo test -- --test-threads=1 diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..c676dbf --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,31 @@ +import os +import sys + +def test_valid_config(config_factory): + config = config_factory("gccPassConfig.json") + + assert config is not None + assert config.test_dir is not None + assert config.packages is not None + for pkg in config.packages: + assert pkg.subpackages is not None + for spkg in pkg.subpackages: + assert spkg is not None + assert len(spkg.tests) > 0 + + assert config.error_collection == False + assert os.path.exists(config.test_dir) + +def test_invalid_dir_config(config_factory): + config = config_factory("invalidDirConfig.json") + + assert config.error_collection == True + assert not os.path.exists(config.test_dir) + +def test_invalid_exe_config(config_factory): + + config = config_factory("invalidExeConfig.json") + + assert config.error_collection == True + assert len(config.executables) == 1 + assert not os.path.exists(config.executables[0].exe_path) From ac888d58758a16cab77f78e5e6c0c033c8446e27 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 17:20:42 -0700 Subject: [PATCH 25/45] fix: valgrind in ci & skipping when unavailable --- 
.github/workflows/ci.yml | 4 ++++ src/runner.rs | 17 +++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e097346..770c5cf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,6 +18,10 @@ jobs: - name: Install Rust toolchain uses: dtolnay/rust-toolchain@stable + - name: Install valgrind (Linux only) + if: runner.os == 'Linux' + run: sudo apt-get update && sudo apt-get install -y valgrind + - name: Cache cargo uses: actions/cache@v4 with: diff --git a/src/runner.rs b/src/runner.rs index 4cab94c..bd9ebb6 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -578,9 +578,22 @@ mod tests { run_tests_for_config(&config, false); } + fn valgrind_available() -> bool { + std::process::Command::new("valgrind") + .arg("--version") + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .status() + .is_ok_and(|s: std::process::ExitStatus| s.success()) + } + /// Memcheck wrapping works on gccPassConfig — runner still produces results. #[test] fn test_memcheck_clean_programs() { + if !valgrind_available() { + eprintln!("skipping: valgrind not found"); + return; + } let config = create_config("gccPassConfig.json"); assert!(config.errors.is_empty(), "config errors: {:?}", config.errors); let mut ran_any = false; @@ -613,6 +626,10 @@ mod tests { /// Memcheck on MemoryLeaks package — leaky programs should be flagged. 
#[test] fn test_memcheck_detects_leaks() { + if !valgrind_available() { + eprintln!("skipping: valgrind not found"); + return; + } let config = create_config("gccMemcheckConfig.json"); assert!(config.errors.is_empty(), "config errors: {:?}", config.errors); for exe in &config.executables { From 99f99dd6ff3473e08696a8ab24cc01afd2d71173 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 17:30:05 -0700 Subject: [PATCH 26/45] fix: some non-optinal flags which should be optional --- src/cli.rs | 18 +++++++++--------- src/config.rs | 5 ++--- src/harness.rs | 16 +++++++++------- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index 327cbff..c84101a 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -33,9 +33,9 @@ pub struct RunnerArgs { /// Path to the JSON configuration file pub config_file: PathBuf, - /// Path to write failure log - #[arg(long = "fail-log", default_value = "")] - pub failure_log: PathBuf, + /// Path to write failure log (tournament mode) + #[arg(long = "fail-log")] + pub failure_log: Option, /// Timeout in seconds for each step #[arg(long, default_value_t = 2.0)] @@ -46,12 +46,12 @@ pub struct RunnerArgs { pub verify: bool, /// Debug a specific package path - #[arg(long = "debug-package", default_value = "")] - pub debug_package: String, + #[arg(long = "debug-package")] + pub debug_package: Option, /// Filter packages by glob pattern (case insensitive) - #[arg(short = 'p', long = "package", default_value = "")] - pub package_filter: String, + #[arg(short = 'p', long = "package")] + pub package_filter: Option, /// Show timing information #[arg(short = 't', long = "time")] @@ -66,8 +66,8 @@ pub struct RunnerArgs { pub show_testcase: bool, /// Output file path - #[arg(short = 'o', long = "output", default_value = "")] - pub output: PathBuf, + #[arg(short = 'o', long = "output")] + pub output: Option, /// Stop on first failure #[arg(short = 'f', long = "fast-fail")] diff --git a/src/config.rs b/src/config.rs index 
c01e8ca..cbe7c16 100644 --- a/src/config.rs +++ b/src/config.rs @@ -381,9 +381,8 @@ pub fn load_config(config_path: &Path, args: Option<&RunnerArgs>) -> Option Date: Sun, 1 Mar 2026 17:36:20 -0700 Subject: [PATCH 27/45] fix: macos ci test --- src/runner.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/runner.rs b/src/runner.rs index bd9ebb6..471a55b 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -670,7 +670,12 @@ mod tests { assert!(compile_script.exists(), "missing compile_lib.py"); - let expected_lib = tests_dir.join("lib/libfib.so"); + let (lib_name, config_name) = if cfg!(target_os = "macos") { + ("lib/libfib.dylib", "runtimeConfigDarwin.json") + } else { + ("lib/libfib.so", "runtimeConfigLinux.json") + }; + let expected_lib = tests_dir.join(lib_name); if !expected_lib.exists() { let status = std::process::Command::new("python3") .args([ @@ -684,7 +689,7 @@ mod tests { assert!(expected_lib.exists(), "failed to create shared object"); } - let path = config_path("runtimeConfigLinux.json"); + let path = config_path(config_name); let config = load_config(&path, None).expect("config should load"); assert!(config.errors.is_empty(), "config errors: {:?}", config.errors); run_tests_for_config(&config, true); From 6d35a300e5cdb8bb2108cdf17230597a8b8862c7 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 22:15:52 -0700 Subject: [PATCH 28/45] refactor: imperative arg skipping for default mode replaced with try_parse_from --- src/cli.rs | 18 ++++++------------ src/main.rs | 1 - 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index c84101a..fef29fa 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -142,23 +142,17 @@ pub enum CliAction { /// /// Supports: `dragon-runner config.json [flags...]` /// `dragon-runner script [args...]` -/// If no recognized mode is given, inserts "regular" so clap can parse it. +/// If no recognized subcommand is given, defaults to "regular". 
pub fn parse_cli_args() -> CliAction { let raw_args: Vec = std::env::args().collect(); - // If the user omits the mode subcommand, default to "regular". - // Detect this by checking whether the second arg is a known subcommand. - let known_modes = ["regular", "tournament", "perf", "memcheck", "script", "serve"]; - let args_to_parse = if raw_args.len() >= 2 && !known_modes.contains(&raw_args[1].as_str()) && !raw_args[1].starts_with('-') { - // Insert "regular" as the subcommand + // Try parsing as-is first. If that fails, assume the user omitted the + // subcommand and default to "regular". + let cli = Cli::try_parse_from(&raw_args).unwrap_or_else(|_| { let mut patched = vec![raw_args[0].clone(), "regular".to_string()]; patched.extend_from_slice(&raw_args[1..]); - patched - } else { - raw_args - }; - - let cli = Cli::parse_from(args_to_parse); + Cli::parse_from(patched) + }); match cli.command { Commands::Script { args } => CliAction::Script(args), diff --git a/src/main.rs b/src/main.rs index e0d5b4a..a97d9a3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,7 +9,6 @@ use dragon_runner_rs::server; fn main() { let action = parse_cli_args(); - let cli_args = match action { CliAction::Script(args) => { std::process::exit(run_script(args)); From 7350d061bef18f16e654daf3bf3e352735611389 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 22:19:11 -0700 Subject: [PATCH 29/45] refactor: use a proc-macro system for logging --- src/config.rs | 14 +++++++------- src/harness.rs | 32 ++++++++++++++++---------------- src/log.rs | 39 ++++++++++++++++++++++++++++++--------- src/main.rs | 37 +++++++++---------------------------- 4 files changed, 62 insertions(+), 60 deletions(-) diff --git a/src/config.rs b/src/config.rs index cbe7c16..4bad724 100644 --- a/src/config.rs +++ b/src/config.rs @@ -4,9 +4,9 @@ use std::fs; use std::path::{Path, PathBuf}; use std::sync::Arc; +use crate::{info, debug, trace, trace2}; use crate::cli::RunnerArgs; use crate::error::{DragonError, 
Validate}; -use crate::log::log; use crate::testfile::TestFile; use crate::toolchain::ToolChain; use crate::util::resolve_relative; @@ -331,13 +331,13 @@ impl Config { } pub fn log_test_info(&self) { - log(1, 0, "\nPackages:"); + debug!(0, "\nPackages:"); for pkg in &self.packages { - log(1, 2, &format!("-- ({})", pkg.name)); + debug!(2, "-- ({})", pkg.name); for spkg in &pkg.subpackages { - log(2, 4, &format!("-- ({})", spkg.name)); + trace!(4, "-- ({})", spkg.name); for test in &spkg.tests { - log(3, 6, &format!("-- ({})", test.file)); + trace2!(6, "-- ({})", test.file); } } } @@ -371,12 +371,12 @@ pub fn load_config(config_path: &Path, args: Option<&RunnerArgs>) -> Option= level { println!("{:indent$}{msg}", "", indent = indent); } } -/// Log multiline content with indentation. -pub fn log_multiline(content: &str, level: u32, indent: usize) { - for line in content.lines() { - log(level, indent, line.trim_end()); - } +/// Always printed (level 0). +#[macro_export] +macro_rules! info { + ($indent:expr, $($arg:tt)*) => { + $crate::log::log(0, $indent, &format!($($arg)*)) + }; +} + +/// Printed with -v (level 1). +#[macro_export] +macro_rules! debug { + ($indent:expr, $($arg:tt)*) => { + $crate::log::log(1, $indent, &format!($($arg)*)) + }; +} + +/// Printed with -vv (level 2). +#[macro_export] +macro_rules! trace { + ($indent:expr, $($arg:tt)*) => { + $crate::log::log(2, $indent, &format!($($arg)*)) + }; } -/// Log a delimiter line. -pub fn log_delimiter(title: &str, level: u32, indent: usize) { - let delim = "-".repeat(20); - log(level, indent, &format!("{delim} {title} {delim}")); +/// Printed with -vvv (level 3). +#[macro_export] +macro_rules! 
trace2 { + ($indent:expr, $($arg:tt)*) => { + $crate::log::log(3, $indent, &format!($($arg)*)) + }; } diff --git a/src/main.rs b/src/main.rs index a97d9a3..c3f0007 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,7 +3,7 @@ use colored::Colorize; use dragon_runner_rs::cli::{parse_cli_args, CliAction, Mode}; use dragon_runner_rs::config::load_config; use dragon_runner_rs::harness::*; -use dragon_runner_rs::log::log; +use dragon_runner_rs::{info, debug}; use dragon_runner_rs::script::run_script; use dragon_runner_rs::server; @@ -17,14 +17,14 @@ fn main() { let config = match load_config(&config_file, None) { Some(c) => c, None => { - log(0, 0, &format!("Could not open config file: {}", config_file.display())); + info!(0, "Could not open config file: {}", config_file.display()); std::process::exit(1); } }; if !config.errors.is_empty() { - log(0, 0, &format!("Found Config {} error(s):", config.errors.len())); + info!(0, "Found Config {} error(s):", config.errors.len()); for e in &config.errors { - log(0, 0, &format!("{e}").red().to_string()); + info!(0, "{}", format!("{e}").red()); } std::process::exit(1); } @@ -35,44 +35,25 @@ fn main() { CliAction::Run(args) => args, }; - log(1, 0, &format!("{:?}", cli_args)); + debug!(0, "{:?}", cli_args); let config = match load_config(&cli_args.config_file, Some(&cli_args)) { Some(c) => c, None => { - log(0, 0, &format!("Could not open config file: {}", cli_args.config_file.display())); + info!(0, "Could not open config file: {}", cli_args.config_file.display()); std::process::exit(1); } }; if !config.errors.is_empty() { - log(0, 0, &format!("Found Config {} error(s):", config.errors.len())); - log(0, 0, &format!("Parsed {} below:", cli_args.config_file.display())); + info!(0, "Found Config {} error(s):", config.errors.len()); + info!(0, "Parsed {} below:", cli_args.config_file.display()); for e in &config.errors { - log(0, 0, &format!("{e}").red().to_string()); + info!(0, "{}", format!("{e}").red()); } std::process::exit(1); } - 
if cli_args.verify { - let mut input = String::new(); - println!("Enter your CCID/Github Team Name: "); - std::io::stdin() - .read_line(&mut input) - .expect("Failed to read input"); - let ccid = input.trim(); - - let found = config.packages.iter().any(|pkg| { - log(0, 2, &format!("Searching.. {}", pkg.name)); - pkg.name == ccid - }); - - if !found { - println!("Could not find package named after CCID: {}", ccid); - std::process::exit(1); - } - } - config.log_test_info(); let success = match cli_args.mode { From 02c81f7d443f6ffe7340689ab5aef0c5ec30affe Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 22:26:49 -0700 Subject: [PATCH 30/45] rename: TestHarness trait to SequentialTestHarness --- src/harness.rs | 15 ++++++++------- src/main.rs | 2 -- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/harness.rs b/src/harness.rs index a07c674..fb15373 100644 --- a/src/harness.rs +++ b/src/harness.rs @@ -16,12 +16,13 @@ pub struct SubPackageCounters { pub depth: usize, } -/// Mutable hooks called during the default iteration. -/// Config and cli_args are passed separately to avoid cloning. -pub trait TestHarness { +/// Implemented by any `TestHarness` which makes a single, sequential iteration +/// over the tests in each package and subpackage. Applies to all except for +/// the `TournamentHarness`, which iterates in a cross product. 
+pub trait SequentialTestHarness { + fn run_passed(&self) -> bool; fn process_test_result(&mut self, result: TestResult, cli_args: &RunnerArgs, counters: &mut SubPackageCounters); - fn pre_run_hook(&mut self) {} fn post_run_hook(&mut self) {} fn pre_executable_hook(&mut self, _exe_id: &str) {} @@ -129,7 +130,7 @@ impl RegularHarness { } } -impl TestHarness for RegularHarness { +impl SequentialTestHarness for RegularHarness { fn run_passed(&self) -> bool { self.passed } fn process_test_result(&mut self, result: TestResult, _cli_args: &RunnerArgs, counters: &mut SubPackageCounters) { @@ -271,7 +272,7 @@ impl MemoryCheckHarness { } } -impl TestHarness for MemoryCheckHarness { +impl SequentialTestHarness for MemoryCheckHarness { fn run_passed(&self) -> bool { self.passed } fn process_test_result(&mut self, result: TestResult, _cli_args: &RunnerArgs, counters: &mut SubPackageCounters) { @@ -326,7 +327,7 @@ impl PerformanceTestingHarness { } } -impl TestHarness for PerformanceTestingHarness { +impl SequentialTestHarness for PerformanceTestingHarness { fn run_passed(&self) -> bool { self.passed } fn process_test_result(&mut self, result: TestResult, cli_args: &RunnerArgs, counters: &mut SubPackageCounters) { diff --git a/src/main.rs b/src/main.rs index c3f0007..bcb5690 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,4 @@ use colored::Colorize; - use dragon_runner_rs::cli::{parse_cli_args, CliAction, Mode}; use dragon_runner_rs::config::load_config; use dragon_runner_rs::harness::*; @@ -36,7 +35,6 @@ fn main() { }; debug!(0, "{:?}", cli_args); - let config = match load_config(&cli_args.config_file, Some(&cli_args)) { Some(c) => c, None => { From 997dd705588d977626ff8f31de1b7355551a0811 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 22:31:46 -0700 Subject: [PATCH 31/45] refactor: derive json serialization for Step struct and display for Config --- src/config.rs | 16 +--------------- src/toolchain.rs | 29 +++++++++++++---------------- 2 files changed, 14 
insertions(+), 31 deletions(-) diff --git a/src/config.rs b/src/config.rs index 4bad724..29616da 100644 --- a/src/config.rs +++ b/src/config.rs @@ -346,21 +346,7 @@ impl Config { impl fmt::Display for Config { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - writeln!(f, "Config: {}", self.name)?; - writeln!(f, " testDir: {}", self.test_dir.display())?; - writeln!(f, " executables:")?; - for exe in &self.executables { - writeln!(f, " - {} ({})", exe.id, exe.exe_path.display())?; - } - writeln!(f, " toolchains:")?; - for tc in &self.toolchains { - writeln!(f, " - {} ({} steps)", tc.name, tc.len())?; - } - writeln!(f, " packages:")?; - for pkg in &self.packages { - write!(f, " - {}", pkg.name)?; - } - Ok(()) + fmt::Debug::fmt(self, f) } } diff --git a/src/toolchain.rs b/src/toolchain.rs index 0962b07..ac22996 100644 --- a/src/toolchain.rs +++ b/src/toolchain.rs @@ -1,33 +1,27 @@ use std::path::Path; +use serde::Deserialize; + use crate::config::Executable; use crate::error::{DragonError, Validate}; /// A single step in a toolchain (e.g., compile, link, run). 
-#[derive(Debug, Clone)] +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] pub struct Step { + #[serde(rename = "exe", default)] pub exe_raw: String, + #[serde(default)] pub args: Vec, + #[serde(default)] pub allow_error: bool, + #[serde(rename = "usesInStr", default)] pub uses_ins: bool, + #[serde(default)] pub uses_runtime: bool, } impl Step { - pub fn from_json(data: &serde_json::Value) -> Self { - Self { - exe_raw: data["exe"].as_str().unwrap_or("").into(), - args: data - .get("args") - .and_then(|v| v.as_array()) - .map(|arr| arr.iter().filter_map(|v| v.as_str().map(Into::into)).collect()) - .unwrap_or_default(), - allow_error: data["allowError"].as_bool().unwrap_or(false), - uses_ins: data["usesInStr"].as_bool().unwrap_or(false), - uses_runtime: data["usesRuntime"].as_bool().unwrap_or(false), - } - } - /// Derive a human-readable step name from the raw exe string and the executable. pub fn display_name(&self, exe: &Executable) -> String { match self.exe_raw.as_str() { @@ -76,7 +70,10 @@ impl ToolChain { pub fn new(name: &str, steps_data: &[serde_json::Value]) -> Self { Self { name: name.into(), - steps: steps_data.iter().map(Step::from_json).collect(), + steps: steps_data + .iter() + .filter_map(|v| serde_json::from_value(v.clone()).ok()) + .collect(), } } From 71283ed43fa005efb9f663b62fcfe78bb2a365ee Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 22:35:52 -0700 Subject: [PATCH 32/45] refactor: introduce Serde for RawConfig to remove manual parse_toolchain and parse_executable --- src/config.rs | 107 ++++++++++++++++++++--------------------------- src/toolchain.rs | 10 +---- 2 files changed, 47 insertions(+), 70 deletions(-) diff --git a/src/config.rs b/src/config.rs index 29616da..f3b2bd3 100644 --- a/src/config.rs +++ b/src/config.rs @@ -4,13 +4,31 @@ use std::fs; use std::path::{Path, PathBuf}; use std::sync::Arc; +use serde::Deserialize; + use crate::{info, debug, trace, trace2}; use crate::cli::RunnerArgs; use 
crate::error::{DragonError, Validate}; use crate::testfile::TestFile; -use crate::toolchain::ToolChain; +use crate::toolchain::{Step, ToolChain}; use crate::util::resolve_relative; +/// Raw JSON shape of a config file, deserialized directly by serde. +#[derive(Deserialize, Default)] +#[serde(rename_all = "camelCase")] +struct RawConfig { + #[serde(default)] + test_dir: String, + #[serde(default)] + tested_executable_paths: HashMap, + #[serde(default)] + runtimes: HashMap, + #[serde(default)] + solution_executable: Option, + #[serde(default)] + toolchains: HashMap>, +} + // --------------------------------------------------------------------------- // SubPackage // --------------------------------------------------------------------------- @@ -214,9 +232,9 @@ pub struct Config { } impl Config { - pub fn new( + fn new( config_path: &Path, - config_data: &serde_json::Value, + raw: RawConfig, debug_package: Option<&str>, package_filter: &str, ) -> Self { @@ -229,16 +247,27 @@ impl Config { .to_string_lossy() .into_owned(); - let test_dir_rel = config_data["testDir"].as_str().unwrap_or(""); - let test_dir = resolve_relative(Path::new(test_dir_rel), &abs_config); + let test_dir = resolve_relative(Path::new(&raw.test_dir), &abs_config); + + let executables = raw.tested_executable_paths + .iter() + .map(|(id, path_str)| { + let exe_path = resolve_relative(Path::new(path_str), &abs_config); + let runtime = raw.runtimes.get(id) + .map(|rt_path| { + let resolved = resolve_relative(Path::new(rt_path), &abs_config); + fs::canonicalize(&resolved).unwrap_or(resolved) + }) + .unwrap_or_default(); + Executable::new(id, exe_path, runtime) + }) + .collect(); + + let toolchains = raw.toolchains + .into_iter() + .map(|(name, steps)| ToolChain::new(&name, steps)) + .collect(); - let executables = Self::parse_executables( - config_data.get("testedExecutablePaths"), - config_data.get("runtimes"), - &abs_config, - ); - let solution_exe = 
config_data["solutionExecutable"].as_str().map(Into::into); - let toolchains = Self::parse_toolchains(config_data.get("toolchains")); let packages = Self::gather_packages(&test_dir, debug_package); let mut cfg = Self { @@ -246,7 +275,7 @@ impl Config { config_path: abs_config, test_dir, executables, - solution_exe, + solution_exe: raw.solution_executable, toolchains, packages, package_filter: package_filter.into(), @@ -256,52 +285,6 @@ impl Config { cfg } - fn parse_executables( - exe_data: Option<&serde_json::Value>, - runtime_data: Option<&serde_json::Value>, - abs_config_path: &Path, - ) -> Vec { - let exe_map = match exe_data.and_then(|v| v.as_object()) { - Some(m) => m, - None => return Vec::new(), - }; - let rt_map = runtime_data.and_then(|v| v.as_object()); - - exe_map - .iter() - .map(|(id, path_val)| { - let exe_path = resolve_relative( - Path::new(path_val.as_str().unwrap_or("")), - abs_config_path, - ); - - let runtime = rt_map - .and_then(|rts| rts.get(id.as_str())) - .and_then(|v| v.as_str()) - .map(|rt_path| { - let resolved = resolve_relative(Path::new(rt_path), abs_config_path); - fs::canonicalize(&resolved).unwrap_or(resolved) - }) - .unwrap_or_default(); - - Executable::new(id, exe_path, runtime) - }) - .collect() - } - - fn parse_toolchains(tc_data: Option<&serde_json::Value>) -> Vec { - tc_data - .and_then(|v| v.as_object()) - .map(|map| { - map.iter() - .map(|(name, steps)| { - ToolChain::new(name, steps.as_array().map(|a| a.as_slice()).unwrap_or(&[])) - }) - .collect() - }) - .unwrap_or_default() - } - fn gather_packages(test_dir: &Path, debug_package: Option<&str>) -> Vec { if let Some(pkg) = debug_package.filter(|p| !p.is_empty()) { return vec![Package::new(Path::new(pkg))]; @@ -357,11 +340,11 @@ pub fn load_config(config_path: &Path, args: Option<&RunnerArgs>) -> Option) -> Option Self { - Self { - name: name.into(), - steps: steps_data - .iter() - .filter_map(|v| serde_json::from_value(v.clone()).ok()) - .collect(), - } + pub fn new(name: 
&str, steps: Vec) -> Self { + Self { name: name.into(), steps } } pub fn len(&self) -> usize { From 1517dcad207103e936172b78dd90988a01432704 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 22:39:28 -0700 Subject: [PATCH 33/45] refactor: imperative directive parsing with functional --- src/testfile.rs | 72 +++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 42 deletions(-) diff --git a/src/testfile.rs b/src/testfile.rs index 6d63525..b5468e7 100644 --- a/src/testfile.rs +++ b/src/testfile.rs @@ -59,13 +59,10 @@ impl TestFile { "Directive Conflict for test {}: Supplied both {inline_dir} and {file_dir}", test_path.file_name().unwrap_or_default().to_string_lossy(), )), - (Some(Ok(bytes)), _) => Ok(bytes), (Some(Err(e)), _) => Err(e), - (None, Some(Ok(ref_bytes))) => Self::read_referenced_file(test_path, file_dir, &ref_bytes), (None, Some(Err(e))) => Err(e), - (None, None) => Ok(Vec::new()), } } @@ -95,42 +92,36 @@ impl TestFile { comment_syntax: &str, directive: &str, ) -> Option { + let err = || format!("Unknown error occurred while parsing testfile: {}", test_path.display()); + let file = match fs::File::open(test_path) { Ok(f) => f, - Err(_) => return Some(Err(format!( - "Unknown error occurred while parsing testfile: {}", test_path.display() - ))), + Err(_) => return Some(Err(err())), }; - let mut contents: Vec = Vec::new(); - let mut found_any = false; - - for line in io::BufReader::new(file).lines() { - let line = match line { - Ok(l) => l, - Err(_) => return Some(Err(format!( - "Unknown error occurred while parsing testfile: {}", test_path.display() - ))), - }; - - match (line.find(comment_syntax), line.find(directive)) { - (Some(c), Some(d)) if c <= d => {} - _ => continue, - } - - let rhs = match line.split_once(directive) { - Some((_, rhs)) => rhs, - None => continue, - }; - - if found_any { - contents.push(b'\n'); - } - contents.extend_from_slice(&str_to_bytes(rhs, true)); - found_any = true; + let values: Result>, 
String> = io::BufReader::new(file) + .lines() + .map(|line| line.map_err(|_| err())) + .filter_map(|line| { + let line = match line { + Ok(l) => l, + Err(e) => return Some(Err(e)), + }; + let comment_pos = line.find(comment_syntax)?; + let directive_pos = line.find(directive)?; + if comment_pos > directive_pos { + return None; + } + let (_, rhs) = line.split_once(directive)?; + Some(Ok(str_to_bytes(rhs, true))) + }) + .collect(); + + match values { + Err(e) => Some(Err(e)), + Ok(parts) if parts.is_empty() => None, + Ok(parts) => Some(Ok(parts.join(&b'\n'))), } - - found_any.then(|| Ok(contents)) } /// Check if a path is a valid test file (not hidden, not .out/.ins extension). @@ -146,13 +137,10 @@ impl TestFile { impl Validate for TestFile { fn validate(&self) -> Vec { - let mut errors = Vec::new(); - if let Err(msg) = &self.expected_out { - errors.push(DragonError::TestFile(msg.clone())); - } - if let Err(msg) = &self.input_stream { - errors.push(DragonError::TestFile(msg.clone())); - } - errors + [&self.expected_out, &self.input_stream] + .into_iter() + .filter_map(|r| r.as_ref().err()) + .map(|msg| DragonError::TestFile(msg.clone())) + .collect() } } From 4f1dc82feced5e0a0df541e0cae1998886613bc2 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 22:44:38 -0700 Subject: [PATCH 34/45] fix: remove MainError hack around introduced F25 due to spec --- src/runner.rs | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/src/runner.rs b/src/runner.rs index 471a55b..f87c5f1 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -227,12 +227,8 @@ impl<'a> ToolChainRunner<'a> { test: &Arc, exe: &Executable, ) -> ControlFlow { - let input_stream = if step.uses_ins { - test.get_input_stream() - } else { - b"" - }; - + + let input_stream = if step.uses_ins { test.get_input_stream() } else { b"" }; let output_resolved = self.resolve_output_file(step); let output_path = output_resolved.as_ref().map(|(p, _)| p.clone()); let magic = 
MagicParams { @@ -257,7 +253,6 @@ impl<'a> ToolChainRunner<'a> { } let cr = self.run_command(&command, &input_stream); - if cr.timed_out { state.command_history.push(cr); return ControlFlow::Break(TestResult::timeout( @@ -275,13 +270,11 @@ impl<'a> ToolChainRunner<'a> { let stdout = cr.stdout.clone(); let stderr = cr.stderr.clone(); let step_time = (cr.time * 10000.0).round() / 10000.0; - let exit_status = cr.exit_status; if exit_status == VALGRIND_EXIT_CODE { state.memory_leak = true; } - state.command_history.push(cr); if exit_status != 0 && !RESERVED_EXIT_CODES.contains(&exit_status) { @@ -495,16 +488,6 @@ impl<'a> ToolChainRunner<'a> { let exp_error = ERROR_KIND_RE.captures(expected_str); let prod_line = ERROR_LINE_RE.captures(produced_str); let exp_line = ERROR_LINE_RE.captures(expected_str); - - // MainError hack - if let (Some(ref pe), Some(ref ee)) = (&prod_error, &exp_error) { - if pe.get(1).map(|m| m.as_str()) == Some("MainError") - && ee.get(1).map(|m| m.as_str()) == Some("MainError") - { - return true; - } - } - match (prod_error, exp_error, prod_line, exp_line) { (Some(_), Some(_), Some(pl), Some(el)) => { pl.get(1).map(|m| m.as_str()) == el.get(1).map(|m| m.as_str()) From f376bd5acb1d39fe976b4099854b4967ee2f03dc Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 22:47:06 -0700 Subject: [PATCH 35/45] refactor: give magic args their own Enum --- src/runner.rs | 42 ++++++++++++++++++++++++++++++++++-------- src/toolchain.rs | 5 +++-- 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/runner.rs b/src/runner.rs index f87c5f1..5b3e580 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -36,6 +36,34 @@ struct PipelineState { memory_leak: bool, } +/// Magic variable placeholders used in toolchain step arguments. 
+pub enum MagicArg { + Exe, + Input, + Output, +} + +impl MagicArg { + pub const ALL: &[MagicArg] = &[MagicArg::Exe, MagicArg::Input, MagicArg::Output]; + + pub fn pattern(&self) -> &'static str { + match self { + MagicArg::Exe => "$EXE", + MagicArg::Input => "$INPUT", + MagicArg::Output => "$OUTPUT", + } + } + + fn resolve<'a>(&self, params: &'a MagicParams) -> Option<&'a str> { + match self { + MagicArg::Exe => Some(¶ms.exe_path), + MagicArg::Input if !params.input_file.is_empty() => Some(¶ms.input_file), + MagicArg::Input => None, + MagicArg::Output => params.output_file.as_deref(), + } + } +} + /// Magic parameter values substituted into toolchain step arguments. pub struct MagicParams { pub exe_path: String, @@ -393,7 +421,7 @@ impl<'a> ToolChainRunner<'a> { } fn resolve_output_file(&self, step: &Step) -> Option<(PathBuf, tempfile::TempPath)> { - if step.args.iter().any(|a| a.contains("$OUTPUT")) { + if step.args.iter().any(|a| a.contains(MagicArg::Output.pattern())) { make_empty_tmp_file() } else { None @@ -420,13 +448,11 @@ impl<'a> ToolChainRunner<'a> { fn replace_magic_args(&self, command: &mut ResolvedCommand, params: &MagicParams) { for arg in command.args.iter_mut() { - if arg.contains("$EXE") { - *arg = arg.replace("$EXE", ¶ms.exe_path); - } else if arg.contains("$INPUT") && !params.input_file.is_empty() { - *arg = arg.replace("$INPUT", ¶ms.input_file); - } else if arg.contains("$OUTPUT") { - if let Some(ref out) = params.output_file { - *arg = arg.replace("$OUTPUT", out); + for magic in MagicArg::ALL { + if arg.contains(magic.pattern()) { + if let Some(val) = magic.resolve(params) { + *arg = arg.replace(magic.pattern(), val); + } } } } diff --git a/src/toolchain.rs b/src/toolchain.rs index 5b812d2..4619130 100644 --- a/src/toolchain.rs +++ b/src/toolchain.rs @@ -4,6 +4,7 @@ use serde::Deserialize; use crate::config::Executable; use crate::error::{DragonError, Validate}; +use crate::runner::MagicArg; /// A single step in a toolchain (e.g., compile, 
link, run). #[derive(Debug, Clone, Deserialize)] @@ -25,8 +26,8 @@ impl Step { /// Derive a human-readable step name from the raw exe string and the executable. pub fn display_name(&self, exe: &Executable) -> String { match self.exe_raw.as_str() { - "$EXE" => exe.id.clone(), - "$INPUT" => "run".to_string(), + s if s == MagicArg::Exe.pattern() => exe.id.clone(), + s if s == MagicArg::Input.pattern() => "run".to_string(), other => { // Use filename component for paths, bare name as-is Path::new(other) From 9ce897d0ff3d33aeee17cc4488b3f9283369f23b Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 22:59:45 -0700 Subject: [PATCH 36/45] feature: closes #19 log full failure path --- src/cli.rs | 4 ++++ src/harness.rs | 20 +++++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index fef29fa..d386d65 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -72,6 +72,10 @@ pub struct RunnerArgs { /// Stop on first failure #[arg(short = 'f', long = "fast-fail")] pub fast_fail: bool, + + /// Print full file paths for test results instead of just the filename + #[arg(long = "full-path")] + pub full_path: bool, } /// CMPUT 415 testing utility diff --git a/src/harness.rs b/src/harness.rs index fb15373..ffed740 100644 --- a/src/harness.rs +++ b/src/harness.rs @@ -8,6 +8,16 @@ use crate::info; use crate::cli::{Mode, RunnerArgs}; use crate::config::{Config, Executable, Package}; use crate::runner::{TestResult, ToolChainRunner}; +use crate::testfile::TestFile; + +/// Returns the full path or just the filename depending on the flag. +fn test_display_name(test: &TestFile, full_path: bool) -> String { + if full_path { + test.path.display().to_string() + } else { + test.file.clone() + } +} /// Counters passed through hooks during iteration. 
pub struct SubPackageCounters { @@ -133,9 +143,9 @@ impl RegularHarness { impl SequentialTestHarness for RegularHarness { fn run_passed(&self) -> bool { self.passed } - fn process_test_result(&mut self, result: TestResult, _cli_args: &RunnerArgs, counters: &mut SubPackageCounters) { + fn process_test_result(&mut self, result: TestResult, cli_args: &RunnerArgs, counters: &mut SubPackageCounters) { let indent = 4 + counters.depth; - let test_name = &result.test.file; + let test_name = test_display_name(&result.test, cli_args.full_path); if result.did_pass { let tag = if result.error_test { "[E-PASS] " } else { "[PASS] " }; info!(indent, "{}{}", tag.green(), test_name); @@ -275,12 +285,12 @@ impl MemoryCheckHarness { impl SequentialTestHarness for MemoryCheckHarness { fn run_passed(&self) -> bool { self.passed } - fn process_test_result(&mut self, result: TestResult, _cli_args: &RunnerArgs, counters: &mut SubPackageCounters) { + fn process_test_result(&mut self, result: TestResult, cli_args: &RunnerArgs, counters: &mut SubPackageCounters) { self.test_count += 1; counters.test_count += 1; let indent = 4 + counters.depth; - let test_name = &result.test.file; + let test_name = test_display_name(&result.test, cli_args.full_path); if result.did_pass { info!(indent, "{}{}", "[PASS] ".green(), test_name); counters.pass_count += 1; @@ -336,7 +346,7 @@ impl SequentialTestHarness for PerformanceTestingHarness { } let indent = 4 + counters.depth; - let test_name = &result.test.file; + let test_name = test_display_name(&result.test, cli_args.full_path); if result.did_pass { counters.pass_count += 1; info!(indent, "{}{}", "[PASS] ".green(), test_name); From ae73cecd278b9555fb4bc3b2cc9e286ec143d511 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 23:00:05 -0700 Subject: [PATCH 37/45] impl: more descript DragonError variants --- src/config.rs | 42 +++++++++++++++++++----------------------- src/error.rs | 32 ++++++++++++++++++++++++++++---- src/main.rs | 12 ++++++------ 
src/testfile.rs | 35 +++++++++++++++++------------------ src/toolchain.rs | 15 ++++++++------- 5 files changed, 78 insertions(+), 58 deletions(-) diff --git a/src/config.rs b/src/config.rs index f3b2bd3..3afdcdb 100644 --- a/src/config.rs +++ b/src/config.rs @@ -6,7 +6,7 @@ use std::sync::Arc; use serde::Deserialize; -use crate::{info, debug, trace, trace2}; +use crate::{debug, trace, trace2}; use crate::cli::RunnerArgs; use crate::error::{DragonError, Validate}; use crate::testfile::TestFile; @@ -200,14 +200,16 @@ impl Validate for Executable { fn validate(&self) -> Vec { let mut errors = Vec::new(); if !self.exe_path.exists() { - errors.push(DragonError::Config(format!( - "Cannot find binary file: {} in Executable: {}", self.exe_path.display(), self.id - ))); + errors.push(DragonError::MissingFile { + path: self.exe_path.clone(), + context: format!("Executable '{}'", self.id), + }); } if !self.runtime.as_os_str().is_empty() && !self.runtime.exists() { - errors.push(DragonError::Config(format!( - "Cannot find runtime file: {} in Executable: {}", self.runtime.display(), self.id - ))); + errors.push(DragonError::MissingFile { + path: self.runtime.clone(), + context: format!("Executable '{}' runtime", self.id), + }); } errors } @@ -301,9 +303,9 @@ impl Config { fn collect_errors(&self) -> Vec { let mut errors = Vec::new(); if !self.test_dir.exists() { - errors.push(DragonError::Config(format!( - "Cannot find test directory: {}", self.test_dir.display() - ))); + errors.push(DragonError::MissingTestDir { + path: self.test_dir.clone(), + }); } errors.extend( self.executables.iter().flat_map(|e| e.validate()) @@ -334,26 +336,20 @@ impl fmt::Display for Config { } /// Load and parse a JSON configuration file. 
-pub fn load_config(config_path: &Path, args: Option<&RunnerArgs>) -> Option { - if !config_path.exists() { - return None; - } +pub fn load_config(config_path: &Path, args: Option<&RunnerArgs>) -> Result { + let path = config_path.to_path_buf(); - let content = fs::read_to_string(config_path).ok().or_else(|| { - info!(0, "Config Error: Failed to read config: {}", config_path.display()); - None - })?; + let content = fs::read_to_string(config_path) + .map_err(|_| DragonError::ConfigRead { path: path.clone() })?; - let raw: RawConfig = serde_json::from_str(&content).ok().or_else(|| { - info!(0, "Config Error: Failed to parse config: {}", config_path.display()); - None - })?; + let raw: RawConfig = serde_json::from_str(&content) + .map_err(|e| DragonError::ConfigParse { path: path.clone(), reason: e.to_string() })?; let debug_package = args .and_then(|a| a.debug_package.as_deref()); let package_filter = args.and_then(|a| a.package_filter.as_deref()).unwrap_or(""); - Some(Config::new(config_path, raw, debug_package, package_filter)) + Ok(Config::new(config_path, raw, debug_package, package_filter)) } #[cfg(test)] diff --git a/src/error.rs b/src/error.rs index 7ff9af8..635b2bd 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,11 +1,35 @@ +use std::path::PathBuf; + use thiserror::Error; #[derive(Debug, Clone, Error)] pub enum DragonError { - #[error("Config Error: {0}")] - Config(String), - #[error("Testfile Error: {0}")] - TestFile(String), + #[error("Failed to read config file: {path}")] + ConfigRead { path: PathBuf }, + + #[error("Failed to parse config file {path}: {reason}")] + ConfigParse { path: PathBuf, reason: String }, + + #[error("Missing file: {path} ({context})")] + MissingFile { path: PathBuf, context: String }, + + #[error("Missing test directory: {path}")] + MissingTestDir { path: PathBuf }, + + #[error("Missing required field '{field}' in {context}")] + MissingField { field: String, context: String }, + + #[error("Directive conflict in {test}: both 
{inline} and {file_dir} supplied")] + DirectiveConflict { test: String, inline: String, file_dir: String }, + + #[error("Failed to read test file: {path}")] + TestFileRead { path: PathBuf }, + + #[error("Referenced file not found: {path} (directive {directive} in test {test})")] + ReferencedFileNotFound { path: PathBuf, directive: String, test: PathBuf }, + + #[error("Failed to read referenced file: {path}")] + ReferencedFileRead { path: PathBuf }, } pub trait Validate { diff --git a/src/main.rs b/src/main.rs index bcb5690..cf1d600 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,9 +14,9 @@ fn main() { } CliAction::Serve { config_file, bind, timeout, max_concurrent } => { let config = match load_config(&config_file, None) { - Some(c) => c, - None => { - info!(0, "Could not open config file: {}", config_file.display()); + Ok(c) => c, + Err(e) => { + info!(0, "{}", format!("{e}").red()); std::process::exit(1); } }; @@ -36,9 +36,9 @@ fn main() { debug!(0, "{:?}", cli_args); let config = match load_config(&cli_args.config_file, Some(&cli_args)) { - Some(c) => c, - None => { - info!(0, "Could not open config file: {}", cli_args.config_file.display()); + Ok(c) => c, + Err(e) => { + info!(0, "{}", format!("{e}").red()); std::process::exit(1); } }; diff --git a/src/testfile.rs b/src/testfile.rs index b5468e7..756071a 100644 --- a/src/testfile.rs +++ b/src/testfile.rs @@ -5,8 +5,8 @@ use std::path::{Path, PathBuf}; use crate::error::{DragonError, Validate}; use crate::util::str_to_bytes; -/// Result of parsing a directive — either successfully read bytes, or an error message. -pub type DirectiveResult = Result, String>; +/// Result of parsing a directive — either successfully read bytes, or a structured error. +pub type DirectiveResult = Result, DragonError>; /// Represents a single test case file with parsed directives. 
#[derive(Debug, Clone)] @@ -55,10 +55,11 @@ impl TestFile { let file_ref = Self::parse_directive(test_path, comment_syntax, file_dir); match (inline, file_ref) { - (Some(Ok(_)), Some(Ok(_))) => Err(format!( - "Directive Conflict for test {}: Supplied both {inline_dir} and {file_dir}", - test_path.file_name().unwrap_or_default().to_string_lossy(), - )), + (Some(Ok(_)), Some(Ok(_))) => Err(DragonError::DirectiveConflict { + test: test_path.file_name().unwrap_or_default().to_string_lossy().into_owned(), + inline: inline_dir.into(), + file_dir: file_dir.into(), + }), (Some(Ok(bytes)), _) => Ok(bytes), (Some(Err(e)), _) => Err(e), (None, Some(Ok(ref_bytes))) => Self::read_referenced_file(test_path, file_dir, &ref_bytes), @@ -74,15 +75,15 @@ impl TestFile { let full_path = parent.join(&file_str); if !full_path.exists() { - return Err(format!( - "Failed to locate path supplied to {directive}\n\tTest:{}\n\tPath:{}\n", - test_path.display(), - full_path.display(), - )); + return Err(DragonError::ReferencedFileNotFound { + path: full_path, + directive: directive.into(), + test: test_path.into(), + }); } fs::read(&full_path) - .map_err(|_| format!("Failed to read file {}", full_path.display())) + .map_err(|_| DragonError::ReferencedFileRead { path: full_path }) } /// Scan a test file for lines matching `// DIRECTIVE:value` and collect the values. 
@@ -92,16 +93,14 @@ impl TestFile { comment_syntax: &str, directive: &str, ) -> Option { - let err = || format!("Unknown error occurred while parsing testfile: {}", test_path.display()); - let file = match fs::File::open(test_path) { Ok(f) => f, - Err(_) => return Some(Err(err())), + Err(_) => return Some(Err(DragonError::TestFileRead { path: test_path.into() })), }; - let values: Result>, String> = io::BufReader::new(file) + let values: Result>, DragonError> = io::BufReader::new(file) .lines() - .map(|line| line.map_err(|_| err())) + .map(|line| line.map_err(|_| DragonError::TestFileRead { path: test_path.into() })) .filter_map(|line| { let line = match line { Ok(l) => l, @@ -140,7 +139,7 @@ impl Validate for TestFile { [&self.expected_out, &self.input_stream] .into_iter() .filter_map(|r| r.as_ref().err()) - .map(|msg| DragonError::TestFile(msg.clone())) + .cloned() .collect() } } diff --git a/src/toolchain.rs b/src/toolchain.rs index 4619130..f3db041 100644 --- a/src/toolchain.rs +++ b/src/toolchain.rs @@ -44,16 +44,17 @@ impl Validate for Step { fn validate(&self) -> Vec { let mut errors = Vec::new(); if self.exe_raw.is_empty() { - errors.push(DragonError::Config( - "Missing required field 'exe' in Step".into(), - )); + errors.push(DragonError::MissingField { + field: "exe".into(), + context: "Step".into(), + }); } else if !self.exe_raw.starts_with('$') && self.exe_raw.contains('/') { // Only check existence for paths (containing /), not bare names resolved via $PATH if !Path::new(&self.exe_raw).exists() { - errors.push(DragonError::Config(format!( - "Cannot find exe '{}' in Step", - self.exe_raw - ))); + errors.push(DragonError::MissingFile { + path: self.exe_raw.clone().into(), + context: "Step".into(), + }); } } errors From b82bd2ca5ce8fa62b7294b9c9bda69a5f29c28d1 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 23:05:34 -0700 Subject: [PATCH 38/45] update: README --- README.md | 77 +++++++++++++++++++++++++++---------------------------- 1 file 
changed, 38 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index d3718ac..2315f14 100644 --- a/README.md +++ b/README.md @@ -4,19 +4,18 @@ A test runner for CMPUT 415 Compiler Design that services both student testing a ## Installation -**Requirements:** Python ≥ 3.8 +**Requirements:** Rust toolchain (cargo) ```bash git clone https://github.com/cmput415/Dragon-Runner.git cd Dragon-Runner -pip install . +cargo install --path . ``` -Some newer versions of python prevent system-wide package installations by default. To get around this use a virtual environment or `--break-system-packages`. If `dragon-runner`is not found in your `$PATH` after install, ensure `~/.local/bin` is added. ## Quick Start ```bash -# Run tests normally +# Run tests normally (mode defaults to regular) dragon-runner config.json # Run in tournament mode (for grading) @@ -26,7 +25,7 @@ dragon-runner tournament config.json dragon-runner memcheck valgrindConfig.json # Start HTTP server for explorer -dragon-runner serve /path/to/configs +dragon-runner serve /path/to/config.json ``` ## Configuration @@ -37,23 +36,19 @@ Dragon-Runner uses JSON configuration files to define test packages, executables ```json { - "testDir": "../packages/CPackage", + "testDir": "../packages/CPackage", "testedExecutablePaths": { "gcc": "/usr/bin/gcc" }, "toolchains": { "compile-and-run": [ { - "stepName": "compile", - "executablePath": "$EXE", - "arguments": ["$INPUT", "-o", "$OUTPUT"], - "output": "/tmp/test.o", + "exe": "$EXE", + "args": ["$INPUT", "-o", "$OUTPUT"], "allowError": true }, { - "stepName": "run", - "executablePath": "$INPUT", - "arguments": [], + "exe": "$INPUT", "usesInStr": true, "allowError": true } @@ -76,20 +71,18 @@ Dragon-Runner uses JSON configuration files to define test packages, executables #### Toolchain Steps | Property | Description | Required | |----------|-------------|----------| -| `stepName` | Human-readable step name | ✓ | -| `executablePath` | Path to executable (use 
`$EXE`, `$INPUT`) | ✓ | -| `arguments` | Command arguments list | ✓ | -| `output` | Output file path (optional) | | -| `allowError` | Allow non-zero exit codes (optional) | | -| `usesInStr` | Use test input stream as stdin (optional) | | -| `usesRuntime` | Load runtime library (optional) | | +| `exe` | Path to executable (supports `$EXE`, `$INPUT`, `$OUTPUT`) | ✓ | +| `args` | Command arguments list (default: `[]`) | | +| `allowError` | Allow non-zero exit codes (default: `false`) | | +| `usesInStr` | Use test input stream as stdin (default: `false`) | | +| `usesRuntime` | Load runtime library (default: `false`) | | #### Magic Variables -- `$EXE` - Path to the tested executable -- `$INPUT` - Input file (testfile for first step, previous output for others) -- `$OUTPUT` - Output file for next step -- `$RT_PATH` - Runtime library directory -- `$RT_LIB` - Runtime library name +- `$EXE` — Path to the tested executable +- `$INPUT` — Input file (test file for first step, previous output for later steps) +- `$OUTPUT` — Temporary output file for the next step + +Environment variables (`$RT_PATH`, `$RT_LIB`, etc.) are also expanded in step arguments. ## Test File Format @@ -106,12 +99,13 @@ int main() { ``` ### Directives -- `INPUT:` - Single line of stdin (no newline) -- `INPUT_FILE:` - Path to input file -- `CHECK:` - Expected stdout (no newline) -- `CHECK_FILE:` - Path to expected output file +- `CHECK:` — Expected stdout line (no trailing newline) +- `CHECK_FILE:` — Path to expected output file +- `INPUT:` — Single line of stdin (no trailing newline) +- `INPUT_FILE:` — Path to input file +- `SKIP` — Skip this test -Multiple `INPUT:` and `CHECK:` directives are supported. `INPUT:` and `INPUT_FILE:` cannot be used together. +Multiple `CHECK:` and `INPUT:` directives are concatenated with newlines. Inline and file variants of the same directive cannot be mixed in one test. 
## Command Line Reference @@ -120,13 +114,15 @@ Multiple `INPUT:` and `CHECK:` directives are supported. `INPUT:` and `INPUT_FIL dragon-runner [mode] config.json [options...] ``` +If no mode subcommand is given, `regular` is assumed. + ### Modes -- `regular` (default) - Standard test execution -- `tournament` - Cross-product testing for grading -- `perf` - Performance benchmarking -- `memcheck` - Memory leak detection -- `serve` - HTTP server mode -- `script` - Run grading scripts +- `regular` (default) — Standard test execution +- `tournament` — Cross-product testing for grading +- `perf` — Performance benchmarking +- `memcheck` — Memory leak detection via valgrind +- `serve` — HTTP server mode +- `script` — Run grading scripts ### Options | Option | Description | @@ -135,10 +131,13 @@ dragon-runner [mode] config.json [options...] | `--fail-log FILE` | Log failures to file | | `--verify` | Verify package exists for CCID | | `--debug-package PATH` | Test single package | +| `-p, --package PATTERN` | Filter packages by glob pattern | | `-t, --time` | Show execution times | | `-v, --verbosity` | Increase output verbosity (repeat for more) | -| `-s, --show-testcase` | Display test file contents | +| `-s, --show-testcase` | Display test file contents on failure | | `-o, --output FILE` | Output file for results | +| `-f, --fast-fail` | Stop on first failure | +| `--full-path` | Print full file paths for test results | ### Examples @@ -152,8 +151,8 @@ dragon-runner tournament -vv config.json # Performance testing with 5-second timeout dragon-runner perf --timeout 5.0 config.json -# Serve configs on port 8080 -dragon-runner serve --port 8080 /path/to/configs +# Serve config on custom address +dragon-runner serve --bind 0.0.0.0:8080 config.json # Run grading script dragon-runner script build.py /path/to/submissions build.log 4 From 5d2cd925dda1a74509f7f9ac21d9d6df79ba15e1 Mon Sep 17 00:00:00 2001 From: Justin Date: Sun, 1 Mar 2026 23:10:58 -0700 Subject: [PATCH 39/45] 
feature: closes #18 adding directive to skip tests --- src/harness.rs | 53 +++++++++++++++---- src/runner.rs | 26 +++++++++ src/testfile.rs | 50 +++++++++++++---- .../RegularPass/valid_tests/021_skip_false.c | 8 +++ 4 files changed, 119 insertions(+), 18 deletions(-) create mode 100644 tests/packages/CPackage/RegularPass/valid_tests/021_skip_false.c diff --git a/src/harness.rs b/src/harness.rs index ffed740..cfbe850 100644 --- a/src/harness.rs +++ b/src/harness.rs @@ -10,6 +10,15 @@ use crate::config::{Config, Executable, Package}; use crate::runner::{TestResult, ToolChainRunner}; use crate::testfile::TestFile; +/// Format a skip count suffix for summary lines. +fn skip_suffix(skip_count: usize) -> String { + if skip_count > 0 { + format!(" ({skip_count} skipped)") + } else { + String::new() + } +} + /// Returns the full path or just the filename depending on the flag. fn test_display_name(test: &TestFile, full_path: bool) -> String { if full_path { @@ -23,6 +32,7 @@ fn test_display_name(test: &TestFile, full_path: bool) -> String { pub struct SubPackageCounters { pub pass_count: usize, pub test_count: usize, + pub skip_count: usize, pub depth: usize, } @@ -56,6 +66,7 @@ pub trait SequentialTestHarness { let exe_env = exe.runtime_env(); let mut exe_pass = 0; let mut exe_total = 0; + let mut exe_skip = 0; for tc in &config.toolchains { let runner = ToolChainRunner::new(tc, cli_args.timeout) @@ -64,10 +75,12 @@ pub trait SequentialTestHarness { info!(1, "Running Toolchain: {}", tc.name); let mut tc_pass = 0; let mut tc_total = 0; + let mut tc_skip = 0; for pkg in &config.packages { let mut pkg_pass = 0; let mut pkg_total = 0; + let mut pkg_skip = 0; info!(2, "Entering package {}", pkg.name); for spkg in &pkg.subpackages { @@ -78,7 +91,7 @@ pub trait SequentialTestHarness { } info!(3 + spkg.depth, "Entering subpackage {}", spkg.name); - let mut counters = SubPackageCounters { pass_count: 0, test_count: 0, depth: spkg.depth }; + let mut counters = SubPackageCounters { 
pass_count: 0, test_count: 0, skip_count: 0, depth: spkg.depth }; self.pre_subpackage_hook(spkg); let results: Vec = spkg.tests @@ -98,22 +111,25 @@ pub trait SequentialTestHarness { } self.post_subpackage_hook(&counters); - info!(3 + spkg.depth, "Subpackage Passed: {} / {}", counters.pass_count, counters.test_count); + info!(3 + spkg.depth, "Subpackage Passed: {} / {}{}", counters.pass_count, counters.test_count, skip_suffix(counters.skip_count)); pkg_pass += counters.pass_count; pkg_total += counters.test_count; + pkg_skip += counters.skip_count; } - info!(2, "Packaged Passed: {} / {}", pkg_pass, pkg_total); + info!(2, "Packaged Passed: {} / {}{}", pkg_pass, pkg_total, skip_suffix(pkg_skip)); tc_pass += pkg_pass; tc_total += pkg_total; + tc_skip += pkg_skip; } - info!(1, "Toolchain Passed: {} / {}", tc_pass, tc_total); + info!(1, "Toolchain Passed: {} / {}{}", tc_pass, tc_total, skip_suffix(tc_skip)); exe_pass += tc_pass; exe_total += tc_total; + exe_skip += tc_skip; } - info!(0, "Executable Passed: {} / {}", exe_pass, exe_total); + info!(0, "Executable Passed: {} / {}{}", exe_pass, exe_total, skip_suffix(exe_skip)); self.post_executable_hook(); } @@ -146,6 +162,11 @@ impl SequentialTestHarness for RegularHarness { fn process_test_result(&mut self, result: TestResult, cli_args: &RunnerArgs, counters: &mut SubPackageCounters) { let indent = 4 + counters.depth; let test_name = test_display_name(&result.test, cli_args.full_path); + if result.skipped { + info!(indent, "{}{}", "[SKIP] ".yellow(), test_name); + counters.skip_count += 1; + return; + } if result.did_pass { let tag = if result.error_test { "[E-PASS] " } else { "[PASS] " }; info!(indent, "{}{}", tag.green(), test_name); @@ -233,6 +254,10 @@ impl TournamentHarness { let tests = a_pkg.subpackages.iter().flat_map(|s| &s.tests); for test in tests { let result = runner.run(test, def_exe); + if result.skipped { + print!("{}", ".".yellow()); + continue; + } let is_solution = solution_exe == Some(&def_exe.id); if 
result.did_pass { @@ -286,11 +311,16 @@ impl SequentialTestHarness for MemoryCheckHarness { fn run_passed(&self) -> bool { self.passed } fn process_test_result(&mut self, result: TestResult, cli_args: &RunnerArgs, counters: &mut SubPackageCounters) { + let indent = 4 + counters.depth; + let test_name = test_display_name(&result.test, cli_args.full_path); + if result.skipped { + info!(indent, "{}{}", "[SKIP] ".yellow(), test_name); + counters.skip_count += 1; + return; + } self.test_count += 1; counters.test_count += 1; - let indent = 4 + counters.depth; - let test_name = test_display_name(&result.test, cli_args.full_path); if result.did_pass { info!(indent, "{}{}", "[PASS] ".green(), test_name); counters.pass_count += 1; @@ -341,12 +371,17 @@ impl SequentialTestHarness for PerformanceTestingHarness { fn run_passed(&self) -> bool { self.passed } fn process_test_result(&mut self, result: TestResult, cli_args: &RunnerArgs, counters: &mut SubPackageCounters) { + let indent = 4 + counters.depth; + let test_name = test_display_name(&result.test, cli_args.full_path); + if result.skipped { + info!(indent, "{}{}", "[SKIP] ".yellow(), test_name); + counters.skip_count += 1; + return; + } if self.first_exec { self.testfile_col.push(result.test.file.clone()); } - let indent = 4 + counters.depth; - let test_name = test_display_name(&result.test, cli_args.full_path); if result.did_pass { counters.pass_count += 1; info!(indent, "{}{}", "[PASS] ".green(), test_name); diff --git a/src/runner.rs b/src/runner.rs index 5b3e580..18d03c6 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -112,6 +112,7 @@ pub struct TestResult { pub did_timeout: bool, pub error_test: bool, pub memory_leak: bool, + pub skipped: bool, pub command_history: Vec, pub gen_output: Option>, pub time: Option, @@ -119,6 +120,21 @@ pub struct TestResult { } impl TestResult { + fn skipped(test: &Arc) -> Self { + Self { + test: Arc::clone(test), + did_pass: false, + did_timeout: false, + error_test: false, + 
memory_leak: false, + skipped: true, + command_history: Vec::new(), + gen_output: None, + time: None, + failing_step: None, + } + } + fn finished( test: &Arc, history: Vec, @@ -133,6 +149,7 @@ impl TestResult { did_timeout: false, error_test: false, memory_leak, + skipped: false, command_history: history, gen_output: Some(output), time: Some(time), @@ -152,6 +169,7 @@ impl TestResult { did_timeout: true, error_test: false, memory_leak: false, + skipped: false, command_history: history, gen_output: None, time: Some(timeout), @@ -166,6 +184,7 @@ impl TestResult { did_timeout: false, error_test: false, memory_leak: false, + skipped: false, command_history: history, gen_output: None, time: None, @@ -187,6 +206,7 @@ impl TestResult { did_timeout: false, error_test: true, memory_leak, + skipped: false, command_history: history, gen_output: Some(stderr), time: None, @@ -229,6 +249,9 @@ impl<'a> ToolChainRunner<'a> { /// Run each step of the toolchain for a given test and executable. pub fn run(&self, test: &Arc, exe: &Executable) -> TestResult { + if test.skip { + return TestResult::skipped(test); + } let tc_len = self.tc.len(); let init = PipelineState { input_file: test.path.clone(), @@ -559,6 +582,9 @@ mod tests { for spkg in &pkg.subpackages { for test in &spkg.tests { let result = runner.run(test, exe); + if result.skipped { + continue; + } assert_eq!( result.did_pass, expected_result, "Test {} expected {} but got {}", diff --git a/src/testfile.rs b/src/testfile.rs index 756071a..2f717f2 100644 --- a/src/testfile.rs +++ b/src/testfile.rs @@ -8,6 +8,28 @@ use crate::util::str_to_bytes; /// Result of parsing a directive — either successfully read bytes, or a structured error. pub type DirectiveResult = Result, DragonError>; +/// Recognized directives that can appear in test files. +pub enum Directive { + Check, + CheckFile, + Input, + InputFile, + Skip, +} + +impl Directive { + /// The string tag to scan for in test file comments. 
+ pub fn tag(&self) -> &'static str { + match self { + Directive::Check => "CHECK:", + Directive::CheckFile => "CHECK_FILE:", + Directive::Input => "INPUT:", + Directive::InputFile => "INPUT_FILE:", + Directive::Skip => "SKIP", + } + } +} + /// Represents a single test case file with parsed directives. #[derive(Debug, Clone)] pub struct TestFile { @@ -18,6 +40,7 @@ pub struct TestFile { pub comment_syntax: String, pub expected_out: DirectiveResult, pub input_stream: DirectiveResult, + pub skip: bool, } impl TestFile { @@ -30,10 +53,17 @@ impl TestFile { let file = format!("{stem}{extension}"); let comment_syntax = "//".to_string(); - let expected_out = Self::resolve_directive(test_path, &comment_syntax, "CHECK:", "CHECK_FILE:"); - let input_stream = Self::resolve_directive(test_path, &comment_syntax, "INPUT:", "INPUT_FILE:"); - - Self { path: test_path.into(), stem, extension, file, comment_syntax, expected_out, input_stream } + let expected_out = Self::resolve_directive( + test_path, &comment_syntax, + Directive::Check.tag(), Directive::CheckFile.tag(), + ); + let input_stream = Self::resolve_directive( + test_path, &comment_syntax, + Directive::Input.tag(), Directive::InputFile.tag(), + ); + let skip = Self::parse_directive(test_path, &comment_syntax, Directive::Skip.tag()).is_some(); + + Self { path: test_path.into(), stem, extension, file, comment_syntax, expected_out, input_stream, skip } } pub fn get_expected_out(&self) -> &[u8] { @@ -54,16 +84,18 @@ impl TestFile { let inline = Self::parse_directive(test_path, comment_syntax, inline_dir); let file_ref = Self::parse_directive(test_path, comment_syntax, file_dir); + // Transpose Option → Result