diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2a82632..770c5cf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,25 +8,28 @@ jobs: os: [ubuntu-latest, macos-latest] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 - + - uses: actions/checkout@v4 + - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: '3.10' - - - name: Install dependencies - run: | - pip install . - - name: run config tests - run: pytest tests/test_config.py + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + + - name: Install valgrind (Linux only) + if: runner.os == 'Linux' + run: sudo apt-get update && sudo apt-get install -y valgrind - - name: run runner tests - run: pytest tests/test_runner.py + - name: Cache cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ runner.os }}-cargo-${{ hashFiles('Cargo.lock') }} - - name: run grade tests - run: pytest tests/test_grader.py - - - name: run runtime tests - run: pytest tests/test_runtime.py + - name: Run tests + run: ./tests/run_tests.sh diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..a799330 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,1340 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "axum" 
+version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" +dependencies = [ + "axum-core", + "bytes", + "form_urlencoded", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "serde_core", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "sync_wrapper", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "clap" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" +dependencies = [ + "clap_builder", + 
"clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "colored" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" +dependencies = [ + "lazy_static", + "windows-sys 0.59.0", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "csv" +version = "1.4.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + +[[package]] +name = "dragon-runner-rs" +version = "0.1.0" +dependencies = [ + "axum", + "base64", + "clap", + "colored", + "csv", + "glob", + "rayon", + "regex", + "serde", + "serde_json", + "tempfile", + "terminal_size", + "thiserror", + "tokio", + "tower-http", + "wait-timeout", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = 
[ + "percent-encoding", +] + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", + "wasip3", +] + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "pin-utils", + "smallvec", + "tokio", +] + +[[package]] +name = "hyper-util" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" +dependencies = [ + "bytes", + "http", + "http-body", + "hyper", + "pin-project-lite", + "tokio", + "tower-service", +] + +[[package]] 
+name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", + "serde", + "serde_core", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.182" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "matchit" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "mio" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "percent-encoding" 
+version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", 
+] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" + +[[package]] +name = "rustix" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "serde" +version = "1.0.228" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", +] + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = 
"smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" +dependencies = [ + "libc", + "windows-sys 0.60.2", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" + +[[package]] +name = "tempfile" +version = "3.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "terminal_size" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b8cb979cb11c32ce1603f8137b22262a9d131aaa5c37b5678025f22b8becd0" +dependencies = [ + "rustix", + "windows-sys 0.60.2", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio" +version = "1.49.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +dependencies = [ + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-macros" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tower" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-http" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +dependencies = [ + "bitflags", + "bytes", + "http", + "pin-project-lite", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.44" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "log", + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "wait-timeout" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" +dependencies = [ + "libc", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", 
+] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ 
+ "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = 
"windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" 
+dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..e6fce96 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "dragon-runner-rs" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "dragon-runner" +path = "src/main.rs" + +[dependencies] +serde = { version = "1", features = ["derive"] } +serde_json = "1" +clap = { version = "4", features = ["derive"] } +thiserror = "2" +glob = "0.3" +tempfile = "3" +colored = "2" +csv = "1" +regex = "1" +wait-timeout = "0.2" +axum = "0.8" +tokio = { version = "1", features = ["full"] } +tower-http = { version = "0.6", features = ["cors"] } +base64 = "0.22" +rayon = "1" +terminal_size = "0.4.3" diff --git a/README.md b/README.md index d3718ac..2315f14 100644 --- a/README.md +++ b/README.md @@ -4,19 +4,18 @@ A test runner for CMPUT 415 Compiler Design that services both student testing a ## Installation -**Requirements:** Python ≥ 3.8 +**Requirements:** Rust toolchain (cargo) ```bash git clone https://github.com/cmput415/Dragon-Runner.git cd Dragon-Runner -pip install . +cargo install --path . ``` -Some newer versions of python prevent system-wide package installations by default. To get around this use a virtual environment or `--break-system-packages`. If `dragon-runner`is not found in your `$PATH` after install, ensure `~/.local/bin` is added. 
## Quick Start ```bash -# Run tests normally +# Run tests normally (mode defaults to regular) dragon-runner config.json # Run in tournament mode (for grading) @@ -26,7 +25,7 @@ dragon-runner tournament config.json dragon-runner memcheck valgrindConfig.json # Start HTTP server for explorer -dragon-runner serve /path/to/configs +dragon-runner serve /path/to/config.json ``` ## Configuration @@ -37,23 +36,19 @@ Dragon-Runner uses JSON configuration files to define test packages, executables ```json { - "testDir": "../packages/CPackage", + "testDir": "../packages/CPackage", "testedExecutablePaths": { "gcc": "/usr/bin/gcc" }, "toolchains": { "compile-and-run": [ { - "stepName": "compile", - "executablePath": "$EXE", - "arguments": ["$INPUT", "-o", "$OUTPUT"], - "output": "/tmp/test.o", + "exe": "$EXE", + "args": ["$INPUT", "-o", "$OUTPUT"], "allowError": true }, { - "stepName": "run", - "executablePath": "$INPUT", - "arguments": [], + "exe": "$INPUT", "usesInStr": true, "allowError": true } @@ -76,20 +71,18 @@ Dragon-Runner uses JSON configuration files to define test packages, executables #### Toolchain Steps | Property | Description | Required | |----------|-------------|----------| -| `stepName` | Human-readable step name | ✓ | -| `executablePath` | Path to executable (use `$EXE`, `$INPUT`) | ✓ | -| `arguments` | Command arguments list | ✓ | -| `output` | Output file path (optional) | | -| `allowError` | Allow non-zero exit codes (optional) | | -| `usesInStr` | Use test input stream as stdin (optional) | | -| `usesRuntime` | Load runtime library (optional) | | +| `exe` | Path to executable (supports `$EXE`, `$INPUT`, `$OUTPUT`) | ✓ | +| `args` | Command arguments list (default: `[]`) | | +| `allowError` | Allow non-zero exit codes (default: `false`) | | +| `usesInStr` | Use test input stream as stdin (default: `false`) | | +| `usesRuntime` | Load runtime library (default: `false`) | | #### Magic Variables -- `$EXE` - Path to the tested executable -- `$INPUT` - Input 
file (testfile for first step, previous output for others) -- `$OUTPUT` - Output file for next step -- `$RT_PATH` - Runtime library directory -- `$RT_LIB` - Runtime library name +- `$EXE` — Path to the tested executable +- `$INPUT` — Input file (test file for first step, previous output for later steps) +- `$OUTPUT` — Temporary output file for the next step + +Environment variables (`$RT_PATH`, `$RT_LIB`, etc.) are also expanded in step arguments. ## Test File Format @@ -106,12 +99,13 @@ int main() { ``` ### Directives -- `INPUT:` - Single line of stdin (no newline) -- `INPUT_FILE:` - Path to input file -- `CHECK:` - Expected stdout (no newline) -- `CHECK_FILE:` - Path to expected output file +- `CHECK:` — Expected stdout line (no trailing newline) +- `CHECK_FILE:` — Path to expected output file +- `INPUT:` — Single line of stdin (no trailing newline) +- `INPUT_FILE:` — Path to input file +- `SKIP` — Skip this test -Multiple `INPUT:` and `CHECK:` directives are supported. `INPUT:` and `INPUT_FILE:` cannot be used together. +Multiple `CHECK:` and `INPUT:` directives are concatenated with newlines. Inline and file variants of the same directive cannot be mixed in one test. ## Command Line Reference @@ -120,13 +114,15 @@ Multiple `INPUT:` and `CHECK:` directives are supported. `INPUT:` and `INPUT_FIL dragon-runner [mode] config.json [options...] ``` +If no mode subcommand is given, `regular` is assumed. 
+ ### Modes -- `regular` (default) - Standard test execution -- `tournament` - Cross-product testing for grading -- `perf` - Performance benchmarking -- `memcheck` - Memory leak detection -- `serve` - HTTP server mode -- `script` - Run grading scripts +- `regular` (default) — Standard test execution +- `tournament` — Cross-product testing for grading +- `perf` — Performance benchmarking +- `memcheck` — Memory leak detection via valgrind +- `serve` — HTTP server mode +- `script` — Run grading scripts ### Options | Option | Description | @@ -135,10 +131,13 @@ dragon-runner [mode] config.json [options...] | `--fail-log FILE` | Log failures to file | | `--verify` | Verify package exists for CCID | | `--debug-package PATH` | Test single package | +| `-p, --package PATTERN` | Filter packages by glob pattern | | `-t, --time` | Show execution times | | `-v, --verbosity` | Increase output verbosity (repeat for more) | -| `-s, --show-testcase` | Display test file contents | +| `-s, --show-testcase` | Display test file contents on failure | | `-o, --output FILE` | Output file for results | +| `-f, --fast-fail` | Stop on first failure | +| `--full-path` | Print full file paths for test results | ### Examples @@ -152,8 +151,8 @@ dragon-runner tournament -vv config.json # Performance testing with 5-second timeout dragon-runner perf --timeout 5.0 config.json -# Serve configs on port 8080 -dragon-runner serve --port 8080 /path/to/configs +# Serve config on custom address +dragon-runner serve --bind 0.0.0.0:8080 config.json # Run grading script dragon-runner script build.py /path/to/submissions build.log 4 diff --git a/dragon_runner/scripts/__init__.py b/dragon_runner/scripts/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/dragon_runner/scripts/key.py b/dragon_runner/scripts/key.py deleted file mode 100644 index 2ad5bbd..0000000 --- a/dragon_runner/scripts/key.py +++ /dev/null @@ -1,34 +0,0 @@ -from typing import Tuple -from pathlib import Path -from typing 
import Iterator - -class Key: - def __init__(self, key_path: Path): - self.key_path = key_path - self.sid_repo_suffix_map = {} - - with open(key_path) as key_file: - for line in key_file.readlines(): - sids, repo_suffix = line.strip().split(' ') - sid_list = sids.strip().split(',') - for sid in sid_list: - self.sid_repo_suffix_map[sid] = repo_suffix - - def __str__(self): - s = "" - for k, v in self.sid_repo_suffix_map.items(): - s += (f"{k}\t{v}") - return s - - def get_repo_for_sid(self, sid): - return self.sid_repo_suffix_map[sid] - - def iter_sids(self) -> Iterator[str]: - return iter(self.sid_repo_suffix_map.keys()) - - def iter_repos(self) -> Iterator[str]: - return iter(set(self.sid_repo_suffix_map.values())) - - def iter_both(self) -> Iterator[Tuple[str, str]]: - return iter(self.sid_repo_suffix_map.items()) - diff --git a/dragon_runner/src/__init__.py b/dragon_runner/src/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/dragon_runner/src/cli.py b/dragon_runner/src/cli.py deleted file mode 100644 index f2ef95e..0000000 --- a/dragon_runner/src/cli.py +++ /dev/null @@ -1,112 +0,0 @@ -from pathlib import Path -from typing import Any, NamedTuple, List -from dragon_runner.scripts.loader import Loader -from enum import Enum -import argparse -from enum import Enum -from typing import List, NamedTuple, Protocol, runtime_checkable -from pathlib import Path -import argparse -import sys -import os - -class Mode(Enum): - REGULAR = "regular" - TOURNAMENT = "tournament" - PERF = "perf" - MEMCHECK = "memcheck" - SERVE = "serve" - SCRIPT = "script" - -@runtime_checkable -class CLIArgs(Protocol): - mode: Mode - -class RunnerArgs(NamedTuple): - mode: Mode - config_file: str = "" - output: str = "" - failure_log: str = "" - debug_package: str = "" - package_filter: str = "" - timeout: float = 2.0 - time: bool = False - verbosity: int = 0 - verify: bool = False - show_testcase: bool = False - fast_fail: bool = False - -class ScriptArgs(NamedTuple): - 
mode: Mode - args: List[str] = [] - -class ServerArgs(NamedTuple): - mode: Mode - port: int = 5000 - serve_path: Path = Path(".") - -def parse_runner_args(argv_skip: int=1) -> RunnerArgs: - parser = argparse.ArgumentParser(description="CMPUT 415 testing utility") - - parser.add_argument("config_file", help="Path to the JSON configuration file") - parser.add_argument("--fail-log", dest="failure_log", default="") - parser.add_argument("--timeout", type=float, default=2.0) - parser.add_argument("--verify", action="store_true") - parser.add_argument("--debug-package", default="") - parser.add_argument("-p", "--package", dest="package_filter", default="", help="Filter packages by glob pattern (case insensitive)") - parser.add_argument("-t", "--time", action="store_true") - parser.add_argument("-v", "--verbosity", action="count", default=0) - parser.add_argument("-s", "--show-testcase", action="store_true") - parser.add_argument("-o", "--output", default="") - parser.add_argument("-f", "--fast-fail", dest="fast_fail", action="store_true") - - # Parse arguments - args = parser.parse_args(sys.argv[argv_skip:]) - - # Set debug environment variable - os.environ["DRAGON_RUNNER_DEBUG"] = str(args.verbosity) - - # Convert to dictionary and add mode - args_dict = vars(args) - args_dict["mode"] = Mode.REGULAR - - return RunnerArgs(**args_dict) - - -def parse_server_args() -> ServerArgs: - parser = argparse.ArgumentParser(description="Server mode") - parser.add_argument("serve_path", type=Path, help="Config directory or file") - parser.add_argument("--port", type=int, default=5000) - - args = parser.parse_args(sys.argv[2:]) - return ServerArgs( - mode=Mode.SERVE, - port=args.port, - serve_path=args.serve_path - ) - -def parse_cli_args() -> Any: - if len(sys.argv) < 2: - print("Usage: dragon-runner [mode] config.json [args...]") - print(" mode: [regular|tournament|perf|memcheck|serve|script])") - print(" args: dragon-runner -h") - sys.exit(1) - - first_arg = sys.argv[1] - - # 
Create a mapping to convert string to Mode enum - mode_map = {mode.value: mode for mode in Mode} - - if first_arg in mode_map: - if first_arg == Mode.SERVE.value: - return parse_server_args() - elif first_arg == Mode.SCRIPT.value: - return ScriptArgs(mode=Mode.SCRIPT, args=sys.argv[2:]) - else: - # For runner modes - args = parse_runner_args(argv_skip=2) - return RunnerArgs(**{**args._asdict(), "mode": mode_map[first_arg]}) - else: - # If no mode is supplied, default to regular mode - return parse_runner_args(1) - diff --git a/dragon_runner/src/config.py b/dragon_runner/src/config.py deleted file mode 100644 index d7b8a7d..0000000 --- a/dragon_runner/src/config.py +++ /dev/null @@ -1,275 +0,0 @@ -import json -import os -import sys -from pathlib import Path -from typing import Dict, List, Optional -from dragon_runner.src.testfile import TestFile -from dragon_runner.src.errors import ConfigError, Verifiable, ErrorCollection -from dragon_runner.src.toolchain import ToolChain -from dragon_runner.src.utils import resolve_relative -from dragon_runner.src.log import log -from dragon_runner.src.cli import RunnerArgs - -class SubPackage(Verifiable): - """ - Represents a set of tests in a directory. - """ - def __init__(self, path: str): - self.path: str = path - self.name: str = os.path.basename(path) - self.tests: List[TestFile] = [] - if os.path.isdir(path): - self.tests = self.gather_tests() - else: - self.tests = [TestFile(path)] - - def verify(self) -> ErrorCollection: - """ - Verify the tests in our config have no errors. 
- """ - ec = ErrorCollection(); - for test in self.tests: - test_errors = test.verify() - ec.extend(test_errors) - return ec - - @staticmethod - def is_test(test_path: str): - """ - Ignore reserved output and input stream extensions and hidden files - """ - return (os.path.isfile(test_path) and - not os.path.basename(test_path).startswith('.') and - not test_path.endswith(('.out', '.ins'))) - - def gather_tests(self) -> List[TestFile]: - """ - Find all tests in the directory of the subpackage. - """ - tests = [] - for file in os.listdir(self.path): - test_path = os.path.join(self.path, file) - if self.is_test(test_path): - tests.append(TestFile(test_path)) - return sorted(tests, key=lambda x: x.file) - -class Package(Verifiable): - """ - Represents a single test package. Shoud have a corresponding CCID if submitted. - """ - def __init__(self, path: str): - self.path: str = path - self.name: str = os.path.basename(path) - self.n_tests = 0 - self.subpackages = [] - - if os.path.isdir(path): - self.gather_subpackages() - else: - self.subpackages.append(SubPackage(path)) - - def verify(self) -> ErrorCollection: - """ - Propogate up all errors in subpackages. - """ - return ErrorCollection(ec for spkg in self.subpackages if (ec := spkg.verify())) - - def add_subpackage(self, spkg: SubPackage): - """ - Add a subpackage while keeping total test count up to date - """ - self.n_tests += len(spkg.tests) - self.subpackages.append(spkg) - - def gather_subpackages(self) -> List[SubPackage]: - """ - Collect any directory within a package and create a subpackage. 
- """ - subpackages = [] - top_level_spkg = SubPackage(self.path) - if len(top_level_spkg.tests) > 0: - self.add_subpackage(top_level_spkg) - for parent_path, dirs, _ in os.walk(self.path): - for dirname in dirs: - spkg = SubPackage(os.path.join(parent_path, dirname)) - if len(spkg.tests) > 0: - self.add_subpackage(spkg) - return subpackages - -class Executable(Verifiable): - """ - Represents a single tested executable along with an optional associated runtime. - """ - def __init__(self, id: str, exe_path: str, runtime: str): - self.id = id - self.exe_path = exe_path - self.runtime = runtime - self.errors = self.verify() - - def verify(self) -> ErrorCollection: - """ - Check if the binary path exists and runtime path exists (if present) - """ - errors = [] - if not os.path.exists(self.exe_path): - errors.append(ConfigError( - f"Cannot find binary file: {self.exe_path} " - f"in Executable: {self.id}") - ) - if self.runtime and not os.path.exists(self.runtime): - errors.append(ConfigError( - f"Cannot find runtime file: {self.runtime} " - f"in Executable: {self.id}") - ) - return ErrorCollection(errors) - - def source_env(self): - """ - Source all env variables defined in this executables map - TODO: Eventually, this should be replaced with a more generic JSON config format that - allows env variables to be first class. 
- """ - if self.runtime: - runtime_path = Path(self.runtime) - runtime_dir = runtime_path.parent - rt_filename = runtime_path.stem - - if sys.platform == "darwin": - preload_env = { - "DYLD_LIBRARY_PATH": str(runtime_dir), - "DYLD_INSERT_LIBRARIES": str(runtime_path) - } - else: - preload_env = { - "LD_LIBRARY_PATH": str(runtime_dir), - "LD_PRELOAD": str(runtime_path) - } - - preload_env.update({ - "RT_PATH": str(runtime_dir), - "RT_LIB": rt_filename[3:] - }) - - for key, value in preload_env.items(): - os.environ[key] = value - - def to_dict(self) -> Dict: - return { - 'id': self.id, - 'exe_path': self.exe_path - } - -class Config: - """ - An in memory representation of the JSON configuration file which directs the tester. - """ - def __init__(self, config_path: str, config_data: Dict, debug_package: Optional[str], package_filter: str = ""): - self.name = Path(config_path).stem - self.config_path = os.path.abspath(config_path) - self.config_data = config_data - self.debug_package = debug_package - self.package_filter = package_filter - self.test_dir = resolve_relative(config_data['testDir'], - os.path.abspath(config_path)) - self.executables = self.parse_executables(config_data['testedExecutablePaths'], - config_data.get('runtimes', "")) - self.solution_exe = config_data.get('solutionExecutable', None) - self.toolchains = self.parse_toolchains(config_data['toolchains']) - self.packages = self.gather_packages() - self.error_collection = self.verify() - - def parse_executables(self, executables_data: Dict[str, str], - runtimes_data: Dict[str, str]) -> List[Executable]: - """ - Parse each executable and assign a corresponding runtime if supplied - """ - def find_runtime(id) -> str: - if not runtimes_data: - return "" - for rt_id, rt_path in runtimes_data.items(): - if rt_id == id : - return os.path.abspath(resolve_relative(rt_path, self.config_path)) - return "" - return [Executable( - id, - resolve_relative(path, self.config_path), - find_runtime(id) - ) for id, 
path in executables_data.items()] - - def parse_toolchains(self, toolchains_data: Dict[str, List[Dict]]) -> List[ToolChain]: - """ - Parse each toolchain from the config file and return a list of them. - """ - return [ToolChain(name, steps) for name, steps in toolchains_data.items()] - - def gather_packages(self) -> List[Package]: - """ - Collect all top-level directories in testdir and create a package - """ - packages = [] - if self.debug_package: - packages.append(Package(self.debug_package)) - return packages - - for parent_path, dirs, _ in os.walk(self.test_dir): - for dirname in dirs: - pkg_path = os.path.join(parent_path, dirname) - packages.append(Package(pkg_path)) - break - return packages - - def log_test_info(self): - """ - Prints a simple formatted table of test information. - """ - log("\nPackages:", level=1) - for pkg in self.packages: - log(f"-- ({pkg.name})", level=1) - for spkg in pkg.subpackages: - log(f" -- ({spkg.name})", level=2) - for test in spkg.tests: - log(f" -- ({test.file})", level=3) - - def verify(self) -> ErrorCollection: - """ - Pass up all errrors by value in downstream objects like Toolchain, Testfile and Executable - """ - ec = ErrorCollection() - if not os.path.exists(self.test_dir): - ec.add(ConfigError(f"Cannot find test directory: {self.config_data['testDir']}")) - for exe in self.executables: - ec.extend(exe.verify().errors) - for tc in self.toolchains: - ec.extend(tc.verify().errors) - for pkg in self.packages: - ec.extend(pkg.verify().errors) - return ec - - def to_dict(self) -> Dict: - return { - 'name': self.name, - 'testDir': self.test_dir, - 'executables': [exe.to_dict() for exe in self.executables], - 'toolchains': {tc.name: tc.to_dict()[tc.name] for tc in self.toolchains}, - 'subpackages': [pkg.name for pkg in self.packages] - } - - def __repr__(self) -> str: - return json.dumps(self.to_dict(), indent=2) - -def load_config(config_path: str, args: Optional[RunnerArgs]=None) -> Optional[Config]: - """ - Load and parse 
the JSON configuration file. - """ - if not os.path.exists(config_path): - return None - try: - with open(config_path, 'r') as config_file: - config_data = json.load(config_file) - except json.decoder.JSONDecodeError: - log("Config Error: Failed to parse config: ", config_path) - return None - - debug_package = args.debug_package if args else None - package_filter = args.package_filter if args else "" - return Config(config_path, config_data, debug_package, package_filter) diff --git a/dragon_runner/src/errors.py b/dragon_runner/src/errors.py deleted file mode 100644 index b64adee..0000000 --- a/dragon_runner/src/errors.py +++ /dev/null @@ -1,58 +0,0 @@ -from typing import List, Union, Iterable - -class Error: - def __str__(self): raise NotImplementedError("Must implement __str__") - -class ConfigError(Error): - def __init__(self, message: str): - self.message = message - - def __str__(self): - return f"Config Error: {self.message}" - -class TestFileError(Error): - def __init__(self, message: str): - self.message = message - - def __str__(self): - return f"Testfile Error: {self.message}" - -class ErrorCollection: - def __init__(self, errors: Union[None, 'ErrorCollection', Iterable[Error]] = None): - self.errors: List[Error] = [] - if errors is not None: - if isinstance(errors, ErrorCollection): - self.errors = errors.errors.copy() - elif isinstance(errors, Iterable): - self.errors = list(errors) - - def has_errors(self) -> bool: - return self.__bool__() - - def add(self, error: Error): - self.errors.append(error) - - def extend(self, errors: Union['ErrorCollection', Iterable[Error]]): - if isinstance(errors, ErrorCollection): - self.errors.extend(errors.errors) - elif isinstance(errors, Iterable): - self.errors.extend(errors) - - def __bool__(self): - return len(self.errors) > 0 - - def __eq__(self, other): - if isinstance(other, bool): - return bool(self) == other - return False - - def __len__(self): - return len(self.errors) - - def __str__(self): - return 
"\n".join(str(error) for error in self.errors) - -class Verifiable: - def verify(self) -> ErrorCollection: - raise NotImplementedError("Subclasses must implement verify method") - diff --git a/dragon_runner/src/harness.py b/dragon_runner/src/harness.py deleted file mode 100644 index af6c81c..0000000 --- a/dragon_runner/src/harness.py +++ /dev/null @@ -1,315 +0,0 @@ -import csv -import fnmatch -from colorama import Fore -from typing import Any, List, Dict, Optional, Set -from dragon_runner.src.cli import RunnerArgs -from dragon_runner.src.config import Config, Executable, Package -from dragon_runner.src.log import log -from dragon_runner.src.runner import TestResult, ToolChainRunner -from dragon_runner.src.utils import file_to_str -from itertools import zip_longest - -class TestHarness: - __test__ = False - - def __init__(self, config: Config, cli_args: RunnerArgs): - self.config = config - self.cli_args: RunnerArgs = cli_args - self.failures: List[TestResult] = [] - self.run_passed = True - - def process_test_result(self, test_result: TestResult, context: Dict[str, Any]): - """ - Subclasses should override this method to handle test result processing and update counts. 
- """ - raise NotImplementedError("Subclasses must implement this method") - - def pre_subpackage_hook(self, spkg): - """Hook to run before iterating through a subpackage.""" - pass - - def post_subpackage_hook(self, context: Dict[str, Any]): - """Hook to run after iterating through a subpackage.""" - pass - - def pre_executable_hook(self, exe): - """Hook to run efore iterating through an executable.""" - pass - - def post_executable_hook(self): - """Hook to run after iterating through an executable""" - if self.failures != []: - pass - # todo: enable this with a flag - # log(f"Failure Summary: ({len(self.failures)} tests)") - # for result in self.failures: - # result.log() - self.failures = [] - - def post_run_hook(self): - pass - - def pre_run_hook(self): - pass - - def iterate(self): - """ - Basic structure to record which tests pass and fail. Additional functionality - can be implemented by overriding default hooks. - """ - self.pre_run_hook() - for exe in self.config.executables: - self.pre_executable_hook(exe.id) - log(f"Running executable: {exe.id}", indent=0) - exe.source_env() - exe_pass_count = 0 - exe_test_count = 0 - for toolchain in self.config.toolchains: - tc_runner = ToolChainRunner(toolchain, self.cli_args.timeout) - log(f"Running Toolchain: {toolchain.name}", indent=1) - tc_pass_count = 0 - tc_test_count = 0 - for pkg in self.config.packages: - pkg_pass_count = 0 - pkg_test_count = 0 - log(f"Entering package {pkg.name}", indent=2) - for spkg in pkg.subpackages: - # Glob pattern match against package_filter using subpackage path - if self.config.package_filter: - if not fnmatch.fnmatch(spkg.path.lower(), self.config.package_filter.lower()): - continue - log(f"Entering subpackage {spkg.name}", indent=3) - counters = {"pass_count": 0, "test_count": 0} - self.pre_subpackage_hook(spkg) - for test in spkg.tests: - test_result: TestResult = tc_runner.run(test, exe) - self.process_test_result(test_result, counters) - if self.cli_args.fast_fail and not 
test_result.did_pass: - self.post_subpackage_hook(counters) - self.post_executable_hook() - self.post_run_hook() - return - self.post_subpackage_hook(counters) - log("Subpackage Passed: ", counters["pass_count"], "/", counters["test_count"], indent=3) - pkg_pass_count += counters["pass_count"] - pkg_test_count += counters["test_count"] - log("Packaged Passed: ", pkg_pass_count, "/", pkg_test_count, indent=2) - tc_pass_count += pkg_pass_count - tc_test_count += pkg_test_count - log("Toolchain Passed: ", tc_pass_count, "/", tc_test_count, indent=1) - exe_pass_count += tc_pass_count - exe_test_count += tc_test_count - log("Executable Passed: ", exe_pass_count, "/", exe_test_count) - self.post_executable_hook() - self.post_run_hook() - - def run(self): - """Default run implementation.""" - self.iterate() - return self.run_passed - -class RegularHarness(TestHarness): - - def process_test_result(self, test_result: TestResult, context: Dict[str, Any]): - """ - Override the hook for regular run-specific implementation of counting passes - """ - if test_result.did_pass: - context["pass_count"] += 1 - test_result.log(args=self.cli_args) - else: - self.run_passed = False - self.failures.append(test_result) - test_result.log(args=self.cli_args) - context["test_count"] += 1 - -class TournamentHarness(TestHarness): - - def iterate(self): - """ - Run the tester in grade mode. Run all test packages for each tested executable. - Write each toolchain table to the CSV file as it's completed. 
- """ - attacking_pkgs = sorted(self.config.packages, key=lambda pkg: pkg.name.lower()) - defending_exes = sorted(self.config.executables, key=lambda exe: exe.id.lower()) - solution_exe = self.config.solution_exe - failure_log = self.cli_args.failure_log - - for toolchain in self.config.toolchains: - tc_runner = ToolChainRunner(toolchain, self.cli_args.timeout) - tc_table = self.create_tc_dataframe(defending_exes, attacking_pkgs) - - with open(f"toolchain_{toolchain.name}.csv", 'w') as toolchain_csv: - print(f"\nToolchain: {toolchain.name}") - csv_writer = csv.writer(toolchain_csv) - csv_writer.writerow([toolchain.name] + [pkg.name for pkg in attacking_pkgs]) - toolchain_csv.flush() - - for def_exe in defending_exes: - def_exe.source_env() - def_feedback_file = f"{def_exe.id}-{toolchain.name}feedback.txt" - for a_pkg in attacking_pkgs: - print(f"\n {a_pkg.name:<12} --> {def_exe.id:<12}", end='') - pass_count = 0 - test_count = 0 - for a_spkg in a_pkg.subpackages: - for test in a_spkg.tests: - test_result: Optional[TestResult] = tc_runner.run(test, def_exe) - if test_result and test_result.did_pass: - print(Fore.GREEN + '.' + Fore.RESET, end='') - pass_count += 1 - if solution_exe == def_exe.id and failure_log: - with open("pass_log.txt", 'a') as f_log: - f_log.write(f"{toolchain.name} {a_pkg.name} {test_result.test.path}\n") - else: - print(Fore.RED + '.' 
+ Fore.RESET, end='') - self.log_failure_to_file(def_feedback_file, test_result) - if solution_exe == def_exe.id and failure_log: - with open(failure_log, 'a') as f_log: - f_log.write(f"{toolchain.name} {a_pkg.name} {test_result.test.path}\n") - test_count += 1 - - cell_value = f"{pass_count}/{test_count}" - tc_table[def_exe.id][a_pkg.name] = cell_value - csv_writer.writerow([def_exe.id] + [tc_table[def_exe.id][pkg.name] for pkg in attacking_pkgs]) - toolchain_csv.flush() - - @staticmethod - def create_tc_dataframe(defenders: List[Executable], - attackers: List[Package]) -> Dict[str, Dict[str, str]]: - """ - Create an empty toolchain table with labels for defenders and attackers - """ - df = {exe.id: {pkg.name: '' for pkg in attackers} for exe in defenders} - return df - - @staticmethod - def create_timing_dataframe() -> Dict[str, Dict[str, float]]: - """ - TODO: Creating timing DF for Gazprea II (Only applicable for grading) - """ - return {} - - def log_failure_to_file(self, file, result: TestResult): - """ - Give full feedback to a defender for all the tests they failed. - """ - def trim_bytes(data: bytes, max_bytes: int = 10000) -> bytes: - trimmed = data[:max_bytes] - if len(data) > max_bytes: - trimmed += b"\n... 
(output trimmed to %d bytes)" % max_bytes - return trimmed - - if result.did_pass: - return - - with open(file, 'a+') as feedback_file: - test_contents = result.test.pretty_print() - exp_out = trim_bytes(x) if isinstance(x := result.test.expected_out, bytes) else "" - gen_out = trim_bytes(x) if isinstance(x := result.gen_output, bytes) else "" - feedback_string = ( - "="*80+'\n' - f"Test: {result.test.file}" - f"\nTest Contents:\n{test_contents}\n" - f"\nExpected Output: {exp_out}\n" - f"Generated Output: {gen_out}\n" - ) - - feedback_file.write(feedback_string) - -class MemoryCheckHarness(TestHarness): - - def __init__(self, config: Config, cli_args: RunnerArgs): - super().__init__(config, cli_args) - self.leak_count = 0 - self.test_count = 0 - self.leak_tests: List[TestResult] = [] - - def post_executable_hook(self): - """ - Report failures to stdout. - """ - log(f"Leak Summary: ({len(self.leak_tests)} tests)") - for result in self.leak_tests: - log(Fore.YELLOW + "[LEAK] " + Fore.RESET + f"{result.test.file}", - indent=4) - self.leak_tests = [] - self.test_count = 0 # reset for each executable - - def process_test_result(self, test_result: TestResult, context: Dict[str, Any]): - """ - Override the hook for regular run-specific implementation of counting passes - """ - # TODO: Refactor an clean up. 
Not simple enough - - # increment the test count - self.test_count += 1 - context["test_count"] += 1 - - # log the test result - test_result.log(args=self.cli_args) - - # track tests which leak - if test_result.memory_leak: - self.leak_tests.append(test_result) - - # track passes as usual - if test_result.did_pass: - context["pass_count"] += 1 - -class PerformanceTestingHarness(TestHarness): - - def __init__(self, config: Config, cli_args: RunnerArgs): - super().__init__(config, cli_args) - self.csv_cols = [] - self.cur_col = [] - self.testfile_col = ["Test"] - self.first_exec = True - - @staticmethod - def create_tc_dataframe(defenders: List[Executable], - attackers: List[Package]) -> Dict[str, Set[str]]: - """ - Create an empty toolchain table with labels for defenders and attackers - """ - df = {exe.id: {pkg.name for pkg in attackers} for exe in defenders} - return df - - def process_test_result(self, test_result: TestResult, context: Dict[str, Any]): - """ - Override the hook for regular run-specific implementation of counting passes - """ - # only construct a column for the test file names once - if self.first_exec: - self.testfile_col.append(test_result.test.file) - - if test_result.did_pass: - context["pass_count"] += 1 - test_result.log(args=self.cli_args) - self.cur_col.append(test_result.time) - - else: - self.cur_col.append(self.cli_args.timeout) - self.failures.append(test_result) - test_result.log(args=self.cli_args) - context["test_count"] += 1 - - def pre_executable_hook(self, exe): - self.cur_col.append(exe) - - def post_executable_hook(self): - if self.first_exec: - self.csv_cols.append(self.testfile_col) - self.first_exec = False - - self.csv_cols.append(self.cur_col) - self.cur_col = [] - - def post_run_hook(self): - # transpose the columns into rows for writing - csv_rows = zip_longest(*self.csv_cols, fillvalue='') - - with open('perf.csv', 'w', newline='') as file: - writer = csv.writer(file) - writer.writerows(csv_rows) - diff --git 
a/dragon_runner/src/log.py b/dragon_runner/src/log.py deleted file mode 100644 index 60db1c8..0000000 --- a/dragon_runner/src/log.py +++ /dev/null @@ -1,38 +0,0 @@ -import os - -class Logger: - def __init__(self): - self.debug_level = self._get_debug_level() - - def _get_debug_level(self): - return int(os.environ.get('DRAGON_RUNNER_DEBUG', '0')) - - def log(self, level, indent, *args, **kwargs): - prefix = ' '*indent - if self.debug_level >= level: - print(prefix, *args, **kwargs) - -_logger_instance = None - -def get_logger(): - """ - get singleton logger for the entire program - """ - global _logger_instance - if _logger_instance is None: - _logger_instance = Logger() - return _logger_instance - -def log_multiline(content: str, level=0, indent=0, **kwargs): - """ - Log multiline content with proper indentation - """ - for line in str(content).splitlines(): - log(line.rstrip(), level=level, indent=indent, **kwargs) - -def log(*args, level=0, indent=0, **kwargs): - get_logger().log(level, indent, *args, **kwargs) - -def log_delimiter(title: str, level=0, indent=0): - delimiter = '-' * 20 - log(delimiter + ' ' + title + ' ' + delimiter, level=level, indent=indent) diff --git a/dragon_runner/src/main.py b/dragon_runner/src/main.py deleted file mode 100644 index 89ff925..0000000 --- a/dragon_runner/src/main.py +++ /dev/null @@ -1,80 +0,0 @@ -from colorama import init, Fore -from dragon_runner.src.cli import Mode, parse_cli_args, ServerArgs, ScriptArgs -from dragon_runner.src.config import load_config -from dragon_runner.src.log import log, log_multiline -from dragon_runner.scripts.loader import Loader -from dragon_runner.src.server import serve -from dragon_runner.src.harness import * - -# initialize terminal colors -init(autoreset=True) - -def main(): - # parse and verify the CLI arguments - cli_args = parse_cli_args() - log(cli_args, level=1) - - # run the server for running configs through HTTP - if isinstance(cli_args, ServerArgs): - serve(cli_args) - return 0 - - 
# dragon-runner can also be used as a loader for grading & other scripts - if isinstance(cli_args, ScriptArgs): - loader = Loader() - return loader(cli_args.args) - - # parse and verify the config - config = load_config(cli_args.config_file, cli_args) - if not config: - log(f"Could not open config file: {cli_args.config_file}") - return 1 - - if config.error_collection: - log(f"Found Config {len(config.error_collection)} error(s):") - log(f"Parsed {cli_args.config_file} below:") - log_multiline(str(config), indent=2) - log(Fore.RED + str(config.error_collection) + Fore.RESET) - return 1 - - if cli_args.verify: - ccid = input("Enter your CCID/Github Team Name: ") - assert config and not config.error_collection - found = False - for pkg in config.packages: - log("Searching.. ", pkg.name, indent=2) - if pkg.name == ccid: - found = True - if not found: - print(f"Could not find package named after CCID: {ccid}") - return 1 - - # display the config info before running tests - config.log_test_info() - - if cli_args.mode == Mode.REGULAR: - # run in regular mode - harness = RegularHarness(config, cli_args) - - elif cli_args.mode == Mode.TOURNAMENT: - # run the tester in tournament mode - harness = TournamentHarness(config, cli_args) - - elif cli_args.mode == Mode.MEMCHECK: - # check tests for memory leaks - harness = MemoryCheckHarness(config, cli_args) - - elif cli_args.mode == Mode.PERF: - # performance testing - harness = PerformanceTestingHarness(config, cli_args) - else: - raise RuntimeError(f"Failed to provide valid mode: {cli_args.mode}") - - success = harness.run() - if success: - return 0 - return 1 - -if __name__ == "__main__": - main() - diff --git a/dragon_runner/src/runner.py b/dragon_runner/src/runner.py deleted file mode 100644 index 5236520..0000000 --- a/dragon_runner/src/runner.py +++ /dev/null @@ -1,423 +0,0 @@ -import subprocess -import os -import re -import json -import time -import sys -from subprocess import CompletedProcess -from typing import List, 
Dict, Optional, Union -from dataclasses import dataclass, asdict -from colorama import Fore, init -from dragon_runner.src.testfile import TestFile -from dragon_runner.src.config import Executable, ToolChain -from dragon_runner.src.log import log, log_multiline -from dragon_runner.src.toolchain import Step -from dragon_runner.src.cli import CLIArgs, RunnerArgs -from dragon_runner.src.utils import make_tmp_file, bytes_to_str,\ - file_to_bytes, truncated_bytes,\ - file_to_str -# Terminal colors -init(autoreset=True) - -# Reserve a specific status code to use for valgrind -VALGRIND_EXIT_CODE = 111 - -@dataclass -class MagicParams: - exe_path: str # $EXE - input_file: Optional[str] = "" # $INPUT - output_file: Optional[str] = "" # $OUTPUT - def __repr__(self): - return json.dumps(asdict(self), indent=2) - -class Command: - """ - Wrapper for a list of arguments to run fork/exec style - """ - def __init__(self, args): - self.args: List[str] = args - self.cmd: str = self.args[0] - -@dataclass -class CommandResult: - cmd:str - subprocess: Optional[CompletedProcess]=None - exit_status: int=0 - time: float=0 - timed_out: bool=False - - def log(self, level:int=0, indent=0): - if self.subprocess: - stdout = self.subprocess.stdout - stderr = self.subprocess.stderr - - if stderr is None: - stderr = b'' - if stdout is None: - stdout = b'' - - log(f"==> {self.cmd} (exit {self.exit_status})", indent=indent, level=level) - log(f"stdout ({len(stdout)} bytes):", truncated_bytes(stdout, max_bytes=512), - indent=indent+2, level=level) - log(f"stderr ({len(stderr)} bytes):", truncated_bytes(stderr, max_bytes=512), - indent=indent+2, level=level) - -class TestResult: - """ - Represents the result of running a test case, including pass/fail status, - execution time, and error information. 
- """ - __test__ = False # pytest gets confused when classes start with 'Test' - def __init__(self, test:TestFile, did_pass:bool=False): - # required fields - self.test = test - self.did_pass: bool = did_pass - self.did_timeout: bool = False - self.error_test: bool = False - self.memory_leak: bool = False - self.command_history: List[CommandResult] = [] - - # optional fields - self.gen_output: Optional[bytes] = None - self.time: Optional[float] = None - self.failing_step: Optional[str] = None - - def log(self, file=sys.stdout, args: Union['RunnerArgs', None]=None): - """ - Print a TestResult to the log with various levels of verbosity. - This is the main output the user is concerned with. - """ - # TODO: This is very messy. Find some time to clean in up! - pass_msg = "[E-PASS] " if self.error_test else "[PASS] " - fail_msg = "[E-FAIL] " if self.error_test else "[FAIL] " - timeout_msg = "[TIMEOUT] " - - test_name = f"{self.test.file:<50}".strip() - show_time = args and args.time and self.time is not None - if self.did_timeout: - log(Fore.YELLOW + timeout_msg + Fore.RESET + f"{test_name.strip()}", indent=4, file=file) - - # Log test result - elif self.did_pass: - time_display = "" - if show_time: - time_str = f"{self.time:.4f}" - time_display = f"{time_str:>10} (s)" - log_msg = f"{Fore.GREEN}{pass_msg}{Fore.RESET}{test_name}{time_display}" - log(log_msg, indent=4, file=file) - else: - log(Fore.RED + fail_msg + Fore.RESET + f"{test_name}", indent=4, file=file) - - # Log testcase - if args and args.show_testcase: - content = self.test.pretty_print() - level = 2 if self.did_pass else 0 - log_multiline(content, indent=6, level=level) - - # Log the command history - level = 3 if self.did_pass else 2 - log(f"==> Command History", indent=6, level=level) - for cmd in self.command_history: - cmd.log(level=level, indent=8) - - # Log test expected and generated - expected_out = self.test.get_expected_out() - generated_out = x if (x := self.gen_output) else b'' - - log(f"==> 
Expected Out ({len(expected_out)} bytes):", indent=6, level=level-1) - log(str(expected_out), level=level-1, indent=7) - log(f"==> Generated Out ({len(generated_out)} bytes):", indent=6, level=level-1) - log(str(generated_out), level=level-1, indent=7) - - def __repr__(self): - return "PASS" if self.did_pass else "FAIL" - -class ToolChainRunner(): - def __init__(self, tc: ToolChain, timeout: float, env: Dict[str, str]={}): - self.tc = tc - self.timeout = timeout - self.env = env - self.reserved_exit_codes = [VALGRIND_EXIT_CODE] - self.RUNTIME_ERRORS = ["SizeError", "IndexError", "MathError", "StrideError"] - - def handle_error_test(self, tr: TestResult, produced: bytes, expected: bytes): - """ - An error test requires specific handling since a diff between expected and - generated does not imply the test will fail. Instead we identify the relevent - components of the error message using regular expressions and perform a lenient diff. - """ - try: - produced_str = produced.decode('utf-8').strip() if produced else None - expected_str = expected.decode('utf-8').strip() if expected else None - except UnicodeDecodeError as unicode_error: - tr.did_pass = False - return - - # An error test must be UTF-8 decodable. - if produced_str is None or expected_str is None: - tr.did_pass = False - return - - rt_error = next((s for s in self.RUNTIME_ERRORS if s in expected_str), None) - did_raise_rt_error = any(err in produced_str for err in self.RUNTIME_ERRORS) - if did_raise_rt_error: - # Expected can be either a runtime or compile time format. - if rt_error is None: - # Raised a runtime error but did not expect one. - tr.did_pass = False - else: - # Raised a runtime error and expected one as well. - pattern = fr"{rt_error}(\s+on\s+Line\s+\d+)?(:.*)?" - tr.did_pass = bool( - re.search(pattern, produced_str) and - re.search(pattern, expected_str) - ) - else: - # Expected must be in compile time format, i.e lines must match. 
- def extract_components(text): - error = re.search(r"(\w+Error)", text, re.IGNORECASE) - line = re.search(r"on\s+Line\s+(\d+)", text, re.IGNORECASE) - return error, line - - prod_error, prod_line = extract_components(produced_str) - exp_error, exp_line = extract_components(expected_str) - - if prod_error and prod_error.group(1) == "MainError" and \ - exp_error and exp_error.group(1) == "MainError": - # hack in this case because spec doesn't define what line to throw MainError on. - tr.did_pass = True - return - - if prod_error and exp_error and prod_line and exp_line: - - - tr.did_pass = (prod_line.group(1) == exp_line.group(1)) - else: - tr.did_pass = False - - def run_command(self, command, stdin: bytes) -> CommandResult: - """ - Run a command and return the CommandResult - """ - env = os.environ.copy() - start_time = time.time() - cr = CommandResult(cmd=command.cmd) - try: - result = subprocess.run( - command.args, - env=env, - input=stdin, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - check=False, - timeout=self.timeout - ) - wall_time = time.time() - start_time - cr.subprocess = result - cr.exit_status = result.returncode - cr.time = wall_time - except subprocess.TimeoutExpired: - cr.time = self.timeout - cr.timed_out = True - cr.exit_status = 255 - except Exception: - cr.exit_status = 1 - return cr - - def resolve_output_file(self, step: Step) -> Optional[str]: - """ - make absolute path from output file in step - """ - current_dir = os.getcwd() - output_file = os.path.join(current_dir, step.output) if step.output else None - return output_file - - def resolve_command(self, step: Step, params: MagicParams) -> Command: - """ - replace magic parameters with real arguments - """ - command = Command(args=[step.exe_path] + step.arguments) - command = self.replace_magic_args(command, params) - command = self.replace_env_vars(command) - exe = command.args[0] - if not os.path.isabs(exe): - command.args[0] = os.path.abspath(exe) - return command - - def 
run(self, test: TestFile, exe: Executable) -> TestResult: - """ - run each step of the toolchain for a given test and executable - """ - input_file = test.path - expected = test.expected_out if isinstance(test.expected_out, bytes) else b'' - tr = TestResult(test=test, did_pass=False) - - for index, step in enumerate(self.tc): - - # set up input and output - last_step = (index == len(self.tc) - 1) - input_stream = test.get_input_stream() if step.uses_ins else b'' - output_file = self.resolve_output_file(step) - - # resolve magic parameters for currents step - magic_params = MagicParams(exe.exe_path, input_file, output_file) - command = self.resolve_command(step, magic_params) - command_result = self.run_command(command, input_stream) - - # save command history for logging - tr.command_history.append(command_result) - - # Check if the command timed out - if command_result.timed_out: - """ - A step timed out based on the max timeout specified by CLI arg. - """ - tr.did_pass=False; - tr.did_timeout=True - tr.failing_step=step.name; - tr.time = self.timeout - return tr - - child_process = command_result.subprocess - if not child_process: - """ - OS failed to exec the command. - """ - tr.did_pass = False; - return tr - - step_stdout = bytes(child_process.stdout) or b'' - step_stderr = bytes(child_process.stderr) or b'' - step_time = round(command_result.time, 4) - - if child_process.returncode in self.reserved_exit_codes: - """ - Special case for reserved exit codes - 1) Valgrind - """ - if child_process.returncode == VALGRIND_EXIT_CODE: - tr.memory_leak = True - - if child_process.returncode != 0 and \ - child_process.returncode not in self.reserved_exit_codes: - """ - A step in the toolchain has returned a non-zero exit status. If "allowError" - is specified in the config, we can perform a lenient diff based on CompileTime - or RuntimeError message rules. Otherwise, we abort the toolchain. 
- """ - tr.gen_output=step_stderr - tr.failing_step=step.name - tr.error_test=True - - # fail by default if errors are not explicitly allowed in config - if step.allow_error: - self.handle_error_test(tr, step_stderr, expected) - return tr - else: - tr.did_pass = False - return tr - - elif last_step: - """ - The last step terminated gracefully at this point. We write to the output file and - make a precise diff to determine if the test has passed. - """ - if output_file and not os.path.exists(output_file): - raise RuntimeError(f"Command did not create specified output file {output_file}") - - if output_file is not None: - step_stdout = file_to_bytes(output_file) or b'' - - tr.time=step_time - tr.gen_output=step_stdout - - # Diff the produced and expected outputs - diff = precise_diff(step_stdout, expected) - if not diff: - tr.did_pass = True - else: - tr.did_pass = False - - return tr - else: - """ - Set up the next steps input file which is the $OUTPUT of the previous step. - If $OUTPUT is not supplied, we create a temporary pipe. 
- """ - input_file = output_file or make_tmp_file(child_process.stdout) - - # this code should be unreachable for well-defined toolchains - raise RuntimeError("Toolchain reached undefined conditions during execution.") - - @staticmethod - def replace_env_vars(cmd: Command) -> Command: - """ - Expand environment variables with the values from current shell - """ - resolved = [] - for arg in cmd.args: - matches = re.findall(r'\$(\w+)|\$\{(\w+)\}', arg) - if matches: - for match in matches: - var_name = match[0] or match[1] - env_value = os.environ.get(var_name) - if env_value is not None: - arg = arg.replace(f"${var_name}", env_value)\ - .replace(f"${{{var_name}}}", env_value) - resolved.append(arg) - else: - resolved.append(arg) - cmd.args = resolved - return cmd - - @staticmethod - def replace_magic_args(command: Command, params: MagicParams) -> Command: - """ - Magic args are inherited from previous steps - """ - resolved = [] - for arg in command.args: - if '$EXE' in arg: - resolved.append(arg.replace('$EXE', params.exe_path)) - elif '$INPUT' in arg and params.input_file: - resolved.append(arg.replace('$INPUT', params.input_file)) - elif '$OUTPUT' in arg and params.output_file: - resolved.append(arg.replace('$OUTPUT', params.output_file)) - else: - resolved.append(arg) - command.args = resolved - command.cmd = command.args[0] - return command - -def diff_bytes(s1: bytes, s2: bytes) -> str: - """ - The difflib library appears to have an infinite recursion bug. - It is simple to write our own. 
- """ - result = [] - i, j = 0, 0 - while i < len(s1) and j < len(s2): - if s1[i] != s2[j]: - result.append(f"-{s1[i]}") - result.append(f"+{s2[j]}") - else: - result.append(f" {s1[i]}") - i += 1 - j += 1 - while i < len(s1): - result.append(f"-{s1[i]}") - i += 1 - while j < len(s2): - result.append(f"+{s2[j]}") - j += 1 - return ''.join(result) - -def precise_diff(produced: bytes, expected: bytes) -> str: - """ - Return the difference of two byte strings, otherwise empty string - """ - # identical strings implies no diff - if produced == expected: - return "" - return diff_bytes(produced, expected) - diff --git a/dragon_runner/src/server.py b/dragon_runner/src/server.py deleted file mode 100644 index 781c48a..0000000 --- a/dragon_runner/src/server.py +++ /dev/null @@ -1,282 +0,0 @@ -import os -import subprocess -import shutil -from typing import List, Dict, Any, Optional -from dragon_runner.src.cli import ServerArgs -from dragon_runner.src.runner import TestResult, ToolChainRunner, Command, CommandResult -from dragon_runner.src.toolchain import ToolChain -from dragon_runner.src.config import load_config, Config, Executable -from dragon_runner.src.testfile import TestFile -from dragon_runner.src.utils import * -from tempfile import NamedTemporaryFile -from pathlib import Path -from flask import Blueprint, Flask, request, jsonify, current_app -from flask_cors import CORS - -SERVER_MODE = os.environ.get("DR_SERVER_MODE", "DEBUG").upper() -IS_PRODUCTION = (SERVER_MODE == "PROD") -app = Flask(__name__) -CORS(app) - -class SecureToolChainRunner(ToolChainRunner): - """ - ToolChainRunner using firejail sandboxing - """ - def __init__(self, tc, timeout: float, env=None, restrict_exes: List[Executable]=[]): - super().__init__(tc, timeout, env or {}) - self.firejail_available = self._check_firejail() - self.restrict_exes = restrict_exes - - def _check_firejail(self) -> bool: - """ - Check if firejail is available on the system. 
- """ - return shutil.which('firejail') is not None - - def _create_firejail_command(self, original_cmd: List[str]) -> List[str]: - """ - Wrap command with firejail security options. - """ - if not self.firejail_available: - return original_cmd - - firejail_cmd = [ - 'firejail', - '--noprofile', - '--seccomp', - '--noroot', - '--net=none', - '--noexec=/home', - '--private-tmp', - '--private-dev', - '--read-only=/usr', - '--read-only=/bin', - '--read-only=/lib', - '--read-only=/lib64', - '--blacklist=/home', - '--blacklist=/root', - '--blacklist=/etc', - '--rlimit-nproc=2', - '--rlimit-fsize=1048576', #1MB - f'--timeout=00:00:{int(self.timeout):02d}', - '--quiet', - '--' - ] - return firejail_cmd + original_cmd - - def run_command(self, command: Command, stdin: bytes) -> CommandResult: - """ - Override to wrap commands with firejail - """ - if self.firejail_available: - secure_args = self._create_firejail_command(command.args) - secure_command = Command(secure_args) - return super().run_command(secure_command, stdin) - return CommandResult(cmd="", exit_status=1) - -class Payload: - def __init__(self): - self.data = {} - - def to_dict(self): - return self.data - -class ConfigPayload(Payload): - def __init__(self, config: Config): - self.data = { - "name": config.name, - "executables": [e.id for e in config.executables], - "toolchains": [t.name for t in config.toolchains] - } - -class ToolChainPayload(Payload): - def __init__(self, tc: ToolChain): - self.data = tc.to_dict() - -class TestPayload(Payload): - def __init__(self, test: TestFile): - self.data = test.to_dict() - self.data.update({"content": utf8_file_to_base64(test.path)}) - -class ConfigAPI: - def __init__(self, config: Config): - self.config = config - self.config_path = config.config_path - self.name = Path(config.config_path).stem - self.tests: Dict = self.unpack_tests() - - # Create blueprint for this config - self.bp = Blueprint(f"config_{self.name}", __name__) - self._register_routes() - - def 
unpack_tests(self) -> Dict: - tests = {} - for pkg in self.config.packages: - for spkg in pkg.subpackages: - for test in spkg.tests: - tests[test.file] = test - return tests - - def _register_routes(self): - self.bp.route(f"/config/{self.name}", methods=["GET"])(self.get_config) - self.bp.route(f"/config/{self.name}/toolchains", methods=["GET"])(self.get_toolchains) - self.bp.route(f"/config/{self.name}/tests", methods=["GET"])(self.get_tests) - self.bp.route(f"/config/{self.name}/run", methods=["POST"])(self.run_test) - - def get_config(self): - return jsonify(ConfigPayload(self.config).to_dict()) - - def get_toolchains(self): - return jsonify([ToolChainPayload(t).to_dict() for t in self.config.toolchains]) - - def get_tests(self): - return jsonify([TestPayload(t).to_dict() for t in self.tests.values()]) - - def run_test(self): - data = request.get_json(silent=True) or {} - toolchain_name: str = data.get('toolchain_name', "") - exe_name: str = data.get('exe_name', "") - test_stdin: Optional[bytes] = b64_to_bytes(data.get('stdin', "")) - test_contents: Optional[str] = b64_to_str(data.get('test_contents', "")) - - if test_stdin is None or test_contents is None: - app.logger.error(f"Test received stdin: {test_stdin} and contents {test_contents}") - return jsonify({ - "status": "error", - "message": "Failed to decode stdin and/or test contents in request." 
- }), 500 - - try: - # Find toolchain and executable - exe = next((e for e in self.config.executables if e.id == exe_name), - self.config.executables[0]) - tc = next((x for x in self.config.toolchains if x.name == toolchain_name), - self.config.toolchains[0]) - - if IS_PRODUCTION: - tc_runner = SecureToolChainRunner(tc, timeout=5, restrict_exes=self.config.executables) - else: - tc_runner = ToolChainRunner(tc, timeout=5) - - # Create temporary file for runtime supplied test - with NamedTemporaryFile(mode='w+', delete=True, suffix='.test') as temp: - temp.write(test_contents) - temp.flush() - temp.seek(0) - test = TestFile(temp.name) - test.set_input_stream(test_stdin) - - # Run test in secure environment - app.logger.info(f"Running secure test: {test.stem} with toolchain: {toolchain_name}") - tr: TestResult = tc_runner.run(test, exe) - - cmd = tr.command_history[-1] if tr.command_history else None - - if cmd and cmd.subprocess: - stdout = bytes_to_b64(cmd.subprocess.stdout) - stderr = bytes_to_b64(cmd.subprocess.stderr) - exit_status = cmd.exit_status - else: - stdout = "" - stderr = "Toolchain execution failed" - exit_status = -1 - - return jsonify({ - "config": self.name, - "test": test.stem, - "results": { - "passed": tr.did_pass, - "exit_status": exit_status, - "stdout": stdout, - "stderr": stderr, - "time": str(tr.time), - "expected_output": str(test.expected_out), - } - }) - - except subprocess.TimeoutExpired: - app.logger.error("Test execution timed out") - return jsonify({ - "status": "error", - "message": "Test execution timed out" - }), 408 - except Exception as e: - app.logger.error(f"Error running test: {str(e)}") - return jsonify({ - "status": "error", - "message": str(e) - }), 500 - -@app.route("/") -def root(): - """Base route that lists all available routes""" - return jsonify({ - "service": "Dragon Runner API", - "status": "running", - "mode": "production" if IS_PRODUCTION else "debug", - "available_endpoints": [route['url'] for route in 
get_available_routes()] - }) - -def get_available_routes() -> List[Dict[str, Any]]: - """Helper function to list all available routes""" - routes = [] - for rule in current_app.url_map.iter_rules(): - if rule.endpoint != 'static' and rule.methods: - routes.append({ - "url": str(rule), - "methods": list(rule.methods - {"OPTIONS", "HEAD"}) - }) - return routes - -def get_configs_to_serve(config_dir: Path) -> List[Config]: - """Get all config files from a directory and its subdirectories""" - configs: List[Config] = [] - - def fill_config(path: Path): - if path.is_file(): - config = load_config(str(path)) - if config is not None: - configs.append(config) - return - - for entry in path.iterdir(): - if entry.is_dir() or entry.is_file(): - fill_config(entry) - - fill_config(config_dir) - return configs - -def create_app(args: ServerArgs): - """Create App for WSGI deployment""" - configs = get_configs_to_serve(args.serve_path) - - def root_route(): - return jsonify([ConfigPayload(c).to_dict() for c in configs]) - - bp = Blueprint(f"configs", __name__) - bp.route("/configs", methods=["GET"])(root_route) - app.register_blueprint(bp) - - # Create APIs for each config and register their blueprints - for config in configs: - api = ConfigAPI(config) - app.register_blueprint(api.bp) - - # Log security status - firejail_status = "ENABLED" if shutil.which('firejail') else "DISABLED" - app.logger.info(f"Security sandbox: {firejail_status}") - - return app - -def serve(args: ServerArgs): - create_app(args) - - if IS_PRODUCTION: - from wsgiref.simple_server import make_server - server = make_server('0.0.0.0', args.port, app) - print(f"Production server running on http://0.0.0.0:{args.port}") - server.serve_forever() - else: - print(f"Dev mode - Flask dev server on http://0.0.0.0:{args.port}") - app.run(debug=True, host="0.0.0.0", port=args.port) - diff --git a/dragon_runner/src/testfile.py b/dragon_runner/src/testfile.py deleted file mode 100644 index 4ccb920..0000000 --- 
a/dragon_runner/src/testfile.py +++ /dev/null @@ -1,204 +0,0 @@ -import os -from io import BytesIO -from typing import Dict, Optional, Union -from dragon_runner.src.utils import file_to_str, str_to_bytes, file_to_bytes -from dragon_runner.src.errors import Verifiable, ErrorCollection, TestFileError - -class TestFile(Verifiable): - __test__ = False - def __init__(self, test_path: str, input_dir="input", input_stream_dir="input-stream", - output_dir="output", comment_syntax="//"): - self.path = test_path - self.stem, self.extension = os.path.splitext(os.path.basename(test_path)) - self.file:str = self.stem + self.extension - self.input_dir = input_dir - self.input_stream_dir = input_stream_dir - self.output_dir = output_dir - self.comment_syntax = comment_syntax # default C99 // - self.expected_out: Union[bytes, TestFileError] = self.get_content("CHECK:", "CHECK_FILE:") - self.input_stream: Union[bytes, TestFileError] = self.get_content("INPUT:", "INPUT_FILE:") - - @classmethod - def from_test_contents(cls, content: bytes, test_name: str): - - instance = cls.__new__(cls) - - return instance - - def set_input_stream(self, input_stream: bytes): - """ - Manually set the input stream. - """ - self.input_stream = input_stream - - def get_input_stream(self) -> bytes: - """ - Get the input-stream supplied for the test. Assumes this testfile instance - has had self.verify() called beforehand. - """ - if isinstance(self.input_stream, bytes): - return self.input_stream - return b'' - - def get_expected_out(self) -> bytes: - """ - Get the expected output for the test. Assumes this testfile instance - has had self.verify() called beforehand. 
- """ - if isinstance(self.expected_out, bytes): - return self.expected_out - return b'' - - def verify(self) -> ErrorCollection: - """ - Ensure the paths supplied in CHECK_FILE and INPUT_FILE exist - """ - collection = ErrorCollection() - # If a parse and read of a tests input or output fails, propagate here - if isinstance(self.expected_out, TestFileError): - collection.add(self.expected_out) - if isinstance(self.input_stream, TestFileError): - collection.add(self.input_stream) - return collection - - def get_content(self, inline_directive: str, file_directive: str) -> Union[bytes, TestFileError]: - """ - Generic method to get content based on directives - """ - inline_contents = self._get_directive_contents(inline_directive) - file_contents = self._get_directive_contents(file_directive) - - if inline_contents and file_contents: - return TestFileError(f"Directive Conflict for test {self.file}: Supplied both\ - {inline_directive} and {file_directive}") - - elif inline_contents: - return inline_contents - - elif file_contents: - if isinstance(file_contents, TestFileError): - return file_contents - - file_str = file_contents.decode() - - full_path = os.path.join(os.path.dirname(self.path), file_str.strip()) - if not os.path.exists(full_path): - return TestFileError(f"Failed to locate path supplied to {file_directive}\n\tTest:{self.path}\n\tPath:{full_path}\n") - - file_bytes = file_to_bytes(full_path) - if file_bytes is None: - return TestFileError(f"Failed to convert file {full_path} to bytes") - - return file_bytes - else: - return b'' - - def _get_file_bytes(self, file_path: str) -> Optional[bytes]: - """ - Get file contents in bytes - """ - try: - with open(file_path, "rb") as f: - file_bytes = f.read() - assert isinstance(file_bytes, bytes), "expected bytes" - return file_bytes - except FileNotFoundError: - return None - - def _get_directive_contents(self, directive_prefix: str) -> Optional[Union[bytes, TestFileError]]: - """ - Look into the testfile itself for 
contents defined in directives. - Directives can appear anywhere in a line, as long as they're preceded by a comment syntax. - """ - contents = BytesIO() - first_match = True - try: - with open(self.path, 'r') as test_file: - for line in test_file: - comment_index = line.find(self.comment_syntax) - directive_index = line.find(directive_prefix) - if comment_index == -1 or directive_index == -1 or\ - comment_index > directive_index: - continue - - rhs_line = line.split(directive_prefix, 1)[1] - rhs_bytes = str_to_bytes(rhs_line, chop_newline=True) - if rhs_bytes is None: - return None - if not first_match: - contents.write(b'\n') - - contents.write(rhs_bytes) - first_match = False - contents.seek(0) - return contents.getvalue() if contents else None - except UnicodeDecodeError as e: - return TestFileError(e.reason) - except Exception as e: - return TestFileError(f"Unkown error occured while parsing testfile: {self.path}") - - def __repr__(self): - max_test_name_length = 30 - test_name = os.path.basename(self.path) - if len(test_name) > max_test_name_length: - test_name = test_name[:max_test_name_length - 3] + "..." - - expected_out = b'' - if isinstance(self.expected_out, bytes): - expected_out = self.expected_out - - input_stream = b'' - if isinstance(self.input_stream, bytes): - input_stream = self.input_stream - - return (f"{test_name:<{max_test_name_length}}" - f"{len(expected_out):>4}\t" - f"{len(input_stream):>4}") - - def to_dict(self) -> Dict: - out = str(self.expected_out) - ins = str(self.input_stream) - return { - "name": self.stem, - "path": self.path, - "expected_output": out, - "input_stream": ins - } - - def pretty_print(self) -> str: - """ - Generate a pretty-formatted string representation of the test file contents - with borders around it. 
- """ - file_content = file_to_str(self.path) - if not file_content: - return f"Error reading file {self.path}:" - - # query size of border to draw for user - try: - term_width = os.get_terminal_size().columns if hasattr(os, 'get_terminal_size') else 80 - except OSError: - term_width = 80 - content_width = min(term_width - 10, 100) - - # ascii border characters - top_border = '┌' + '─' * (content_width - 2) + '┐' - bottom_border = '└' + '─' * (content_width - 2) + '┘' - - # apply border format to each line in the file - formatted_lines = [] - formatted_lines.append(top_border) - for line in file_content.splitlines(): - # truncate long lines - if len(line) > content_width - 4: - display_line = line[:content_width - 7] + '...' - else: - display_line = line - - # format content with border - padded_line = display_line.ljust(content_width - 4) - formatted_lines.append(f'│ {padded_line} │') - - formatted_lines.append(bottom_border) - return '\n'.join(formatted_lines) - diff --git a/dragon_runner/src/toolchain.py b/dragon_runner/src/toolchain.py deleted file mode 100644 index 9ede064..0000000 --- a/dragon_runner/src/toolchain.py +++ /dev/null @@ -1,68 +0,0 @@ -import json -import os -import subprocess -from typing import Dict, List, Iterator -from dragon_runner.src.errors import * - -class Step(Verifiable): - def __init__(self, **kwargs): - self.name = kwargs.get('stepName', None) - self.exe_path = kwargs.get('executablePath', None) - self.arguments = kwargs.get('arguments', None) - self.output = kwargs.get('output', None) - self.allow_error = kwargs.get('allowError', False) - self.uses_ins = kwargs.get('usesInStr', False) - self.uses_runtime = kwargs.get('usesRuntime', False) - - def verify(self) -> ErrorCollection: - errors = ErrorCollection() - if not self.name: - errors.add(ConfigError(f"Missing required filed 'stepName' in Step {self.name}")) - - if not self.exe_path: - errors.add(ConfigError(f"Missing required field 'exe_path' in Step: {self.name}")) - - elif not 
os.path.exists(self.exe_path) and not self.exe_path.startswith('$'): - errors.add(ConfigError(f"Cannot find exe_path '{self.exe_path}' in Step: {self.name}")) - - return errors - - def to_dict(self) -> Dict: - return { - 'stepName': self.name, - 'exe_path': self.exe_path, - 'arguments': self.arguments, - 'output': self.output, - 'allowError': self.allow_error, - 'usesInStr': self.uses_ins, - 'usesRuntime': self.uses_runtime - } - - def __repr__(self): - return json.dumps(self.to_dict(), indent=2) - -class ToolChain(Verifiable): - def __init__(self, name: str, steps: List[Dict]): - self.name = name - self.steps = [Step(**step) for step in steps] - - def verify(self) -> ErrorCollection: - errors = ErrorCollection() - for step in self.steps: - errors.extend(step.verify().errors) - return errors - - def to_dict(self) -> Dict[str, List[Dict]]: - return {self.name: [step.to_dict() for step in self.steps]} - - def __repr__(self): - return json.dumps(self.to_dict(), indent=2) - - def __iter__(self) -> Iterator[Step]: - return iter(self.steps) - - def __len__(self) -> int: - return len(self.steps) - - def __getitem__(self, index: int) -> Step: - return self.steps[index] diff --git a/dragon_runner/src/utils.py b/dragon_runner/src/utils.py deleted file mode 100644 index 2f7fcee..0000000 --- a/dragon_runner/src/utils.py +++ /dev/null @@ -1,153 +0,0 @@ -import os -import sys -import tempfile -import base64 -from typing import Optional -from colorama import init - -# Initialize colorama -init(autoreset=True) - -def resolve_relative(relative_dir: str, abs_path: str) -> str: - """ - Resolve relative path into an absolute path wrt to abs_path. - """ - if os.path.isfile(abs_path): - abs_path = os.path.dirname(abs_path) - return os.path.join(abs_path, relative_dir) - -def make_tmp_file(content: bytes) -> Optional[str]: - """ - Create a file in tmp with the bytes from content. 
- """ - try: - with tempfile.NamedTemporaryFile(delete=False) as tmp: - tmp.write(content) - os.chmod(tmp.name, 0o700) - return tmp.name - except Exception as e: - print(f"Failed to make temporary file with error: {e}", file=sys.stderr) - return None - -def str_to_bytes(string: str, chop_newline: bool=False) -> Optional[bytes]: - """ - Convert a string to bytes. Optionally chop off the newline. Used for - directive parsing. - """ - if chop_newline and string.endswith('\n'): - string = string[:-1] - try: - return string.encode('utf-8') - except UnicodeEncodeError: - return None - -def bytes_to_str(data: bytes, encoding: str='utf-8') -> Optional[str]: - """ - Convert bytes into a string. - """ - assert isinstance(data, bytes), "Supplied bytes that are not of type bytes." - try: - return data.decode(encoding) - except UnicodeDecodeError: - return str(data) - except: - return None - -def file_to_bytes(file: str) -> Optional[bytes]: - """ - Read a file in binary mode and return the bytes inside. - Return None if an exception is thrown. - """ - try: - with open(file, 'rb') as f: - return f.read() - except Exception as e: - print(f"Reading bytes from file failed with: {e}") - return None - -def utf8_file_to_base64(file_path: str) -> Optional[str]: - """Convert file to base64 string""" - try: - with open(file_path, 'rb') as file: - return base64.b64encode(file.read()).decode('utf-8') - except: - return None - -def b64_to_bytes(b64_string: str) -> Optional[bytes]: - """ - Convert base64 string to bytes. - """ - try: - return base64.b64decode(b64_string) - except Exception as e: - print(f"Base64 decoding failed with: {e}", file=sys.stderr) - return None - -def b64_to_str(b64_string: str) -> Optional[str]: - """ - Convert base64 string to string. 
- """ - try: - return bytes_to_str(base64.b64decode(b64_string)) - except Exception as e: - print(f"Base64 decoding failed with: {e}", file=sys.stderr) - return None - -def bytes_to_b64(data: bytes) -> Optional[str]: - """ - Convert bytes to base64 string. - """ - assert isinstance(data, bytes), "Supplied data that is not of type bytes." - try: - return base64.b64encode(data).decode('utf-8') - except Exception as e: - print(f"Base64 encoding failed with: {e}", file=sys.stderr) - return None - -def truncated_bytes(data: bytes, max_bytes: int = 1024) -> bytes: - """ - Return a truncated version of the input bytes, with middle contents omitted if - size exceeds max_bytes. - """ - if len(data) <= max_bytes: - return data - - omission_message = b'\n{{ omitted for brevity }}\n' - available_bytes = max_bytes - len(omission_message) - half = available_bytes // 2 - truncated = data[:half] + omission_message + data[-half:] - - return truncated - -def file_to_str(file: str, max_bytes=1024) -> Optional[str]: - """ - return file in string form, with middle contents trucated if - size exceeds max_bytes - """ - file_bytes = file_to_bytes(file) - if file_bytes is None: - return "" - - if len(file_bytes) <= max_bytes: - return bytes_to_str(file_bytes) - - half = (max_bytes - 3) // 2 - truncated_bytes = file_bytes[:half] + \ - b'\n{{ Omitted middle bytes for brevity }}\n' + \ - file_bytes[-half:] - - return bytes_to_str(truncated_bytes) - -def bytes_to_file(file: str, data: bytes) -> Optional[str]: - """ - Write bytes directly into a file - """ - assert isinstance(data, bytes), "Supplied bytes that are not of type bytes." 
- try: - with open(file, 'wb') as f: - f.write(data) - return file - except Exception as e: - print(f"Writting bytes to file failed with: {e}") - return None - diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..67cca4c --- /dev/null +++ b/flake.lock @@ -0,0 +1,61 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1772433332, + "narHash": "sha256-izhTDFKsg6KeVBxJS9EblGeQ8y+O8eCa6RcW874vxEc=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "cf59864ef8aa2e178cccedbe2c178185b0365705", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix index 00ed455..0983927 100644 --- a/flake.nix +++ b/flake.nix @@ -10,45 +10,31 @@ flake-utils.lib.eachDefaultSystem (system: let pkgs = nixpkgs.legacyPackages.${system}; - - python-packages = ps: with ps; [ - colorama - pytest - numpy - flask - flask-cors - ]; - - python-with-packages = pkgs.python3.withPackages python-packages; in { devShells.default = pkgs.mkShell { buildInputs = with pkgs; [ - python-with-packages - python3Packages.pip 
- python3Packages.setuptools - python3Packages.wheel + rustc + cargo + rustfmt + clippy ]; - shellHook = '' echo "Dragon Runner development environment" - export PYTHONPATH="$PWD:$PYTHONPATH" ''; }; - packages.default = pkgs.python3Packages.buildPythonPackage { + packages.default = pkgs.rustPlatform.buildRustPackage { pname = "dragon-runner"; - version = "1.0.0"; + version = "0.1.0"; src = ./.; - - propagatedBuildInputs = python-packages pkgs.python3Packages; - + cargoLock.lockFile = ./Cargo.lock; + doCheck = false; meta = with pkgs.lib; { - description = "An experimental successor to the 415 tester"; - license = licenses.unfree; + description = "The 415 compiler unit tester"; + license = licenses.mit; maintainers = [ ]; }; }; }); } - diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index aa95c87..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,26 +0,0 @@ -[build-system] -requires = ["setuptools>=45", "wheel"] -build-backend = "setuptools.build_meta" - -[project] -name = "dragon-runner" -version = "1.0.0" -description = "An experimental successor to the 415 tester" -authors = [{name = "Justin Meimar", email = "meimar@ualberta.ca"}] -license = {file = "LICENSE"} -readme = "README.md" -requires-python = ">=3.6" -classifiers = ["Tester"] -dependencies = [ - "colorama==0.4.6", - "pytest==8.3.3", - "numpy==2.2.4", - "Flask==3.1.0", - 'flask-cors==6.0.0' -] - -[project.scripts] -dragon-runner = "dragon_runner.src.main:main" - -[tool.setuptools.package-data] -dragon_runner = ["py.typed"] diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index a635c5c..0000000 --- a/pytest.ini +++ /dev/null @@ -1,2 +0,0 @@ -[pytest] -pythonpath = . 
diff --git a/dragon_runner/__init__.py b/scripts/__init__.py similarity index 100% rename from dragon_runner/__init__.py rename to scripts/__init__.py diff --git a/dragon_runner/scripts/add_empty.py b/scripts/add_empty.py similarity index 69% rename from dragon_runner/scripts/add_empty.py rename to scripts/add_empty.py index 31eee68..bd7f24b 100644 --- a/dragon_runner/scripts/add_empty.py +++ b/scripts/add_empty.py @@ -2,7 +2,8 @@ ============================== 415 Grading Script ============================== Author: Justin Meimar Name: add_empty.py -Desc: +Desc: Supplement competitive test suite submissions with empty tests to adhere to +the submission minimum (usually 5 tests). ================================================================================ """ import sys @@ -11,7 +12,8 @@ import string from pathlib import Path from typing import List -from dragon_runner.scripts.base import Script +from base import Script +from key import Key class AddEmptyScript(Script): @@ -30,42 +32,22 @@ def get_parser(cls) -> argparse.ArgumentParser: prog="add_empty", description="Add empty test cases to test packages" ) - parser.add_argument("key_file", type=Path, help="Key file which has a line for each (SID, GH_Username) pair") + parser.add_argument("key_file", type=Path, help="Path to CSV key file") parser.add_argument("search_path", type=Path, help="Path to search for test files") parser.add_argument("empty_content", type=str, help="Empty content to write into files") return parser - @staticmethod - def load_key(key_path): - config = {} - with open(key_path) as key_file: - for line in key_file.readlines(): - sid, gh_username = line.strip().split(' ') - print("SID: ", sid, "\tGH Username: ", gh_username) - config[sid] = gh_username - print("Config Loaded...") - return config - - @staticmethod - def count_files_with_exclusions(directory: Path, excluded_extensions: list) -> int: - count = 0 - for path in directory.rglob('*'): - if path.is_file(): - if path.suffix.lower() 
not in excluded_extensions: - count += 1 - return count - @staticmethod def add_empty(key_file: Path, search_path: Path, empty_content: str): - config = AddEmptyScript.load_key(key_file) + key = Key(key_file) if not search_path.is_dir(): - error = "Could not create test directory." - print(error) + print("Could not find search directory.") return 1 all_fine = True - for (sid, gh_user) in config.items(): + for rec in key.iter_students(): + sid = rec.sid all_matches = list(search_path.rglob(sid)) if len(all_matches) == 0: print(f"Can not find a directory matching: {sid} in {search_path.name}") @@ -90,14 +72,14 @@ def add_empty(key_file: Path, search_path: Path, empty_content: str): all_fine = False while test_count < 5: - suffix= ''.join(random.choices(string.ascii_letters + string.digits, k=8)) + suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=8)) file_path = sid_test_dir / f"TA_empty_{test_count+1}_{suffix}.in" file_path.write_text(empty_content) test_count += 1 print(f"{sid} - Writing an empty file: {file_path.name}...") if all_fine: - print("All students submited at least five testcases!") + print("All students submitted at least five testcases!") @classmethod def main(cls, args: List[str]) -> int: @@ -108,4 +90,3 @@ def main(cls, args: List[str]) -> int: if __name__ == '__main__': sys.exit(AddEmptyScript.main(sys.argv[1:])) - diff --git a/dragon_runner/scripts/base.py b/scripts/base.py similarity index 96% rename from dragon_runner/scripts/base.py rename to scripts/base.py index 4f4f8d3..7501372 100644 --- a/dragon_runner/scripts/base.py +++ b/scripts/base.py @@ -38,6 +38,5 @@ def usage(cls) -> str: @classmethod @abstractmethod - def main(cls, args: List[str]) -> int: + def main(cls, args: List[str]) -> int: pass - diff --git a/dragon_runner/scripts/build.py b/scripts/build.py similarity index 85% rename from dragon_runner/scripts/build.py rename to scripts/build.py index aef7875..f02b6fc 100644 --- a/dragon_runner/scripts/build.py +++ 
b/scripts/build.py @@ -14,7 +14,8 @@ import argparse from pathlib import Path from typing import List -from dragon_runner.scripts.base import Script +from base import Script +from key import Key class BuildScript(Script): @@ -37,15 +38,22 @@ def get_parser(cls) -> argparse.ArgumentParser: parser.add_argument("log_file", type=Path, help="Path to log file") parser.add_argument("dir_prefix", type=str, help="Prefix common to all directories to be built") parser.add_argument("n", type=int, default=2, help="n_threads") + parser.add_argument("--key", type=Path, default=None, help="Path to CSV key file") + parser.add_argument("--assignment", type=str, default=None, help="Assignment column name from key file") return parser @classmethod - def build(cls, start_dir, log_path, dir_prefix, n_threads="2"): + def build(cls, start_dir, log_path, dir_prefix, n_threads="2", key_path=None, assignment=None): root_path = Path(start_dir).absolute() log_path = Path(log_path).absolute() directories = [d for d in root_path.iterdir() if d.is_dir() and (dir_prefix in d.name) and d.name != '.'] + if key_path and assignment: + key = Key(key_path) + valid_repos = set(key.iter_repos(assignment)) + directories = [d for d in directories if any(repo in d.name for repo in valid_repos)] + print("Directories to build:") for d in directories: print(" ", d) @@ -96,9 +104,9 @@ def main(cls, args: List[str]) -> int: parser = cls.get_parser() parsed_args = parser.parse_args(args) parsed_args.log_file.unlink(missing_ok=True) - cls.build(parsed_args.start_dir, parsed_args.log_file, parsed_args.dir_prefix, str(parsed_args.n)) + cls.build(parsed_args.start_dir, parsed_args.log_file, parsed_args.dir_prefix, + str(parsed_args.n), parsed_args.key, parsed_args.assignment) return 0 if __name__ == '__main__': sys.exit(BuildScript.main(sys.argv[1:])) - diff --git a/dragon_runner/scripts/checkout.py b/scripts/checkout.py similarity index 78% rename from dragon_runner/scripts/checkout.py rename to scripts/checkout.py 
index 9ef154e..44f6f10 100644 --- a/dragon_runner/scripts/checkout.py +++ b/scripts/checkout.py @@ -1,10 +1,19 @@ +""" +============================== 415 Grading Script ============================== +Author: Justin Meimar +Name: checkout.py +Desc: Once all the repositories are pulled from gh-classroom, this script will +checkout each to the latest commit before the deadline. +================================================================================ +""" import sys import subprocess import argparse from datetime import datetime from pathlib import Path from typing import List -from dragon_runner.scripts.base import Script +from base import Script +from key import Key class CheckoutScript(Script): @@ -28,6 +37,8 @@ def get_parser(cls) -> argparse.ArgumentParser: help='Directory of repositories to checkout') parser.add_argument('checkout_time', help='Checkout time in format: "YYYY-MM-DD HH:MM:SS"') + parser.add_argument("--key", type=Path, default=None, help="Path to CSV key file") + parser.add_argument("--assignment", type=str, default=None, help="Assignment column name from key file") return parser @classmethod @@ -65,11 +76,19 @@ def checkout_commit(cls, repo_path, commit_hash): return result.returncode == 0 @classmethod - def process_repositories(cls, submissions_dir: Path, checkout_time: str): + def process_repositories(cls, submissions_dir: Path, checkout_time: str, key_path=None, assignment=None): + valid_repos = None + if key_path and assignment: + key = Key(key_path) + valid_repos = set(key.iter_repos(assignment)) + for submission_dir in sorted(submissions_dir.iterdir()): if not submission_dir.is_dir(): continue + if valid_repos is not None and not any(repo in submission_dir.name for repo in valid_repos): + continue + git_dir = submission_dir / '.git' if not git_dir.exists(): print(f"\nSkipping {submission_dir.name} - not a git repository") @@ -114,9 +133,9 @@ def main(cls, args: List[str]) -> int: print(f"Using submission dir: {sub}") 
print(f"Checking out to latest commit before: {parsed_args.checkout_time}") - cls.process_repositories(sub, parsed_args.checkout_time) + cls.process_repositories(sub, parsed_args.checkout_time, + parsed_args.key, parsed_args.assignment) return 0 if __name__ == "__main__": sys.exit(CheckoutScript.main(sys.argv[1:])) - diff --git a/dragon_runner/scripts/clean_build.py b/scripts/clean_build.py similarity index 69% rename from dragon_runner/scripts/clean_build.py rename to scripts/clean_build.py index a60c07f..e78364c 100644 --- a/dragon_runner/scripts/clean_build.py +++ b/scripts/clean_build.py @@ -3,7 +3,8 @@ from pathlib import Path import argparse from typing import List -from dragon_runner.scripts.base import Script +from base import Script +from key import Key class CleanBuildScript(Script): @@ -23,14 +24,24 @@ def get_parser(cls) -> argparse.ArgumentParser: description="Remove build directories from all submissions" ) parser.add_argument('submission_dir', type=Path, help='Directory of submissions to clean') + parser.add_argument("--key", type=Path, default=None, help="Path to CSV key file") + parser.add_argument("--assignment", type=str, default=None, help="Assignment column name from key file") return parser @staticmethod - def remove_build_dirs(submissions_dir: Path): + def remove_build_dirs(submissions_dir: Path, key_path=None, assignment=None): + valid_repos = None + if key_path and assignment: + key = Key(key_path) + valid_repos = set(key.iter_repos(assignment)) + for submission_dir in sorted(submissions_dir.iterdir()): if not submission_dir.is_dir(): continue + if valid_repos is not None and not any(repo in submission_dir.name for repo in valid_repos): + continue + build_dir = submission_dir / 'build' if not build_dir.exists(): continue @@ -53,9 +64,8 @@ def main(cls, args: List[str]) -> int: print("Submission directory does not exist...") return 1 - cls.remove_build_dirs(sub) + cls.remove_build_dirs(sub, parsed_args.key, parsed_args.assignment) return 0 
if __name__ == "__main__": sys.exit(CleanBuildScript.main(sys.argv[1:])) - diff --git a/dragon_runner/scripts/gather.py b/scripts/gather.py similarity index 55% rename from dragon_runner/scripts/gather.py rename to scripts/gather.py index 3db1eb3..0e0ad46 100644 --- a/dragon_runner/scripts/gather.py +++ b/scripts/gather.py @@ -11,7 +11,8 @@ import argparse from pathlib import Path from typing import List -from dragon_runner.scripts.base import Script +from base import Script +from key import Key class GatherScript(Script): @@ -30,62 +31,51 @@ def get_parser(cls) -> argparse.ArgumentParser: prog="gather", description="Gather all the testfiles in student directories" ) - parser.add_argument("key_file", type=Path, help="Key file which has a line for each (SID, GH_Username) pair") + parser.add_argument("key_file", type=Path, help="Path to CSV key file") parser.add_argument("search_path", type=Path, help="Path to search for test files") - parser.add_argument("project_name", type=Path, help="Path to search for test files") + parser.add_argument("--assignment", type=str, required=True, + help="Assignment column name from key file (e.g. A1)") return parser - @staticmethod - def load_key(key_path: Path): - config = {} - with open(key_path) as key_file: - for line in key_file.readlines(): - sid, gh_username = line.strip().split(' ') - print("SID: ", sid, "\tGH Username: ", gh_username) - config[sid] = gh_username - return config - @staticmethod def gather(key_file: Path, - search_path: str, - project_name: str, + search_path: Path, + assignment: str, output_dir: str = "submitted-testfiles"): - is_rt = True - config = GatherScript.load_key(key_file) + + key = Key(key_file) search_dir = Path(search_path) - project_name = str(project_name).strip() if not search_dir.is_dir(): - error = "Could not create test directory." 
- print(error) + print("Could not find search directory.") return 1 - directories = [d for d in search_dir.iterdir() if d.is_dir() and str(project_name) in d.name] - for (sid, gh_user) in config.items(): - print("Finding submission for: ", gh_user) + directories = [d for d in search_dir.iterdir() if d.is_dir()] + for rec in key.iter_students(): + repo = rec.repos.get(assignment) + if not repo: + print(f"No repo for {rec.sid} in assignment {assignment}, skipping") + continue + + print(f"Finding submission for: {rec.ccid} (repo: {repo})") for d in directories: - if gh_user in str(d): - if is_rt: - suffix = '-'.join(gh_user.split('-')[1:]) - expected_test_dir = d / "tests" / "testfiles" / suffix - else: - expected_test_dir = d / "tests" / "testfiles" / sid + if repo in d.name: + expected_test_dir = d / "tests" / "testfiles" / rec.sid if expected_test_dir.is_dir(): - print(f"-- Found properly formatted testfiles for {sid}") - shutil.copytree(expected_test_dir, (Path(output_dir) / sid), dirs_exist_ok=True) + print(f"-- Found properly formatted testfiles for {rec.sid}") + shutil.copytree(expected_test_dir, (Path(output_dir) / rec.sid), dirs_exist_ok=True) break else: - print(f"-- Could NOT find testfiles for {sid}") + print(f"-- Could NOT find testfiles for {rec.sid}") exit(1) @classmethod def main(cls, args: List[str]) -> int: parser = cls.get_parser() parsed_args = parser.parse_args(args) - cls.gather(parsed_args.key_file, parsed_args.search_path, parsed_args.project_name) + cls.gather(parsed_args.key_file, parsed_args.search_path, parsed_args.assignment) return 0 if __name__ == '__main__': sys.exit(GatherScript.main(sys.argv[1:])) - diff --git a/dragon_runner/scripts/gen_config.py b/scripts/gen_config.py similarity index 53% rename from dragon_runner/scripts/gen_config.py rename to scripts/gen_config.py index 392628e..4607b38 100644 --- a/dragon_runner/scripts/gen_config.py +++ b/scripts/gen_config.py @@ -10,39 +10,8 @@ import argparse from typing import Optional, 
List from pathlib import Path -from typing import Iterator, Tuple -from dragon_runner.scripts.base import Script - - -class Key: - def __init__(self, key_path: Path): - self.key_path = key_path - self.sid_repo_suffix_map = {} - - with open(key_path) as key_file: - for line in key_file.readlines(): - sids, repo_suffix = line.strip().split(' ') - sid_list = sids.strip().split(',') - for sid in sid_list: - self.sid_repo_suffix_map[sid] = repo_suffix - - def __str__(self): - s = "" - for k, v in self.sid_repo_suffix_map.items(): - s += (f"{k}\t{v}") - return s - - def get_repo_for_sid(self, sid): - return self.sid_repo_suffix_map[sid] - - def iter_sids(self) -> Iterator[str]: - return iter(self.sid_repo_suffix_map.keys()) - - def iter_repos(self) -> Iterator[str]: - return iter(set(self.sid_repo_suffix_map.values())) - - def iter_both(self) -> Iterator[Tuple[str, str]]: - return iter(self.sid_repo_suffix_map.items()) +from base import Script +from key import Key class GenConfigScript(Script): @@ -62,20 +31,23 @@ def get_parser(cls) -> argparse.ArgumentParser: description="Generate dragon-runner configuration from student submissions" ) parser.add_argument("key_path", type=Path, - help="Path to key file containing each team/ccid on a line.") + help="Path to CSV key file") parser.add_argument("submissions_path", type=Path, help="Path to project submissions cloned from github classroom.") parser.add_argument("binary", type=str, - help="Name of binary to expect in prohjects bin/") + help="Name of binary to expect in projects bin/") + parser.add_argument("--assignment", type=str, required=True, + help="Assignment column name from key file (e.g. 
A1)") parser.add_argument("--runtime", type=str, default=None, - help="Name of runtime library to expect in prohjects bin/") + help="Name of runtime library to expect in projects bin/") return parser @staticmethod - def gen_config(key_path:Path, - submission_dir:Path, - binary:str, - runtime:Optional[str]=None): + def gen_config(key_path: Path, + submission_dir: Path, + binary: str, + assignment: str, + runtime: Optional[str] = None): executables_config = {} runtimes_config = {} @@ -85,35 +57,38 @@ def gen_config(key_path:Path, assert submission_dir.is_dir(), "must supply directory to submissions." key = Key(key_path) - for (sids, repo_suffix) in key.iter_both(): - match_dir = [d for d in submission_dir.iterdir() if d.is_dir() and str(repo_suffix) in d.name] - if match_dir == []: - print(f"Couldn't find: repo with suffix {repo_suffix}") + for repo in key.iter_repos(assignment): + match_dir = [d for d in submission_dir.iterdir() if d.is_dir() and repo in d.name] + if not match_dir: + print(f"Couldn't find: repo with name {repo}") exit(1) match_dir = Path(match_dir[0]) - expected_package = match_dir / "tests/testfiles" / sids + members = key.students_for_repo(assignment, repo) + sid_label = ",".join(rec.sid for rec in members) + + expected_package = match_dir / "tests/testfiles" / sid_label expected_binary = match_dir / f"bin/{binary}" expected_runtime = match_dir / f"bin/{runtime}" if not expected_package.is_file: print(f"Can not find expected package: {expected_package}") - break; + break if not expected_binary.is_file: print(f"Can not find expected binary: {expected_binary}") - break; + break if runtime is not None and not expected_runtime.is_file: - print(f"Can not find expected binary: {expected_binary}") - break; + print(f"Can not find expected runtime: {expected_runtime}") + break - executables_config.update({f"{sids}":f"{Path.absolute(expected_binary)}"}) - runtimes_config.update({f"{sids}":f"{Path.absolute(expected_runtime)}"}) + 
executables_config[sid_label] = str(Path.absolute(expected_binary)) + runtimes_config[sid_label] = str(Path.absolute(expected_runtime)) - config.update({"testedExecutablePaths": executables_config}) + config["testedExecutablePaths"] = executables_config if runtime is not None: - config.update({"runtimes": runtimes_config}) + config["runtimes"] = runtimes_config print(json.dumps(config, indent=4)) with open('config.json', 'w') as f: @@ -123,9 +98,9 @@ def gen_config(key_path:Path, def main(cls, args: List[str]) -> int: parser = cls.get_parser() parsed_args = parser.parse_args(args) - cls.gen_config(parsed_args.key_path, parsed_args.submissions_path, parsed_args.binary, parsed_args.runtime) + cls.gen_config(parsed_args.key_path, parsed_args.submissions_path, + parsed_args.binary, parsed_args.assignment, parsed_args.runtime) return 0 if __name__ == '__main__': - sys.exit(GenConfigScript.main(sys.argv[1:])) - + sys.exit(GenConfigScript.main(sys.argv[1:])) diff --git a/dragon_runner/scripts/grade.py b/scripts/grade.py similarity index 99% rename from dragon_runner/scripts/grade.py rename to scripts/grade.py index e859880..ac5eca8 100644 --- a/dragon_runner/scripts/grade.py +++ b/scripts/grade.py @@ -7,7 +7,7 @@ from pathlib import Path from fractions import Fraction from typing import List -from dragon_runner.scripts.base import Script +from base import Script class GradeScript(Script): diff --git a/dragon_runner/scripts/grade_perf.py b/scripts/grade_perf.py similarity index 98% rename from dragon_runner/scripts/grade_perf.py rename to scripts/grade_perf.py index 615a451..34adb9e 100644 --- a/dragon_runner/scripts/grade_perf.py +++ b/scripts/grade_perf.py @@ -20,7 +20,7 @@ import numpy as np from pathlib import Path from typing import List -from dragon_runner.scripts.base import Script +from base import Script class GradePerfScript(Script): @@ -89,4 +89,3 @@ def main(cls, args: List[str]) -> int: if __name__ == "__main__": sys.exit(GradePerfScript.main(sys.argv[1:])) - 
diff --git a/scripts/key.py b/scripts/key.py new file mode 100644 index 0000000..81ee3a1 --- /dev/null +++ b/scripts/key.py @@ -0,0 +1,72 @@ +import csv +from dataclasses import dataclass, field +from pathlib import Path +from typing import Dict, Iterator, List, Optional + + +@dataclass +class StudentRecord: + sid: str + ccid: str + github_id: str + repos: Dict[str, str] = field(default_factory=dict) # assignment -> repo name + + +class Key: + def __init__(self, key_path: Path): + self.key_path = key_path + self._records: List[StudentRecord] = [] + self._by_sid: Dict[str, StudentRecord] = {} + self._by_ccid: Dict[str, StudentRecord] = {} + self._by_github: Dict[str, StudentRecord] = {} + self.assignments: List[str] = [] + + with open(key_path, newline='') as f: + reader = csv.DictReader(f) + headers = reader.fieldnames or [] + if len(headers) < 3: + raise ValueError(f"Key file must have at least SID,CCID,GitHubID columns, got: {headers}") + + self.assignments = headers[3:] + + for row in reader: + vals = list(row.values()) + sid, ccid, github_id = vals[0].strip(), vals[1].strip(), vals[2].strip() + repos = {} + for i, assignment in enumerate(self.assignments): + val = vals[3 + i].strip() if vals[3 + i] else "" + if val: + repos[assignment] = val + + rec = StudentRecord(sid=sid, ccid=ccid, github_id=github_id, repos=repos) + self._records.append(rec) + self._by_sid[sid] = rec + self._by_ccid[ccid] = rec + self._by_github[github_id] = rec + + def get(self, identifier: str) -> Optional[StudentRecord]: + """Lookup by any of SID, CCID, or GitHubID.""" + return self._by_sid.get(identifier) or self._by_ccid.get(identifier) or self._by_github.get(identifier) + + def iter_students(self) -> Iterator[StudentRecord]: + return iter(self._records) + + def iter_repos(self, assignment: str) -> Iterator[str]: + """Unique repo names for an assignment.""" + seen = set() + for rec in self._records: + repo = rec.repos.get(assignment) + if repo and repo not in seen: + seen.add(repo) + 
yield repo + + def students_for_repo(self, assignment: str, repo: str) -> List[StudentRecord]: + """Team members sharing a repo for an assignment.""" + return [rec for rec in self._records if rec.repos.get(assignment) == repo] + + def get_repo(self, identifier: str, assignment: str) -> Optional[str]: + """Repo for a student + assignment.""" + rec = self.get(identifier) + if rec is None: + return None + return rec.repos.get(assignment) diff --git a/dragon_runner/scripts/loader.py b/scripts/loader.py similarity index 76% rename from dragon_runner/scripts/loader.py rename to scripts/loader.py index a77e05b..ca46879 100644 --- a/dragon_runner/scripts/loader.py +++ b/scripts/loader.py @@ -3,7 +3,7 @@ import importlib from typing import List, Dict, Type, Optional from pathlib import Path -from dragon_runner.scripts.base import Script +from base import Script class Loader: @@ -12,16 +12,19 @@ class Loader: Each script is executed as a subprocess using Python's -m flag to ensure consistent behavior whether called directly or through dragon-runner. 
""" + # Directory containing the script modules + SCRIPTS_DIR = Path(__file__).parent + def __init__(self): self.script_modules = { - "add_empty": "dragon_runner.scripts.add_empty", - "build": "dragon_runner.scripts.build", - "clean-build": "dragon_runner.scripts.clean_build", - "checkout": "dragon_runner.scripts.checkout", - "gather": "dragon_runner.scripts.gather", - "gen-config": "dragon_runner.scripts.gen_config", - "grade": "dragon_runner.scripts.grade", - "grade-perf": "dragon_runner.scripts.grade_perf", + "add_empty": "add_empty", + "build": "build", + "clean-build": "clean_build", + "checkout": "checkout", + "gather": "gather", + "gen-config": "gen_config", + "grade": "grade", + "grade-perf": "grade_perf", } def _load_script_class(self, module_name: str) -> Optional[Type[Script]]: @@ -31,7 +34,6 @@ def _load_script_class(self, module_name: str) -> Optional[Type[Script]]: """ try: module = importlib.import_module(module_name) - # Look for a class that inherits from Script for attr_name in dir(module): attr = getattr(module, attr_name) if (isinstance(attr, type) and @@ -52,7 +54,8 @@ def __call__(self, args: List[str]): return 1 module = self.script_modules[args[0]] - cmd = [sys.executable, "-m", module] + args[1:] + script_path = self.SCRIPTS_DIR / f"{module}.py" + cmd = [sys.executable, str(script_path)] + args[1:] try: result = subprocess.run(cmd, check=False) return result.returncode @@ -72,4 +75,3 @@ def __repr__(self): s += f" * {script_name}: {(len(max_script) - len(script_name))* ' '} " s += f"{script_class.description()}\n" return s - diff --git a/src/cli.rs b/src/cli.rs new file mode 100644 index 0000000..b8fcf19 --- /dev/null +++ b/src/cli.rs @@ -0,0 +1,185 @@ +use std::fmt; +use std::path::PathBuf; + +use clap::{Args, Parser, Subcommand}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum Mode { + #[default] + Regular, + Tournament, + Perf, + Memcheck, +} + +impl fmt::Display for Mode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) 
-> fmt::Result { + match self { + Mode::Regular => write!(f, "regular"), + Mode::Tournament => write!(f, "tournament"), + Mode::Perf => write!(f, "perf"), + Mode::Memcheck => write!(f, "memcheck"), + } + } +} + +/// Shared flags available in all modes (also used as the runtime args type). +#[derive(Args, Debug, Clone, Default)] +pub struct RunnerArgs { + /// Set by the subcommand, not by clap. + #[arg(skip)] + pub mode: Mode, + + /// Path to the JSON configuration file + pub config_file: PathBuf, + + /// Path to write failure log (tournament mode) + #[arg(long = "fail-log")] + pub failure_log: Option, + + /// Executable ID to use as the solution (tournament mode) + #[arg(long = "solution-exe")] + pub solution_exe: Option, + + /// Timeout in seconds for each step + #[arg(long, default_value_t = 2.0)] + pub timeout: f64, + + /// Verify CCID in packages + #[arg(long)] + pub verify: bool, + + /// Debug a specific package path + #[arg(long = "debug-package")] + pub debug_package: Option, + + /// Filter packages by glob pattern (case insensitive) + #[arg(short = 'p', long = "package")] + pub package_filter: Option, + + /// Show timing information + #[arg(short = 't', long = "time")] + pub time: bool, + + /// Increase verbosity (can be repeated: -v, -vv, -vvv) + #[arg(short = 'v', long = "verbosity", action = clap::ArgAction::Count)] + pub verbosity: u8, + + /// Show test case contents on failure + #[arg(short = 's', long = "show-testcase")] + pub show_testcase: bool, + + /// Output file path + #[arg(short = 'o', long = "output")] + pub output: Option, + + /// Stop on first failure + #[arg(short = 'f', long = "fast-fail")] + pub fast_fail: bool, + + /// Print full file paths for test results instead of just the filename + #[arg(long = "full-path")] + pub full_path: bool, +} + +/// CMPUT 415 testing utility +#[derive(Parser, Debug)] +#[command(name = "dragon-runner", about = "CMPUT 415 testing utility")] +pub struct Cli { + #[command(subcommand)] + pub command: Commands, 
+} + +#[derive(Subcommand, Debug)] +pub enum Commands { + /// Run in regular mode (default) + Regular { + #[command(flatten)] + flags: RunnerArgs, + }, + /// Run in tournament/grading mode + Tournament { + #[command(flatten)] + flags: RunnerArgs, + }, + /// Run performance tests + Perf { + #[command(flatten)] + flags: RunnerArgs, + }, + /// Run with memory checking (valgrind) + Memcheck { + #[command(flatten)] + flags: RunnerArgs, + }, + /// Run a grading script + Script { + /// Script name and arguments + #[arg(trailing_var_arg = true, allow_hyphen_values = true)] + args: Vec, + }, + /// Start an HTTP server exposing the test runner API + Serve { + /// Path to the JSON configuration file + config_file: PathBuf, + /// Address to bind the server to + #[arg(long, default_value = "127.0.0.1:3000")] + bind: String, + /// Timeout in seconds for each step + #[arg(long, default_value_t = 2.0)] + timeout: f64, + /// Maximum number of concurrent test executions + #[arg(long, default_value_t = 4)] + max_concurrent: usize, + }, +} + +/// Result of parsing CLI arguments — either a runner mode, a script invocation, or a server. +pub enum CliAction { + Run(RunnerArgs), + Script(Vec), + Serve { + config_file: PathBuf, + bind: String, + timeout: f64, + max_concurrent: usize, + }, +} + +/// Parse CLI arguments into a CliAction. +/// +/// Supports: `dragon-runner config.json [flags...]` +/// `dragon-runner script [args...]` +/// If no recognized subcommand is given, defaults to "regular". +pub fn parse_cli_args() -> CliAction { + let raw_args: Vec = std::env::args().collect(); + + // Try parsing as-is first. If that fails, assume the user omitted the + // subcommand and default to "regular". 
+ let cli = Cli::try_parse_from(&raw_args).unwrap_or_else(|_| { + let mut patched = vec![raw_args[0].clone(), "regular".to_string()]; + patched.extend_from_slice(&raw_args[1..]); + Cli::parse_from(patched) + }); + + match cli.command { + Commands::Script { args } => CliAction::Script(args), + Commands::Serve { config_file, bind, timeout, max_concurrent } => { + CliAction::Serve { config_file, bind, timeout, max_concurrent } + } + commands => { + let (mode, mut args) = match commands { + Commands::Regular { flags } => (Mode::Regular, flags), + Commands::Tournament { flags } => (Mode::Tournament, flags), + Commands::Perf { flags } => (Mode::Perf, flags), + Commands::Memcheck { flags } => (Mode::Memcheck, flags), + Commands::Script { .. } | Commands::Serve { .. } => unreachable!(), + }; + args.mode = mode; + + crate::log::set_debug_level(args.verbosity as u32); + + CliAction::Run(args) + } + } +} diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..827cdf9 --- /dev/null +++ b/src/config.rs @@ -0,0 +1,444 @@ +use std::collections::HashMap; +use std::fmt; +use std::fs; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +use serde::Deserialize; + +use crate::{debug, trace, trace2}; +use crate::cli::RunnerArgs; +use crate::error::{DragonError, Validate}; +use crate::testfile::TestFile; +use crate::toolchain::{Step, ToolChain}; +use crate::util::resolve_relative; + +/// Raw JSON shape of a config file, deserialized directly by serde. +#[derive(Deserialize, Default)] +#[serde(rename_all = "camelCase")] +struct RawConfig { + #[serde(default)] + test_dir: String, + #[serde(default)] + tested_executable_paths: HashMap, + #[serde(default)] + runtimes: HashMap, + #[serde(default)] + toolchains: HashMap>, +} + +// --------------------------------------------------------------------------- +// SubPackage +// --------------------------------------------------------------------------- + +/// Represents a set of tests in a directory. 
+#[derive(Debug, Clone)] +pub struct SubPackage { + pub path: PathBuf, + pub name: String, + pub depth: usize, + pub tests: Vec>, +} + +impl SubPackage { + pub fn new(path: &Path, depth: usize) -> Self { + let name = path + .file_name() + .unwrap_or_default() + .to_string_lossy() + .into_owned(); + + let tests = if path.is_dir() { + Self::gather_tests(path) + } else { + vec![Arc::new(TestFile::new(path))] + }; + + Self { path: path.into(), name, depth, tests } + } + + fn gather_tests(dir: &Path) -> Vec> { + let mut tests: Vec> = fs::read_dir(dir) + .into_iter() + .flatten() + .filter_map(|e| e.ok()) + .filter(|e| TestFile::is_test(&e.path())) + .map(|e| Arc::new(TestFile::new(&e.path()))) + .collect(); + tests.sort_by(|a, b| a.file.cmp(&b.file)); + tests + } +} + +impl Validate for SubPackage { + fn validate(&self) -> Vec { + self.tests.iter().flat_map(|t| t.validate()).collect() + } +} + +// --------------------------------------------------------------------------- +// Package +// --------------------------------------------------------------------------- + +/// Represents a single test package. 
+#[derive(Debug, Clone)] +pub struct Package { + pub path: PathBuf, + pub name: String, + pub n_tests: usize, + pub subpackages: Vec, +} + +impl Package { + pub fn new(path: &Path) -> Self { + let name = path + .file_name() + .unwrap_or_default() + .to_string_lossy() + .into_owned(); + + let mut pkg = Self { + path: path.into(), + name, + n_tests: 0, + subpackages: Vec::new(), + }; + + if path.is_dir() { + pkg.gather_subpackages(); + } else { + pkg.push_subpackage(SubPackage::new(path, 0)); + } + + pkg + } + + fn push_subpackage(&mut self, spkg: SubPackage) { + self.n_tests += spkg.tests.len(); + self.subpackages.push(spkg); + } + + fn gather_subpackages(&mut self) { + let top_level = SubPackage::new(&self.path, 0); + if !top_level.tests.is_empty() { + self.push_subpackage(top_level); + } + let path = self.path.clone(); + for spkg in Self::collect_subpackages_recursive(&path, 1) { + self.push_subpackage(spkg); + } + } + + fn collect_subpackages_recursive(dir: &Path, depth: usize) -> Vec { + fs::read_dir(dir) + .into_iter() + .flatten() + .filter_map(|e| e.ok()) + .filter(|e| e.path().is_dir()) + .flat_map(|e| { + let entry_path = e.path(); + let spkg = SubPackage::new(&entry_path, depth); + let children = Self::collect_subpackages_recursive(&entry_path, depth + 1); + let head = if spkg.tests.is_empty() { None } else { Some(spkg) }; + head.into_iter().chain(children) + }) + .collect() + } +} + +impl Validate for Package { + fn validate(&self) -> Vec { + self.subpackages.iter().flat_map(|s| s.validate()).collect() + } +} + +// --------------------------------------------------------------------------- +// Executable +// --------------------------------------------------------------------------- + +/// Represents a tested executable with an optional runtime. 
+#[derive(Debug, Clone)] +pub struct Executable { + pub id: String, + pub exe_path: PathBuf, + pub runtime: PathBuf, +} + +impl Executable { + pub fn new(id: &str, exe_path: PathBuf, runtime: PathBuf) -> Self { + Self { id: id.into(), exe_path, runtime } + } + + /// Build environment variables needed for runtime library injection. + /// Returns an empty map if no runtime is configured. + pub fn runtime_env(&self) -> HashMap { + let mut env = HashMap::new(); + if self.runtime.as_os_str().is_empty() { + return env; + } + let rt_dir = self.runtime.parent().unwrap_or(Path::new("")).display().to_string(); + let rt_stem = self.runtime.file_stem().unwrap_or_default().to_string_lossy(); + let rt_lib = rt_stem.strip_prefix("lib").unwrap_or(&rt_stem).to_string(); + let rt_str = self.runtime.display().to_string(); + + if cfg!(target_os = "macos") { + env.insert("DYLD_LIBRARY_PATH".into(), rt_dir.clone()); + env.insert("DYLD_INSERT_LIBRARIES".into(), rt_str); + } else { + env.insert("LD_LIBRARY_PATH".into(), rt_dir.clone()); + env.insert("LD_PRELOAD".into(), rt_str); + } + env.insert("RT_PATH".into(), rt_dir); + env.insert("RT_LIB".into(), rt_lib); + env + } +} + +impl Validate for Executable { + fn validate(&self) -> Vec { + let mut errors = Vec::new(); + if !self.exe_path.exists() { + errors.push(DragonError::MissingFile { + path: self.exe_path.clone(), + context: format!("Executable '{}'", self.id), + }); + } + if !self.runtime.as_os_str().is_empty() && !self.runtime.exists() { + errors.push(DragonError::MissingFile { + path: self.runtime.clone(), + context: format!("Executable '{}' runtime", self.id), + }); + } + errors + } +} + +// --------------------------------------------------------------------------- +// Config +// --------------------------------------------------------------------------- + +/// In-memory representation of a JSON configuration file. 
+#[derive(Debug, Clone)] +pub struct Config { + pub name: String, + pub config_path: PathBuf, + pub test_dir: PathBuf, + pub executables: Vec, + pub toolchains: Vec, + pub packages: Vec, + pub package_filter: String, +} + +impl Config { + fn new( + config_path: &Path, + raw: RawConfig, + debug_package: Option<&str>, + package_filter: &str, + ) -> Self { + let abs_config = fs::canonicalize(config_path) + .unwrap_or_else(|_| config_path.to_path_buf()); + + let name = config_path + .file_stem() + .unwrap_or_default() + .to_string_lossy() + .into_owned(); + + let test_dir = resolve_relative(Path::new(&raw.test_dir), &abs_config); + + let executables = raw.tested_executable_paths + .iter() + .map(|(id, path_str)| { + let exe_path = resolve_relative(Path::new(path_str), &abs_config); + let runtime = raw.runtimes.get(id) + .map(|rt_path| { + let resolved = resolve_relative(Path::new(rt_path), &abs_config); + fs::canonicalize(&resolved).unwrap_or(resolved) + }) + .unwrap_or_default(); + Executable::new(id, exe_path, runtime) + }) + .collect(); + + let toolchains = raw.toolchains + .into_iter() + .map(|(name, steps)| ToolChain::new(&name, steps)) + .collect(); + + let packages = Self::gather_packages(&test_dir, debug_package); + + Self { + name, + config_path: abs_config, + test_dir, + executables, + toolchains, + packages, + package_filter: package_filter.into(), + } + } + + fn gather_packages(test_dir: &Path, debug_package: Option<&str>) -> Vec { + if let Some(pkg) = debug_package.filter(|p| !p.is_empty()) { + return vec![Package::new(Path::new(pkg))]; + } + fs::read_dir(test_dir) + .into_iter() + .flatten() + .filter_map(|e| e.ok()) + .filter(|e| e.path().is_dir()) + .map(|e| Package::new(&e.path())) + .collect() + } + + fn collect_errors(&self) -> Vec { + let mut errors = Vec::new(); + if !self.test_dir.exists() { + errors.push(DragonError::MissingTestDir { + path: self.test_dir.clone(), + }); + } + errors.extend( + self.executables.iter() + .flat_map(|e| e.validate()) 
+ .chain(self.toolchains.iter().flat_map(|t| t.validate())) + .chain(self.packages.iter().flat_map(|p| p.validate())) + ); + errors + } + + pub fn log_test_info(&self) { + debug!(0, "\nPackages:"); + for pkg in &self.packages { + debug!(2, "-- ({})", pkg.name); + for spkg in &pkg.subpackages { + trace!(4, "-- ({})", spkg.name); + for test in &spkg.tests { + trace2!(6, "-- ({})", test.file); + } + } + } + } +} + +impl fmt::Display for Config { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self, f) + } +} + +/// Load and parse a JSON configuration file. +pub fn load_config(config_path: &Path, args: Option<&RunnerArgs>) -> Result> { + let path = config_path.to_path_buf(); + + let content = fs::read_to_string(config_path) + .map_err(|_| vec![DragonError::ConfigRead { path: path.clone() }])?; + + let raw: RawConfig = serde_json::from_str(&content) + .map_err(|e| vec![DragonError::ConfigParse { path: path.clone(), reason: e.to_string() }])?; + + let debug_package = args + .and_then(|a| a.debug_package.as_deref()); + let package_filter = args.and_then(|a| a.package_filter.as_deref()).unwrap_or(""); + + let config = Config::new(config_path, raw, debug_package, package_filter); + let errors = config.collect_errors(); + if errors.is_empty() { + Ok(config) + } else { + Err(errors) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn configs_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests").join("configs") + } + + fn config_path(name: &str) -> PathBuf { + configs_dir().join(name) + } + + #[test] + fn test_valid_config() { + let path = config_path("gccPassConfig.json"); + let config = load_config(&path, None).expect("config should load"); + + assert!( + config.test_dir.exists(), + "test_dir should exist: {}", + config.test_dir.display() + ); + assert!(!config.packages.is_empty(), "should have packages"); + + for pkg in &config.packages { + assert!(!pkg.subpackages.is_empty(), "package {} should have subpackages", 
pkg.name); + for spkg in &pkg.subpackages { + assert!(!spkg.tests.is_empty(), "subpackage {} should have tests", spkg.name); + } + } + } + + #[test] + fn test_package_filter() { + let path = config_path("gccPassConfig.json"); + let config = load_config(&path, None).expect("config should load"); + + let all_subpackages: Vec = config + .packages + .iter() + .flat_map(|pkg| pkg.subpackages.iter()) + .map(|spkg| spkg.path.display().to_string()) + .collect(); + + assert!(!all_subpackages.is_empty(), "should have subpackages"); + + let filter_pattern = "*ErrorPass*"; + let filtered: Vec<&String> = all_subpackages + .iter() + .filter(|path| { + glob::Pattern::new(&filter_pattern.to_lowercase()) + .map(|pat| pat.matches(&path.to_lowercase())) + .unwrap_or(false) + }) + .collect(); + + assert!(!filtered.is_empty(), "filter should match some subpackages"); + + for path in &filtered { + assert!( + path.to_lowercase().contains("errorpass"), + "filtered path should contain 'errorpass': {}", + path + ); + } + } + + #[test] + fn test_invalid_dir_config() { + let path = config_path("invalidDirConfig.json"); + let errors = load_config(&path, None).unwrap_err(); + + assert!(!errors.is_empty(), "should have errors for invalid dir"); + assert!( + errors.iter().any(|e| matches!(e, DragonError::MissingTestDir { .. })), + "should have a MissingTestDir error" + ); + } + + #[test] + fn test_invalid_exe_config() { + let path = config_path("invalidExeConfig.json"); + let errors = load_config(&path, None).unwrap_err(); + + assert!(!errors.is_empty(), "should have errors for invalid exe"); + assert!( + errors.iter().any(|e| matches!(e, DragonError::MissingFile { .. 
})), + "should have a MissingFile error" + ); + } +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..635b2bd --- /dev/null +++ b/src/error.rs @@ -0,0 +1,37 @@ +use std::path::PathBuf; + +use thiserror::Error; + +#[derive(Debug, Clone, Error)] +pub enum DragonError { + #[error("Failed to read config file: {path}")] + ConfigRead { path: PathBuf }, + + #[error("Failed to parse config file {path}: {reason}")] + ConfigParse { path: PathBuf, reason: String }, + + #[error("Missing file: {path} ({context})")] + MissingFile { path: PathBuf, context: String }, + + #[error("Missing test directory: {path}")] + MissingTestDir { path: PathBuf }, + + #[error("Missing required field '{field}' in {context}")] + MissingField { field: String, context: String }, + + #[error("Directive conflict in {test}: both {inline} and {file_dir} supplied")] + DirectiveConflict { test: String, inline: String, file_dir: String }, + + #[error("Failed to read test file: {path}")] + TestFileRead { path: PathBuf }, + + #[error("Referenced file not found: {path} (directive {directive} in test {test})")] + ReferencedFileNotFound { path: PathBuf, directive: String, test: PathBuf }, + + #[error("Failed to read referenced file: {path}")] + ReferencedFileRead { path: PathBuf }, +} + +pub trait Validate { + fn validate(&self) -> Vec; +} diff --git a/src/harness.rs b/src/harness.rs new file mode 100644 index 0000000..20562c6 --- /dev/null +++ b/src/harness.rs @@ -0,0 +1,572 @@ +use std::fs::{self, OpenOptions}; +use std::io::Write; + +use colored::Colorize; +use rayon::prelude::*; + +use crate::info; +use crate::cli::{Mode, RunnerArgs}; +use crate::config::{Config, Executable, Package}; +use crate::log::log; +use crate::runner::{TestResult, ToolChainRunner}; +use crate::testfile::TestFile; + +/// Format a skip count suffix for summary lines. 
+fn skip_suffix(skip_count: usize) -> String { + if skip_count > 0 { + format!(" ({skip_count} skipped)") + } else { + String::new() + } +} + +/// Returns the full path or just the filename depending on the flag. +fn test_display_name(test: &TestFile, full_path: bool) -> String { + if full_path { + test.path.display().to_string() + } else { + test.file.clone() + } +} + +/// Format a timing suffix for the PASS/FAIL line. +/// Matches Python: right-aligned in a 10-char field followed by ` (s)`. +fn time_suffix(result: &TestResult, show_time: bool) -> String { + if show_time { + if let Some(t) = result.time { + return format!("{:>10.4} (s)", t); + } + } + String::new() +} + +/// Truncate bytes with middle omission if they exceed `max_bytes`. +/// Matches Python's `truncated_bytes()`. +fn truncated_bytes(data: &[u8], max_bytes: usize) -> Vec { + if data.len() <= max_bytes { + return data.to_vec(); + } + let omission = b"\n{{ omitted for brevity }}\n"; + let available = max_bytes.saturating_sub(omission.len()); + let half = available / 2; + let mut out = Vec::with_capacity(max_bytes); + out.extend_from_slice(&data[..half]); + out.extend_from_slice(omission); + out.extend_from_slice(&data[data.len() - half..]); + out +} + +/// Generate a pretty-printed box around file contents. +/// Matches Python's `TestFile.pretty_print()`. 
+fn pretty_print_file(path: &std::path::Path) -> Option { + let content = fs::read_to_string(path).ok()?; + let term_width = terminal_size::terminal_size() + .map(|(w, _)| w.0 as usize) + .unwrap_or(80); + let content_width = std::cmp::min(term_width.saturating_sub(10), 100); + if content_width < 6 { + return Some(content); + } + + let mut lines = Vec::new(); + // top border + lines.push(format!("\u{250c}{}\u{2510}", "\u{2500}".repeat(content_width - 2))); + for line in content.lines() { + let display = if line.len() > content_width - 4 { + format!("{}...", &line[..content_width - 7]) + } else { + line.to_string() + }; + lines.push(format!("\u{2502} {: Command History")); + for cr in &result.command_history { + log(cmd_level, indent + 4, &format!("==> {} (exit {})", cr.cmd, cr.exit_status)); + let stdout = truncated_bytes(&cr.stdout, 512); + log(cmd_level, indent + 6, &format!( + "stdout ({} bytes): {}", cr.stdout.len(), String::from_utf8_lossy(&stdout), + )); + let stderr = truncated_bytes(&cr.stderr, 512); + log(cmd_level, indent + 6, &format!( + "stderr ({} bytes): {}", cr.stderr.len(), String::from_utf8_lossy(&stderr), + )); + } + + // Expected vs Generated output: level 2 on pass, level 1 on fail + let diff_level: u32 = if result.did_pass { 2 } else { 1 }; + let expected_out = result.test.get_expected_out(); + let generated_out = result.gen_output.as_deref().unwrap_or(b""); + log(diff_level, indent + 2, &format!("==> Expected Out ({} bytes):", expected_out.len())); + log(diff_level, indent + 3, &format!("{:?}", expected_out)); + log(diff_level, indent + 2, &format!("==> Generated Out ({} bytes):", generated_out.len())); + log(diff_level, indent + 3, &format!("{:?}", generated_out)); +} + +/// Counters passed through hooks during iteration. 
+pub struct SubPackageCounters { + pub pass_count: usize, + pub test_count: usize, + pub skip_count: usize, + pub depth: usize, +} + +/// Implemented by any `TestHarness` which makes a single, sequential iteration +/// over the tests in each package and subpackage. Applies to all except for +/// the `TournamentHarness`, which iterates in a cross product. +pub trait SequentialTestHarness { + + fn run_passed(&self) -> bool; + fn process_test_result(&mut self, result: TestResult, cli_args: &RunnerArgs, counters: &mut SubPackageCounters); + fn pre_run_hook(&mut self) {} + fn post_run_hook(&mut self) {} + fn pre_executable_hook(&mut self, _exe_id: &str) {} + fn post_executable_hook(&mut self) {} + fn pre_subpackage_hook(&mut self, _spkg: &crate::config::SubPackage) {} + fn post_subpackage_hook(&mut self, _counters: &SubPackageCounters) {} + + /// Default iteration: executables x toolchains x packages x subpackages x tests. + fn iterate(&mut self, config: &Config, cli_args: &RunnerArgs) { + self.pre_run_hook(); + + let filter_pat = if config.package_filter.is_empty() { + None + } else { + glob::Pattern::new(&config.package_filter.to_lowercase()).ok() + }; + + for exe in &config.executables { + self.pre_executable_hook(&exe.id); + info!(0, "Running executable: {}", exe.id); + let exe_env = exe.runtime_env(); + let mut exe_pass = 0; + let mut exe_total = 0; + let mut exe_skip = 0; + + for tc in &config.toolchains { + let runner = ToolChainRunner::new(tc, cli_args.timeout) + .with_env(exe_env.clone()) + .with_memcheck(cli_args.mode == Mode::Memcheck); + info!(1, "Running Toolchain: {}", tc.name); + let mut tc_pass = 0; + let mut tc_total = 0; + let mut tc_skip = 0; + + for pkg in &config.packages { + let mut pkg_pass = 0; + let mut pkg_total = 0; + let mut pkg_skip = 0; + info!(2, "Entering package {}", pkg.name); + + for spkg in &pkg.subpackages { + if let Some(ref pat) = filter_pat { + if !pat.matches(&spkg.path.display().to_string().to_lowercase()) { + continue; + } + } 
+ + info!(3 + spkg.depth, "Entering subpackage {}", spkg.name); + let mut counters = SubPackageCounters { pass_count: 0, test_count: 0, skip_count: 0, depth: spkg.depth }; + self.pre_subpackage_hook(spkg); + + let results: Vec = spkg.tests + .par_iter() + .map(|test| runner.run(test, exe)) + .collect(); + + for result in results { + let fast_fail = cli_args.fast_fail && !result.did_pass; + self.process_test_result(result, cli_args, &mut counters); + if fast_fail { + self.post_subpackage_hook(&counters); + self.post_executable_hook(); + self.post_run_hook(); + return; + } + } + + self.post_subpackage_hook(&counters); + info!(3 + spkg.depth, "Subpackage Passed: {} / {}{}", counters.pass_count, counters.test_count, skip_suffix(counters.skip_count)); + pkg_pass += counters.pass_count; + pkg_total += counters.test_count; + pkg_skip += counters.skip_count; + } + + info!(2, "Packaged Passed: {} / {}{}", pkg_pass, pkg_total, skip_suffix(pkg_skip)); + tc_pass += pkg_pass; + tc_total += pkg_total; + tc_skip += pkg_skip; + } + + info!(1, "Toolchain Passed: {} / {}{}", tc_pass, tc_total, skip_suffix(tc_skip)); + exe_pass += tc_pass; + exe_total += tc_total; + exe_skip += tc_skip; + } + + info!(0, "Executable Passed: {} / {}{}", exe_pass, exe_total, skip_suffix(exe_skip)); + self.post_executable_hook(); + } + + self.post_run_hook(); + } + + fn run(&mut self, config: &Config, cli_args: &RunnerArgs) -> bool { + self.iterate(config, cli_args); + self.run_passed() + } +} + +// --------------------------------------------------------------------------- +// RegularHarness +// --------------------------------------------------------------------------- + +pub struct RegularHarness { + pub passed: bool, +} + +impl RegularHarness { + pub fn new() -> Self { + Self { passed: true } + } +} + +impl SequentialTestHarness for RegularHarness { + fn run_passed(&self) -> bool { self.passed } + + fn process_test_result(&mut self, result: TestResult, cli_args: &RunnerArgs, counters: &mut 
SubPackageCounters) {
        let indent = 4 + counters.depth;
        let test_name = test_display_name(&result.test, cli_args.full_path);
        if result.skipped {
            info!(indent, "{}{}", "[SKIP] ".yellow(), test_name);
            counters.skip_count += 1;
            return;
        }
        let time = time_suffix(&result, cli_args.time);
        if result.did_pass {
            let tag = if result.error_test { "[E-PASS] " } else { "[PASS] " };
            info!(indent, "{}{}{}", tag.green(), test_name, time);
            counters.pass_count += 1;
        } else {
            let tag = if result.error_test { "[E-FAIL] " } else { "[FAIL] " };
            info!(indent, "{}{}{}", tag.red(), test_name, time);
            self.passed = false;
        }
        counters.test_count += 1;
        print_test_details(&result, cli_args, indent);
    }
}

// ---------------------------------------------------------------------------
// TournamentHarness
// ---------------------------------------------------------------------------

/// Harness for tournament mode: every package's tests are run against every
/// executable, producing one CSV of pass ratios per toolchain.
pub struct TournamentHarness {
    /// False once any defect in the run itself (e.g. bad CLI args) is detected.
    pub passed: bool,
}

impl TournamentHarness {
    pub fn new() -> Self {
        Self { passed: true }
    }

    /// Tournament has its own iteration logic (cross-product of packages x executables).
    pub fn run(&mut self, config: &Config, cli_args: &RunnerArgs) -> bool {
        self.tournament_iterate(config, cli_args);
        self.passed
    }

    /// Append a human-readable failure record (expected vs. generated output)
    /// to the per-defender feedback file. Passing results are ignored; I/O
    /// errors are deliberately swallowed (best-effort logging).
    fn log_failure_to_file(file: &str, result: &TestResult) {
        if result.did_pass {
            return;
        }
        let Ok(mut f) = OpenOptions::new().create(true).append(true).open(file) else { return };

        let exp = String::from_utf8_lossy(result.test.get_expected_out());
        let generated = result.gen_output.as_deref()
            .map(|b| String::from_utf8_lossy(b).into_owned())
            .unwrap_or_default();

        let _ = writeln!(f, "{}\nTest: {}\n\nExpected Output: {exp:?}\nGenerated Output: {generated:?}",
            "=".repeat(80), result.test.file);
    }

    /// Best-effort append of a single line to `path`; errors are ignored.
    fn append_log(path: &std::path::Path, line: &str) {
        if let Ok(mut f) = OpenOptions::new().create(true).append(true).open(path) {
            let _ = writeln!(f, "{line}");
        }
    }

    /// Drive the full cross-product: for each toolchain, each defending
    /// executable runs every attacking package's tests; results are written as
    /// one CSV per toolchain plus per-defender feedback files.
    fn tournament_iterate(&mut self, config: &Config, cli_args: &RunnerArgs) {
        // Sort both axes case-insensitively so CSV rows/columns are stable.
        let mut attacking_pkgs: Vec<&Package> = config.packages.iter().collect();
        attacking_pkgs.sort_by(|a, b| a.name.to_lowercase().cmp(&b.name.to_lowercase()));

        let mut defending_exes: Vec<&Executable> = config.executables.iter().collect();
        defending_exes.sort_by(|a, b| a.id.to_lowercase().cmp(&b.id.to_lowercase()));

        let Some(solution_exe) = cli_args.solution_exe.as_deref() else {
            eprintln!("Error: --solution-exe is required in tournament mode");
            self.passed = false;
            return;
        };

        if !config.executables.iter().any(|e| e.id == solution_exe) {
            eprintln!("Error: --solution-exe '{}' does not match any executable in the config.\nAvailable: {:?}",
                solution_exe, config.executables.iter().map(|e| &e.id).collect::<Vec<_>>());
            self.passed = false;
            return;
        }
        let failure_log = cli_args.failure_log.as_deref();

        for tc in &config.toolchains {
            let csv_filename = format!("toolchain_{}.csv", tc.name);
            let mut csv_file = fs::File::create(&csv_filename).expect("cannot create CSV");

            // Header row: toolchain name followed by one column per attacker.
            let header: Vec<&str> = std::iter::once(tc.name.as_str())
                .chain(attacking_pkgs.iter().map(|p| p.name.as_str()))
                .collect();
            let _ = writeln!(csv_file, "{}", header.join(","));
            println!("\nToolchain: {}", tc.name);

            for def_exe in &defending_exes {
                let runner = ToolChainRunner::new(tc, cli_args.timeout)
                    .with_env(def_exe.runtime_env());
                let feedback_file = format!("{}-{}feedback.txt", def_exe.id, tc.name);
                let mut row_cells: Vec<String> = vec![def_exe.id.clone()];

                for a_pkg in &attacking_pkgs {
                    print!("\n {:<12} --> {:<12}", a_pkg.name, def_exe.id);
                    let mut pass_count = 0usize;
                    let mut test_count = 0usize;

                    let tests = a_pkg.subpackages.iter().flat_map(|s| &s.tests);
                    for test in tests {
                        let result = runner.run(test, def_exe);
                        if result.skipped {
                            print!("{}", ".".yellow());
                            continue;
                        }
                        let is_solution = solution_exe == def_exe.id;

                        if result.did_pass {
                            print!("{}", ".".green());
                            pass_count += 1;
                            // Passes against the solution are recorded so the
                            // grader can cross-check the failure log.
                            if is_solution && failure_log.is_some() {
                                Self::append_log("pass_log.txt".as_ref(), &format!(
                                    "{} {} {}", tc.name, a_pkg.name, result.test.path.display()
                                ));
                            }
                        } else {
                            print!("{}", ".".red());
                            Self::log_failure_to_file(&feedback_file, &result);
                            if let Some(log) = failure_log {
                                if is_solution {
                                    Self::append_log(log, &format!(
                                        "{} {} {}", tc.name, a_pkg.name, result.test.path.display()
                                    ));
                                }
                            }
                        }
                        test_count += 1;
                    }

                    row_cells.push(format!("{pass_count}/{test_count}"));
                }

                let _ = writeln!(csv_file, "{}", row_cells.join(","));
            }
        }
    }
}

// ---------------------------------------------------------------------------
// MemoryCheckHarness
// ---------------------------------------------------------------------------

/// Harness for memcheck mode: runs tests under valgrind and summarizes leaks.
pub struct MemoryCheckHarness {
    pub passed: bool,
    /// Results flagged as leaking; drained by `post_executable_hook`.
    pub leak_tests: Vec<TestResult>,
    pub test_count: usize,
}

impl MemoryCheckHarness {
    pub fn new() -> Self {
        Self { passed: true, leak_tests: Vec::new(), test_count: 0 }
    }
}

impl SequentialTestHarness for MemoryCheckHarness {
    fn run_passed(&self) -> bool { self.passed }
+ + fn process_test_result(&mut self, result: TestResult, cli_args: &RunnerArgs, counters: &mut SubPackageCounters) { + let indent = 4 + counters.depth; + let test_name = test_display_name(&result.test, cli_args.full_path); + if result.skipped { + info!(indent, "{}{}", "[SKIP] ".yellow(), test_name); + counters.skip_count += 1; + return; + } + self.test_count += 1; + counters.test_count += 1; + + let time = time_suffix(&result, cli_args.time); + if result.did_pass { + info!(indent, "{}{}{}", "[PASS] ".green(), test_name, time); + counters.pass_count += 1; + } else { + info!(indent, "{}{}{}", "[FAIL] ".red(), test_name, time); + } + + print_test_details(&result, cli_args, indent); + + if result.memory_leak { + self.leak_tests.push(result); + } + } + + fn post_executable_hook(&mut self) { + info!(0, "Leak Summary: ({} tests)", self.leak_tests.len()); + for result in &self.leak_tests { + info!(4, "{}{}", "[LEAK] ".yellow(), result.test.file); + } + self.leak_tests.clear(); + self.test_count = 0; + } +} + +// --------------------------------------------------------------------------- +// PerformanceTestingHarness +// --------------------------------------------------------------------------- + +pub struct PerformanceTestingHarness { + pub passed: bool, + pub csv_cols: Vec>, + pub cur_col: Vec, + pub testfile_col: Vec, + pub first_exec: bool, +} + +impl PerformanceTestingHarness { + pub fn new() -> Self { + Self { + passed: true, + csv_cols: Vec::new(), + cur_col: Vec::new(), + testfile_col: vec!["Test".into()], + first_exec: true, + } + } +} + +impl SequentialTestHarness for PerformanceTestingHarness { + fn run_passed(&self) -> bool { self.passed } + + fn process_test_result(&mut self, result: TestResult, cli_args: &RunnerArgs, counters: &mut SubPackageCounters) { + let indent = 4 + counters.depth; + let test_name = test_display_name(&result.test, cli_args.full_path); + if result.skipped { + info!(indent, "{}{}", "[SKIP] ".yellow(), test_name); + counters.skip_count += 
1; + return; + } + if self.first_exec { + self.testfile_col.push(result.test.file.clone()); + } + + if result.did_pass { + counters.pass_count += 1; + info!(indent, "{}{}", "[PASS] ".green(), test_name); + self.cur_col.push(result.time.map(|t| format!("{t:.4}")).unwrap_or_default()); + } else { + self.cur_col.push(format!("{:.4}", cli_args.timeout)); + } + counters.test_count += 1; + } + + fn pre_executable_hook(&mut self, exe_id: &str) { + self.cur_col.push(exe_id.into()); + } + + fn post_executable_hook(&mut self) { + if self.first_exec { + self.csv_cols.push(self.testfile_col.clone()); + self.first_exec = false; + } + self.csv_cols.push(std::mem::take(&mut self.cur_col)); + } + + fn post_run_hook(&mut self) { + let max_len = self.csv_cols.iter().map(|c| c.len()).max().unwrap_or(0); + let mut f = fs::File::create("perf.csv").expect("cannot create perf.csv"); + for row_idx in 0..max_len { + let row: Vec<&str> = self.csv_cols.iter() + .map(|col| col.get(row_idx).map(|s| s.as_str()).unwrap_or("")) + .collect(); + let _ = writeln!(f, "{}", row.join(",")); + } + } +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use crate::cli::{Mode, RunnerArgs}; + use crate::config::load_config; + use super::TournamentHarness; + + fn config_path(name: &str) -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests").join("configs").join(name) + } + + #[test] + fn test_grader_config() { + let path = config_path("ConfigGrade.json"); + let config = load_config(&path, None).expect("config should load"); + + let tmp = tempfile::tempdir().expect("failed to create temp dir"); + let prev_dir = std::env::current_dir().unwrap(); + std::env::set_current_dir(tmp.path()).unwrap(); + + let failure_log = tmp.path().join("Failures_rs.txt"); + + let args = RunnerArgs { + mode: Mode::Tournament, + failure_log: Some(failure_log.clone()), + solution_exe: Some("TA".into()), + timeout: 2.0, + ..Default::default() + }; + + let mut harness = TournamentHarness::new(); + 
harness.run(&config, &args);

        assert!(
            failure_log.exists(),
            "failure log should have been created"
        );

        std::env::set_current_dir(prev_dir).unwrap();
    }
}
diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..8f9276b --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,11 @@
pub mod cli;
pub mod config;
pub mod error;
pub mod harness;
pub mod log;
pub mod runner;
pub mod script;
pub mod testfile;
pub mod toolchain;
pub mod server;
pub mod util;
diff --git a/src/log.rs b/src/log.rs new file mode 100644 index 0000000..143f328 --- /dev/null +++ b/src/log.rs @@ -0,0 +1,51 @@
use std::io::Write;
use std::sync::atomic::{AtomicU32, Ordering};

static DEBUG_LEVEL: AtomicU32 = AtomicU32::new(0);

/// Set the global debug/verbosity level.
pub fn set_debug_level(level: u32) {
    DEBUG_LEVEL.store(level, Ordering::Relaxed);
}

/// Log a message at a given verbosity level with indentation.
/// Use the `info!`, `debug!`, `trace!`, or `trace2!` macros instead of calling this directly.
#[doc(hidden)]
pub fn log(level: u32, indent: usize, msg: &str) {
    if DEBUG_LEVEL.load(Ordering::Relaxed) >= level {
        println!("{:indent$}{msg}", "", indent = indent);
        // Flush so interleaved progress output appears promptly.
        let _ = std::io::stdout().flush();
    }
}

/// Always printed (level 0).
#[macro_export]
macro_rules! info {
    ($indent:expr, $($arg:tt)*) => {
        $crate::log::log(0, $indent, &format!($($arg)*))
    };
}

/// Printed with -v (level 1).
#[macro_export]
macro_rules! debug {
    ($indent:expr, $($arg:tt)*) => {
        $crate::log::log(1, $indent, &format!($($arg)*))
    };
}

/// Printed with -vv (level 2).
#[macro_export]
macro_rules! trace {
    ($indent:expr, $($arg:tt)*) => {
        $crate::log::log(2, $indent, &format!($($arg)*))
    };
}

/// Printed with -vvv (level 3).
#[macro_export]
macro_rules! trace2 {
    ($indent:expr, $($arg:tt)*) => {
        $crate::log::log(3, $indent, &format!($($arg)*))
    };
}
diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..a052f94 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,56 @@
use colored::Colorize;
use dragon_runner_rs::cli::{parse_cli_args, CliAction, Mode};
use dragon_runner_rs::config::load_config;
use dragon_runner_rs::harness::*;
use dragon_runner_rs::{info, debug};
use dragon_runner_rs::script::run_script;
use dragon_runner_rs::server;

/// Entry point: dispatch on the parsed CLI action (script / serve / run),
/// load the config, and run the harness matching the selected mode.
/// Exits 0 on success, 1 on config errors or test failures.
fn main() {
    let action = parse_cli_args();
    let cli_args = match action {
        CliAction::Script(args) => {
            std::process::exit(run_script(args));
        }
        CliAction::Serve { config_file, bind, timeout, max_concurrent } => {
            let config = match load_config(&config_file, None) {
                Ok(c) => c,
                Err(errors) => {
                    info!(0, "Found Config {} error(s):", errors.len());
                    for e in &errors {
                        info!(0, "{}", format!("{e}").red());
                    }
                    std::process::exit(1);
                }
            };
            let rt = tokio::runtime::Runtime::new().expect("failed to create tokio runtime");
            rt.block_on(server::run_server(config, &bind, timeout, max_concurrent));
            return;
        }
        CliAction::Run(args) => args,
    };

    debug!(0, "{:?}", cli_args);
    let config = match load_config(&cli_args.config_file, Some(&cli_args)) {
        Ok(c) => c,
        Err(errors) => {
            info!(0, "Found Config {} error(s):", errors.len());
            info!(0, "Parsed {} below:", cli_args.config_file.display());
            for e in &errors {
                info!(0, "{}", format!("{e}").red());
            }
            std::process::exit(1);
        }
    };

    config.log_test_info();

    let success = match cli_args.mode {
        Mode::Regular => RegularHarness::new().run(&config, &cli_args),
        Mode::Tournament => TournamentHarness::new().run(&config, &cli_args),
        Mode::Memcheck => MemoryCheckHarness::new().run(&config, &cli_args),
        Mode::Perf => PerformanceTestingHarness::new().run(&config, &cli_args),
    };

    std::process::exit(if success { 0 } else { 1 });
}
diff --git a/src/runner.rs
b/src/runner.rs new file mode 100644 index 0000000..d66c72b --- /dev/null +++ b/src/runner.rs @@ -0,0 +1,732 @@ +use std::collections::HashMap; +use std::env; +use std::fs; +use std::ops::ControlFlow; +use std::path::{Path, PathBuf}; +use std::process; +use std::sync::{Arc, LazyLock}; +use std::time::{Duration, Instant}; + +use regex::Regex; +use wait_timeout::ChildExt; + +static ENV_VAR_RE: LazyLock = + LazyLock::new(|| Regex::new(r"\$(\w+)|\$\{(\w+)\}").unwrap()); +static ERROR_KIND_RE: LazyLock = + LazyLock::new(|| Regex::new(r"(?i)(\w+Error)").unwrap()); +static ERROR_LINE_RE: LazyLock = + LazyLock::new(|| Regex::new(r"(?i)on\s+Line\s+(\d+)").unwrap()); + +use crate::config::Executable; +use crate::testfile::TestFile; +use crate::toolchain::{Step, ToolChain}; +use crate::util::{make_tmp_file, make_empty_tmp_file}; + +/// Reserved exit code for valgrind leak detection. +pub const VALGRIND_EXIT_CODE: i32 = 111; + +const RESERVED_EXIT_CODES: &[i32] = &[VALGRIND_EXIT_CODE]; +const RUNTIME_ERRORS: &[&str] = &["SizeError", "IndexError", "MathError", "StrideError"]; + +/// State threaded between pipeline steps during a toolchain run. +struct PipelineState { + input_file: PathBuf, + tmp_handles: Vec, + command_history: Vec, + memory_leak: bool, +} + +/// Magic variable placeholders used in toolchain step arguments. 
+pub enum MagicArg { + Exe, + Input, + Output, +} + +impl MagicArg { + pub const ALL: &[MagicArg] = &[MagicArg::Exe, MagicArg::Input, MagicArg::Output]; + + pub fn pattern(&self) -> &'static str { + match self { + MagicArg::Exe => "$EXE", + MagicArg::Input => "$INPUT", + MagicArg::Output => "$OUTPUT", + } + } + + fn resolve<'a>(&self, params: &'a MagicParams) -> Option<&'a str> { + match self { + MagicArg::Exe => Some(¶ms.exe_path), + MagicArg::Input if !params.input_file.is_empty() => Some(¶ms.input_file), + MagicArg::Input => None, + MagicArg::Output => params.output_file.as_deref(), + } + } +} + +/// Magic parameter values substituted into toolchain step arguments. +pub struct MagicParams { + pub exe_path: String, + pub input_file: String, + pub output_file: Option, +} + +/// A resolved command ready to execute. +pub struct ResolvedCommand { + pub args: Vec, +} + +impl ResolvedCommand { + pub fn new(args: Vec) -> Self { + Self { args } + } +} + +/// Result of executing a single subprocess. +pub struct CommandResult { + pub cmd: String, + pub exit_status: i32, + pub stdout: Vec, + pub stderr: Vec, + pub time: f64, + pub timed_out: bool, +} + +impl CommandResult { + pub fn new(cmd: &str) -> Self { + Self { + cmd: cmd.to_string(), + exit_status: 0, + stdout: Vec::new(), + stderr: Vec::new(), + time: 0.0, + timed_out: false, + } + } +} + +/// Result of running a complete test case through a toolchain. 
+pub struct TestResult { + pub test: Arc, + pub did_pass: bool, + pub did_timeout: bool, + pub error_test: bool, + pub memory_leak: bool, + pub skipped: bool, + pub command_history: Vec, + pub gen_output: Option>, + pub time: Option, + pub failing_step: Option, +} + +impl TestResult { + fn skipped(test: &Arc) -> Self { + Self { + test: Arc::clone(test), + did_pass: false, + did_timeout: false, + error_test: false, + memory_leak: false, + skipped: true, + command_history: Vec::new(), + gen_output: None, + time: None, + failing_step: None, + } + } + + fn finished( + test: &Arc, + history: Vec, + output: Vec, + time: f64, + memory_leak: bool, + ) -> Self { + let expected = test.get_expected_out(); + Self { + did_pass: output == expected, + test: Arc::clone(test), + did_timeout: false, + error_test: false, + memory_leak, + skipped: false, + command_history: history, + gen_output: Some(output), + time: Some(time), + failing_step: None, + } + } + + fn timeout( + test: &Arc, + history: Vec, + step_name: &str, + timeout: f64, + ) -> Self { + Self { + test: Arc::clone(test), + did_pass: false, + did_timeout: true, + error_test: false, + memory_leak: false, + skipped: false, + command_history: history, + gen_output: None, + time: Some(timeout), + failing_step: Some(step_name.to_string()), + } + } + + fn fail(test: &Arc, history: Vec, failing_step: Option) -> Self { + Self { + test: Arc::clone(test), + did_pass: false, + did_timeout: false, + error_test: false, + memory_leak: false, + skipped: false, + command_history: history, + gen_output: None, + time: None, + failing_step, + } + } + + fn error( + test: &Arc, + history: Vec, + stderr: Vec, + step_name: &str, + did_pass: bool, + memory_leak: bool, + ) -> Self { + Self { + test: Arc::clone(test), + did_pass, + did_timeout: false, + error_test: true, + memory_leak, + skipped: false, + command_history: history, + gen_output: Some(stderr), + time: None, + failing_step: Some(step_name.to_string()), + } + } +} + +const 
VALGRIND_BIN: &str = "valgrind"; + +/// Runs a toolchain against a test file and executable. +pub struct ToolChainRunner<'a> { + pub tc: &'a ToolChain, + pub timeout: f64, + /// Extra environment variables to inject into spawned subprocesses (e.g. runtime lib paths). + pub extra_env: HashMap, + /// When true, automatically wrap the last toolchain step with valgrind. + pub memcheck: bool, +} + +impl<'a> ToolChainRunner<'a> { + pub fn new(tc: &'a ToolChain, timeout: f64) -> Self { + Self { + tc, + timeout, + extra_env: HashMap::new(), + memcheck: false, + } + } + + pub fn with_env(mut self, env: HashMap) -> Self { + self.extra_env = env; + self + } + + pub fn with_memcheck(mut self, memcheck: bool) -> Self { + self.memcheck = memcheck; + self + } + + /// Run each step of the toolchain for a given test and executable. + pub fn run(&self, test: &Arc, exe: &Executable) -> TestResult { + if test.skip { + return TestResult::skipped(test); + } + let tc_len = self.tc.len(); + let init = PipelineState { + input_file: test.path.clone(), + tmp_handles: Vec::new(), + command_history: Vec::new(), + memory_leak: false, + }; + + let result = self.tc.iter().enumerate().try_fold(init, |state, (index, step)| { + self.run_step(state, step, index == tc_len - 1, test, exe) + }); + + match result { + ControlFlow::Break(tr) => tr, + ControlFlow::Continue(_) => panic!("Toolchain reached undefined conditions"), + } + } + + fn run_step( + &self, + mut state: PipelineState, + step: &Step, + last_step: bool, + test: &Arc, + exe: &Executable, + ) -> ControlFlow { + + let input_stream = if step.uses_ins { test.get_input_stream() } else { b"" }; + let output_resolved = self.resolve_output_file(step); + let output_path = output_resolved.as_ref().map(|(p, _)| p.clone()); + let magic = MagicParams { + exe_path: exe.exe_path.display().to_string(), + input_file: state.input_file.display().to_string(), + output_file: output_path.as_ref().map(|p| p.display().to_string()), + }; + + // Keep temp handle 
alive for the duration of the step + if let Some((_, handle)) = output_resolved { + state.tmp_handles.push(handle); + } + + let mut command = self.resolve_command(step, &magic); + + // In memcheck mode, wrap the last step with valgrind + if self.memcheck && last_step && !self.wrap_valgrind(&mut command) { + return ControlFlow::Break(TestResult::fail( + test, state.command_history, + Some("memcheck: valgrind not found".to_string()), + )); + } + + let cr = self.run_command(&command, &input_stream); + if cr.timed_out { + state.command_history.push(cr); + return ControlFlow::Break(TestResult::timeout( + test, state.command_history, &step.display_name(exe), self.timeout, + )); + } + + if cr.exit_status == -1 { + state.command_history.push(cr); + return ControlFlow::Break(TestResult::fail( + test, state.command_history, None, + )); + } + + let stdout = cr.stdout.clone(); + let stderr = cr.stderr.clone(); + let step_time = (cr.time * 10000.0).round() / 10000.0; + let exit_status = cr.exit_status; + + if exit_status == VALGRIND_EXIT_CODE { + state.memory_leak = true; + } + state.command_history.push(cr); + + if exit_status != 0 && !RESERVED_EXIT_CODES.contains(&exit_status) { + let did_pass = step.allow_error + && self.check_error_test(&stderr, test.get_expected_out()); + return ControlFlow::Break(TestResult::error( + test, state.command_history, stderr, + &step.display_name(exe), did_pass, state.memory_leak, + )); + } + + if last_step { + let final_output = match output_path { + Some(ref p) if p.exists() => fs::read(p).unwrap_or_default(), + Some(_) => return ControlFlow::Break(TestResult::finished( + test, state.command_history, Vec::new(), step_time, state.memory_leak, + )), + None => stdout, + }; + return ControlFlow::Break(TestResult::finished( + test, state.command_history, final_output, step_time, state.memory_leak, + )); + } + + // Not the last step — continue the pipeline + state.input_file = output_path.unwrap_or_else(|| { + match make_tmp_file(&stdout) { + 
Some((path, handle)) => { + state.tmp_handles.push(handle); + path + } + None => PathBuf::new(), + } + }); + ControlFlow::Continue(state) + } + + /// Prepend valgrind flags to command. Returns false if valgrind is not installed. + fn wrap_valgrind(&self, command: &mut ResolvedCommand) -> bool { + let ok = process::Command::new(VALGRIND_BIN) + .arg("--version") + .stdout(process::Stdio::null()) + .stderr(process::Stdio::null()) + .status() + .is_ok_and(|s| s.success()); + if ok { + let mut wrapped = vec![ + VALGRIND_BIN.to_string(), + "--leak-check=full".to_string(), + format!("--error-exitcode={VALGRIND_EXIT_CODE}"), + "--log-file=/dev/null".to_string(), + ]; + wrapped.append(&mut command.args); + command.args = wrapped; + } + ok + } + + fn run_command(&self, command: &ResolvedCommand, stdin: &[u8]) -> CommandResult { + let mut cr = CommandResult::new(&command.args[0]); + let start = Instant::now(); + + let mut cmd = process::Command::new(&command.args[0]); + cmd.args(&command.args[1..]) + .stdin(process::Stdio::piped()) + .stdout(process::Stdio::piped()) + .stderr(process::Stdio::piped()) + .envs(&self.extra_env); + let result = cmd.spawn(); + + match result { + Ok(mut child) => { + // Write stdin then close it + if let Some(mut child_stdin) = child.stdin.take() { + use std::io::Write; + let _ = child_stdin.write_all(stdin); + } + + let timeout_dur = Duration::from_secs_f64(self.timeout); + match child.wait_timeout(timeout_dur) { + Ok(Some(status)) => { + // Process exited within timeout — read remaining output + cr.time = start.elapsed().as_secs_f64(); + cr.exit_status = status.code().unwrap_or(1); + + // Read stdout and stderr from the pipes + use std::io::Read; + if let Some(mut out) = child.stdout.take() { + let _ = out.read_to_end(&mut cr.stdout); + } + if let Some(mut err) = child.stderr.take() { + let _ = err.read_to_end(&mut cr.stderr); + } + } + Ok(None) => { + // Still running — timeout + let _ = child.kill(); + let _ = child.wait(); + cr.timed_out = 
true; + cr.time = self.timeout; + cr.exit_status = 255; + } + Err(_) => { + cr.exit_status = 1; + cr.time = start.elapsed().as_secs_f64(); + } + } + } + Err(_) => { + cr.exit_status = -1; + cr.time = start.elapsed().as_secs_f64(); + } + } + + cr + } + + fn resolve_output_file(&self, step: &Step) -> Option<(PathBuf, tempfile::TempPath)> { + if step.args.iter().any(|a| a.contains(MagicArg::Output.pattern())) { + make_empty_tmp_file() + } else { + None + } + } + + fn resolve_command(&self, step: &Step, params: &MagicParams) -> ResolvedCommand { + let mut args = vec![step.exe_raw.clone()]; + args.extend(step.args.iter().cloned()); + let mut command = ResolvedCommand::new(args); + self.replace_magic_args(&mut command, params); + self.replace_env_vars(&mut command); + // Only resolve paths containing '/' — bare names (e.g. "gcc") use $PATH lookup + if !command.args.is_empty() && command.args[0].contains('/') && !Path::new(&command.args[0]).is_absolute() { + if let Ok(abs) = fs::canonicalize(&command.args[0]) { + command.args[0] = abs.to_string_lossy().into_owned(); + } else if let Ok(cwd) = env::current_dir() { + let abs = cwd.join(&command.args[0]); + command.args[0] = abs.to_string_lossy().into_owned(); + } + } + command + } + + fn replace_magic_args(&self, command: &mut ResolvedCommand, params: &MagicParams) { + for arg in command.args.iter_mut() { + for magic in MagicArg::ALL { + if arg.contains(magic.pattern()) { + if let Some(val) = magic.resolve(params) { + *arg = arg.replace(magic.pattern(), val); + } + } + } + } + } + + fn replace_env_vars(&self, command: &mut ResolvedCommand) { + for arg in command.args.iter_mut() { + let original = arg.clone(); + for caps in ENV_VAR_RE.captures_iter(&original) { + let var_name = caps + .get(1) + .or_else(|| caps.get(2)) + .map(|m| m.as_str()) + .unwrap_or(""); + // Check runner's extra_env first, then fall back to process env + let val = self.extra_env.get(var_name).cloned() + .or_else(|| env::var(var_name).ok()); + if let 
Some(val) = val { + *arg = arg + .replace(&format!("${var_name}"), &val) + .replace(&format!("${{{var_name}}}"), &val); + } + } + } + } + + fn check_error_test(&self, produced: &[u8], expected: &[u8]) -> bool { + let produced_str = match std::str::from_utf8(produced) { + Ok(s) => s.trim(), + Err(_) => return false, + }; + let expected_str = match std::str::from_utf8(expected) { + Ok(s) => s.trim(), + Err(_) => return false, + }; + + if produced_str.is_empty() || expected_str.is_empty() { + return false; + } + + let rt_error = RUNTIME_ERRORS + .iter() + .find(|e| expected_str.contains(**e)) + .copied(); + let did_raise_rt = RUNTIME_ERRORS + .iter() + .any(|e| produced_str.contains(e)); + + if did_raise_rt { + if let Some(rt_err) = rt_error { + let pattern = format!(r"{}(\s+on\s+Line\s+\d+)?(:.+)?", rt_err); + let re = Regex::new(&pattern).unwrap(); + re.is_match(produced_str) && re.is_match(expected_str) + } else { + false + } + } else { + let prod_error = ERROR_KIND_RE.captures(produced_str); + let exp_error = ERROR_KIND_RE.captures(expected_str); + let prod_line = ERROR_LINE_RE.captures(produced_str); + let exp_line = ERROR_LINE_RE.captures(expected_str); + match (prod_error, exp_error, prod_line, exp_line) { + (Some(_), Some(_), Some(pl), Some(el)) => { + pl.get(1).map(|m| m.as_str()) == el.get(1).map(|m| m.as_str()) + } + _ => false, + } + } + } +} + + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use crate::config::{load_config, Config}; + use super::ToolChainRunner; + + fn configs_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests").join("configs") + } + + fn config_path(name: &str) -> PathBuf { + configs_dir().join(name) + } + + fn create_config(name: &str) -> Config { + let path = config_path(name); + load_config(&path, None).expect("config should load") + } + + fn _assert_send_sync() { + fn check() {} + check::>(); + } + + fn run_tests_for_config(config: &Config, expected_result: bool) { + for exe in &config.executables { 
for tc in &config.toolchains {
                let runner = ToolChainRunner::new(tc, 10.0)
                    .with_env(exe.runtime_env());
                for pkg in &config.packages {
                    for spkg in &pkg.subpackages {
                        for test in &spkg.tests {
                            let result = runner.run(test, exe);
                            if result.skipped {
                                continue;
                            }
                            assert_eq!(
                                result.did_pass, expected_result,
                                "Test {} expected {} but got {}",
                                test.file,
                                if expected_result { "PASS" } else { "FAIL" },
                                if result.did_pass { "PASS" } else { "FAIL" },
                            );
                        }
                    }
                }
            }
        }
    }

    #[test]
    fn test_gcc_pass() {
        let config = create_config("gccPassConfig.json");

        run_tests_for_config(&config, true);
    }

    #[test]
    fn test_gcc_fail() {
        let config = create_config("gccFailConfig.json");

        run_tests_for_config(&config, false);
    }

    /// True if a working `valgrind` binary is on PATH.
    fn valgrind_available() -> bool {
        std::process::Command::new("valgrind")
            .arg("--version")
            .stdout(std::process::Stdio::null())
            .stderr(std::process::Stdio::null())
            .status()
            .is_ok_and(|s: std::process::ExitStatus| s.success())
    }

    /// Memcheck wrapping works on gccPassConfig — runner still produces results.
    #[test]
    fn test_memcheck_clean_programs() {
        if !valgrind_available() {
            eprintln!("skipping: valgrind not found");
            return;
        }
        let config = create_config("gccPassConfig.json");

        let mut ran_any = false;
        for exe in &config.executables {
            for tc in &config.toolchains {
                let runner = ToolChainRunner::new(tc, 10.0)
                    .with_env(exe.runtime_env())
                    .with_memcheck(true);
                for pkg in &config.packages {
                    for spkg in &pkg.subpackages {
                        for test in &spkg.tests {
                            let result = runner.run(test, exe);
                            ran_any = true;
                            // Tests that don't leak should still pass and not flag a leak
                            if !test.file.contains("memleak") {
                                assert!(
                                    !result.memory_leak,
                                    "Non-leaky test {} should not flag memory leak",
                                    test.file,
                                );
                            }
                        }
                    }
                }
            }
        }
        assert!(ran_any, "should have run at least one test");
    }

    /// Memcheck on MemoryLeaks package — leaky programs should be flagged.
    #[test]
    fn test_memcheck_detects_leaks() {
        if !valgrind_available() {
            eprintln!("skipping: valgrind not found");
            return;
        }
        let config = create_config("gccMemcheckConfig.json");

        for exe in &config.executables {
            for tc in &config.toolchains {
                let runner = ToolChainRunner::new(tc, 10.0)
                    .with_env(exe.runtime_env())
                    .with_memcheck(true);
                for pkg in &config.packages {
                    for spkg in &pkg.subpackages {
                        for test in &spkg.tests {
                            let result = runner.run(test, exe);
                            if test.path.to_string_lossy().contains("leaky") {
                                assert!(
                                    result.memory_leak,
                                    "Leaky test {} should be detected as memory leak",
                                    test.file,
                                );
                            } else if test.path.to_string_lossy().contains("safe") && test.file.contains("001_safe") {
                                assert!(
                                    !result.memory_leak,
                                    "Safe test {} should not have memory leak",
                                    test.file,
                                );
                            }
                        }
                    }
                }
            }
        }
    }

    /// End-to-end run of the runtime-library toolchain; compiles the shared
    /// object on demand via the repo's compile_lib.py helper.
    #[test]
    fn test_runtime_gcc_toolchain() {
        let tests_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests");
        let compile_script = tests_dir.join("scripts/test-scripts/compile_lib.py");
        let lib_src_dir = tests_dir.join("lib/src");
        let lib_out_dir = tests_dir.join("lib");

        assert!(compile_script.exists(), "missing compile_lib.py");

        let (lib_name, config_name) = if cfg!(target_os = "macos") {
            ("lib/libfib.dylib", "runtimeConfigDarwin.json")
        } else {
            ("lib/libfib.so", "runtimeConfigLinux.json")
        };
        let expected_lib = tests_dir.join(lib_name);
        if !expected_lib.exists() {
            let status = std::process::Command::new("python3")
                .args([
                    compile_script.to_str().unwrap(),
                    lib_src_dir.to_str().unwrap(),
                    lib_out_dir.to_str().unwrap(),
                ])
                .status()
                .expect("failed to run compile_lib.py");
            assert!(status.success(), "shared object compilation failed");
            assert!(expected_lib.exists(), "failed to create shared object");
        }

        let path = config_path(config_name);
        let config = load_config(&path, None).expect("config should load");

        run_tests_for_config(&config, true);
    }
}
diff --git a/src/script.rs b/src/script.rs new file mode 100644 index 0000000..28be54b --- /dev/null +++ b/src/script.rs @@ -0,0 +1,63 @@
use std::path::PathBuf;
use std::process::Command;

/// Directory containing grading scripts.
/// Uses CARGO_MANIFEST_DIR baked in at compile time, so it works for both
/// `cargo run` and `cargo install --path .` (as long as the source tree remains).
/// Override with DRAGON_RUNNER_SCRIPTS env var if needed.
fn scripts_dir() -> PathBuf {
    if let Ok(dir) = std::env::var("DRAGON_RUNNER_SCRIPTS") {
        return PathBuf::from(dir);
    }
    PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("scripts")
}

/// (CLI name, Python module filename) for each available script.
+const SCRIPTS: &[(&str, &str)] = &[ + ("add_empty", "add_empty.py"), + ("build", "build.py"), + ("clean-build", "clean_build.py"), + ("checkout", "checkout.py"), + ("gather", "gather.py"), + ("gen-config", "gen_config.py"), + ("grade", "grade.py"), + ("grade-perf", "grade_perf.py"), +]; + +pub fn run_script(args: Vec) -> i32 { + if args.is_empty() { + eprintln!("Available scripts:"); + for (name, _) in SCRIPTS { + eprintln!(" {}", name); + } + return 1; + } + + let script_name = &args[0]; + let module = match SCRIPTS.iter().find(|(name, _)| name == script_name) { + Some((_, m)) => m, + None => { + eprintln!("Unknown script: {}", script_name); + return 1; + } + }; + + let script_path = scripts_dir().join(module); + if !script_path.exists() { + eprintln!("Script file not found: {}", script_path.display()); + return 1; + } + + let status = Command::new("python3") + .arg(&script_path) + .args(&args[1..]) + .status(); + + match status { + Ok(s) => s.code().unwrap_or(1), + Err(e) => { + eprintln!("Failed to run script: {}", e); + 1 + } + } +} diff --git a/src/server/index.html b/src/server/index.html new file mode 100644 index 0000000..ea84480 --- /dev/null +++ b/src/server/index.html @@ -0,0 +1,69 @@ + + + + +dragon-runner + + +

dragon-runner

+
+
+
+
+
+ +
+
+ + diff --git a/src/server/mod.rs b/src/server/mod.rs new file mode 100644 index 0000000..a57fc95 --- /dev/null +++ b/src/server/mod.rs @@ -0,0 +1,290 @@ +use std::sync::Arc; + +use axum::extract::State; +use axum::http::StatusCode; +use axum::response::Html; +use axum::routing::{get, post}; +use axum::{Json, Router}; +use base64::engine::general_purpose::STANDARD as B64; +use base64::Engine; +use serde::{Deserialize, Serialize}; +use tokio::sync::Semaphore; +use tower_http::cors::CorsLayer; + +use crate::config::{Config, Executable}; +use crate::runner::ToolChainRunner; +use crate::testfile::TestFile; +use crate::toolchain::ToolChain; + +struct AppState { + config: Config, + timeout: f64, + run_semaphore: Semaphore, +} + +// --------------------------------------------------------------------------- +// Request / Response types +// --------------------------------------------------------------------------- + +#[derive(Deserialize)] +struct RunRequest { + toolchain: String, + executable: String, + code: String, + stdin: Option, + expected_output: Option, +} + +#[derive(Serialize)] +struct RunResponse { + passed: bool, + exit_status: i32, + stdout: String, + stderr: String, + time_secs: Option, + timed_out: bool, + error_test: bool, + failing_step: Option, + steps: Vec, +} + +#[derive(Serialize)] +struct StepInfo { + name: String, + exit_status: i32, + time_secs: f64, +} + +#[derive(Serialize)] +struct InfoResponse { + config_name: String, + toolchains: Vec, + executables: Vec, + packages: Vec, +} + +#[derive(Serialize)] +struct ToolchainInfo { + name: String, + num_steps: usize, +} + +#[derive(Serialize)] +struct ExecutableInfo { + id: String, + path: String, +} + +#[derive(Serialize)] +struct PackageInfo { + name: String, + num_tests: usize, +} + +#[derive(Serialize)] +struct ErrorResponse { + error: String, +} + +fn error_json(status: StatusCode, msg: impl Into) -> (StatusCode, Json) { + (status, Json(ErrorResponse { error: msg.into() })) +} + +// 
--------------------------------------------------------------------------- +// Handlers +// --------------------------------------------------------------------------- + +async fn index() -> Html<&'static str> { + Html(include_str!("index.html")) +} + +async fn health() -> &'static str { + "OK" +} + +async fn info(State(state): State>) -> Json { + let cfg = &state.config; + Json(InfoResponse { + config_name: cfg.name.clone(), + toolchains: cfg + .toolchains + .iter() + .map(|tc| ToolchainInfo { + name: tc.name.clone(), + num_steps: tc.len(), + }) + .collect(), + executables: cfg + .executables + .iter() + .map(|e| ExecutableInfo { + id: e.id.clone(), + path: e.exe_path.display().to_string(), + }) + .collect(), + packages: cfg + .packages + .iter() + .map(|p| PackageInfo { + name: p.name.clone(), + num_tests: p.n_tests, + }) + .collect(), + }) +} + +async fn run( + State(state): State>, + Json(req): Json, +) -> Result, (StatusCode, Json)> { + // Look up toolchain by name + let tc: ToolChain = state + .config + .toolchains + .iter() + .find(|tc| tc.name == req.toolchain) + .cloned() + .ok_or_else(|| error_json(StatusCode::BAD_REQUEST, format!("unknown toolchain: {}", req.toolchain)))?; + + // Look up executable by id + let exe: Executable = state + .config + .executables + .iter() + .find(|e| e.id == req.executable) + .cloned() + .ok_or_else(|| error_json(StatusCode::BAD_REQUEST, format!("unknown executable: {}", req.executable)))?; + + // Decode source code + let code_bytes = B64.decode(&req.code) + .map_err(|e| error_json(StatusCode::BAD_REQUEST, format!("invalid base64 in code: {e}")))?; + + // Decode optional stdin + let stdin_bytes = req.stdin + .as_ref() + .map(|s| B64.decode(s)) + .transpose() + .map_err(|e| error_json(StatusCode::BAD_REQUEST, format!("invalid base64 in stdin: {e}")))?; + + // Decode optional expected_output + let expected_bytes = req.expected_output + .as_ref() + .map(|s| B64.decode(s)) + .transpose() + .map_err(|e| 
error_json(StatusCode::BAD_REQUEST, format!("invalid base64 in expected_output: {e}")))?; + + // Acquire semaphore permit for backpressure + let _permit = state.run_semaphore.acquire().await + .map_err(|_| error_json(StatusCode::SERVICE_UNAVAILABLE, "server shutting down"))?; + + let timeout = state.timeout; + + // Run the toolchain in a blocking task + let result = tokio::task::spawn_blocking(move || { + // Write code to a temp file + let tmp = tempfile::Builder::new() + .suffix(".test") + .tempfile() + .map_err(|e| format!("failed to create temp file: {e}"))?; + + { + use std::io::Write; + let mut f = tmp.as_file(); + f.write_all(&code_bytes) + .map_err(|e| format!("failed to write temp file: {e}"))?; + } + + // Build TestFile from the temp path + let mut test = TestFile::new(tmp.path()); + + // Override directives if provided in the request + if let Some(input) = stdin_bytes { + test.input_stream = Ok(input); + } + if let Some(expected) = expected_bytes { + test.expected_out = Ok(expected); + } + + let test = Arc::new(test); + + let runner = ToolChainRunner::new(&tc, timeout) + .with_env(exe.runtime_env()); + + let result = runner.run(&test, &exe); + Ok::<_, String>(result) + }) + .await + .map_err(|e| error_json(StatusCode::INTERNAL_SERVER_ERROR, format!("task panicked: {e}")))? 
+ .map_err(|e| error_json(StatusCode::INTERNAL_SERVER_ERROR, e))?; + + // Build step info from command history + let steps: Vec = result + .command_history + .iter() + .map(|cr| StepInfo { + name: cr.cmd.clone(), + exit_status: cr.exit_status, + time_secs: cr.time, + }) + .collect(); + + let last_exit = result + .command_history + .last() + .map(|cr| cr.exit_status) + .unwrap_or(0); + + let stdout_b64 = result + .gen_output + .as_deref() + .map(|b| B64.encode(b)) + .unwrap_or_default(); + + let stderr_b64 = result + .command_history + .last() + .map(|cr| B64.encode(&cr.stderr)) + .unwrap_or_default(); + + Ok(Json(RunResponse { + passed: result.did_pass, + exit_status: last_exit, + stdout: stdout_b64, + stderr: stderr_b64, + time_secs: result.time, + timed_out: result.did_timeout, + error_test: result.error_test, + failing_step: result.failing_step, + steps, + })) +} + +// --------------------------------------------------------------------------- +// Server entrypoint +// --------------------------------------------------------------------------- + +pub async fn run_server(config: Config, bind: &str, timeout: f64, max_concurrent: usize) { + let state = Arc::new(AppState { + config, + timeout, + run_semaphore: Semaphore::new(max_concurrent), + }); + + let app = Router::new() + .route("/", get(index)) + .route("/health", get(health)) + .route("/api/info", get(info)) + .route("/api/run", post(run)) + .layer(CorsLayer::permissive()) + .with_state(state); + + let listener = tokio::net::TcpListener::bind(bind) + .await + .unwrap_or_else(|e| panic!("failed to bind to {bind}: {e}")); + + println!("dragon-runner server listening on {bind}"); + + axum::serve(listener, app) + .await + .expect("server error"); +} diff --git a/src/testfile.rs b/src/testfile.rs new file mode 100644 index 0000000..2f717f2 --- /dev/null +++ b/src/testfile.rs @@ -0,0 +1,177 @@ +use std::fs; +use std::io::{self, BufRead}; +use std::path::{Path, PathBuf}; + +use crate::error::{DragonError, Validate}; 
+use crate::util::str_to_bytes; + +/// Result of parsing a directive — either successfully read bytes, or a structured error. +pub type DirectiveResult = Result, DragonError>; + +/// Recognized directives that can appear in test files. +pub enum Directive { + Check, + CheckFile, + Input, + InputFile, + Skip, +} + +impl Directive { + /// The string tag to scan for in test file comments. + pub fn tag(&self) -> &'static str { + match self { + Directive::Check => "CHECK:", + Directive::CheckFile => "CHECK_FILE:", + Directive::Input => "INPUT:", + Directive::InputFile => "INPUT_FILE:", + Directive::Skip => "SKIP", + } + } +} + +/// Represents a single test case file with parsed directives. +#[derive(Debug, Clone)] +pub struct TestFile { + pub path: PathBuf, + pub stem: String, + pub extension: String, + pub file: String, + pub comment_syntax: String, + pub expected_out: DirectiveResult, + pub input_stream: DirectiveResult, + pub skip: bool, +} + +impl TestFile { + pub fn new(test_path: &Path) -> Self { + let stem = test_path.file_stem().unwrap_or_default().to_string_lossy().into_owned(); + let extension = test_path + .extension() + .map(|e| format!(".{}", e.to_string_lossy())) + .unwrap_or_default(); + let file = format!("{stem}{extension}"); + let comment_syntax = "//".to_string(); + + let expected_out = Self::resolve_directive( + test_path, &comment_syntax, + Directive::Check.tag(), Directive::CheckFile.tag(), + ); + let input_stream = Self::resolve_directive( + test_path, &comment_syntax, + Directive::Input.tag(), Directive::InputFile.tag(), + ); + let skip = Self::parse_directive(test_path, &comment_syntax, Directive::Skip.tag()).is_some(); + + Self { path: test_path.into(), stem, extension, file, comment_syntax, expected_out, input_stream, skip } + } + + pub fn get_expected_out(&self) -> &[u8] { + self.expected_out.as_deref().unwrap_or(b"") + } + + pub fn get_input_stream(&self) -> &[u8] { + self.input_stream.as_deref().unwrap_or(b"") + } + + /// Resolve inline vs 
file directives into final byte content. + fn resolve_directive( + test_path: &Path, + comment_syntax: &str, + inline_dir: &str, + file_dir: &str, + ) -> DirectiveResult { + let inline = Self::parse_directive(test_path, comment_syntax, inline_dir); + let file_ref = Self::parse_directive(test_path, comment_syntax, file_dir); + + // Transpose Option → Result