From e5d8e16c5a5023cdb66121c1af69d88c9b305311 Mon Sep 17 00:00:00 2001 From: Guillaume ARM Date: Mon, 8 Jun 2026 05:37:49 +0200 Subject: [PATCH] test(craftos): add layered test timeouts --- .env.sample | 2 +- CLAUDE.md | 1 + DEVELOPMENT.md | 7 ++- Justfile | 65 ++++++++++++++++++- apis/libtest.lua | 37 ++++++++++- docs/adrs/README.md | 1 + docs/adrs/adr-0009-layered-test-timeouts.md | 69 +++++++++++++++++++++ programs/runtest.lua | 27 +++++--- tests/harness/timeout-10s.lua | 14 +++++ tests/harness/timeout-5s.lua | 14 +++++ 10 files changed, 225 insertions(+), 12 deletions(-) create mode 100644 docs/adrs/adr-0009-layered-test-timeouts.md create mode 100644 tests/harness/timeout-10s.lua create mode 100644 tests/harness/timeout-5s.lua diff --git a/.env.sample b/.env.sample index b9a8c81..dc42b17 100644 --- a/.env.sample +++ b/.env.sample @@ -1 +1 @@ -TRAP_CCLIBS_TEST_TIMEOUT_SECONDS=3 +TRAP_CCLIBS_TEST_TIMEOUT_SECONDS=7 diff --git a/CLAUDE.md b/CLAUDE.md index 03e00d1..3efcb27 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -14,6 +14,7 @@ Use `docs/README.md` as the entrypoint for CC:Tweaked, CraftOS-PC, Advanced Peri - Do not run `just repl` as an LLM agent; it is a human-only interactive CraftOS-PC wrapper. Use `just craftos --headless ...` for automated probes. - When changing behavior, add as many useful CraftOS-PC tests as practical. It is acceptable to skip tests that require human-only validation, such as complex turtle motion, in-game UX feel, or visual approval, but still add unit-style non-regression tests for deterministic parts when possible. - Use `/apis/libtest.lua` for test scripts under `tests/`; `/programs/runtest.lua` prints `__TRAPOS_TEST_OK__` only after the suite passes. +- `libtest` cancels each case after `3`s (`--timeout ` / `--no-timeout` to override); never commit a hanging test to `tests/`. Slow harness fixtures go in `tests/harness/` behind dedicated recipes. See `docs/adrs/adr-0009-layered-test-timeouts.md`. - After editing Lua, run `just check` and fix all `luacheck` warnings. - Use 2-space indent, semicolons, and `local function`. - `require` paths are absolute ComputerCraft paths, for example `require('/apis/net')()`. diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 9643af0..ebcf08f 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -18,4 +18,9 @@ This creates `.env` from `.env.sample` when needed and installs the local Git ho Tests live under `tests/` and run inside CraftOS-PC through `just test`, which launches `/programs/runtest.lua`. API-level tests should use `require('/apis/libtest')({ ... })`, register cases with `testlib.test(name, fn)`, and call `testlib.run()` at the end. `libtest` tests remain normal ComputerCraft programs; the runner handles suite discovery, grouped output, and the `__TRAPOS_TEST_OK__` shell success contract. Pass `--pretty` to `just test` for grouped colored `[OK]`/`[KO]` statuses, or `--verbose` for additional runner/debug detail. -Each CraftOS-PC test process is killed if it does not finish within `TRAP_CCLIBS_TEST_TIMEOUT_SECONDS`, defaulting to `3`. Override this in `.env` for slower local probes. +Test timeouts run in two independent layers (see [`docs/adrs/adr-0009-layered-test-timeouts.md`](docs/adrs/adr-0009-layered-test-timeouts.md)): + +- **libtest per-case timeout (primary).** Each `libtest` case is cancelled after `3` seconds by default, failing with a distinct `libtest timeout` message. Override with `--timeout `, or disable with `--no-timeout` (both forwarded by `runtest` to each case). This catches a single hung case quickly without taking down the whole run. +- **Shell watchdog (backstop).** The whole CraftOS-PC process is killed if it does not finish within `TRAP_CCLIBS_TEST_TIMEOUT_SECONDS` (`.env.sample` ships `7`; the recipe falls back to `7`). Keep it above the `3`s libtest default so libtest fires first; the watchdog only catches what Lua cannot interrupt. Override in `.env` for slower local probes. + +`just test-timeout-5s` and `just test-timeout-10s` are self-asserting harness checks for these layers (fixtures under `tests/harness/`, not part of `just test`): the first proves libtest cancels a 5s case at 3s, the second proves the shell watchdog kills a 10s case run with `--no-timeout`. diff --git a/Justfile b/Justfile index 910543b..cca0753 100644 --- a/Justfile +++ b/Justfile @@ -106,7 +106,7 @@ test *args: @if [ -f .env ]; then set -a; . ./.env; set +a; fi; \ pretty=0; \ verbose=0; \ - timeout_seconds="${TRAP_CCLIBS_TEST_TIMEOUT_SECONDS:-3}"; \ + timeout_seconds="${TRAP_CCLIBS_TEST_TIMEOUT_SECONDS:-7}"; \ case "$timeout_seconds" in ''|*[!0-9]*) printf '%s\n' 'TRAP_CCLIBS_TEST_TIMEOUT_SECONDS must be a positive integer' >&2; exit 1 ;; esac; \ if [ "$timeout_seconds" -lt 1 ]; then printf '%s\n' 'TRAP_CCLIBS_TEST_TIMEOUT_SECONDS must be >= 1' >&2; exit 1; fi; \ for a in {{args}}; do case "$a" in --pretty) pretty=1 ;; --verbose|-v) pretty=1; verbose=1 ;; esac; done; \ @@ -149,6 +149,69 @@ test *args: fi; \ printf '%s\n' 'OK: CraftOS integration tests passed' +# Harness self-test: run a tests/harness fixture and assert which timeout layer +# caught it. `expect` is "lua" (libtest cancels the case) or "shell" (the shell +# watchdog kills the whole process). Not part of `ci`/`test`: these exercise the +# failure paths on purpose. +_timeout-fixture script shell_timeout extra_flag expect: check-install + #!/usr/bin/env bash + set -uo pipefail + repo='{{justfile_directory()}}' + rom_arg="" + if [ "$(uname -s)" = "Darwin" ]; then + rom_arg="--rom /Applications/CraftOS-PC.app/Contents/Resources" + fi + mount_arg="--mount-ro /apis=$repo/apis --mount-ro /programs=$repo/programs --mount-ro /tests=$repo/tests" + tmp="$(mktemp)" + data_dir="$(mktemp -d)" + output_path="$data_dir/computer/0/trapos-test-output" + exec_code="shell.run('/programs/runtest.lua', '{{script}}', '--verbose', '--output', '/trapos-test-output', {{extra_flag}} '--shutdown')" + craftos --directory "$data_dir" --headless $rom_arg $mount_arg --exec "$exec_code" >"$tmp" 2>&1 & + pid="$!" + ( sleep {{shell_timeout}}; kill -TERM "$pid" >/dev/null 2>&1 ) & + watchdog="$!" + wait "$pid" >/dev/null 2>&1 + status="$?" + kill "$watchdog" >/dev/null 2>&1 || true + wait "$watchdog" >/dev/null 2>&1 || true + combined="$(cat "$tmp"; [ -f "$output_path" ] && cat "$output_path")" + red=$(printf '\033[31m'); green=$(printf '\033[32m'); reset=$(printf '\033[0m') + rc=0 + case "{{expect}}" in + lua) + if printf '%s\n' "$combined" | grep -q 'libtest timeout' && [ "$status" -ne 143 ]; then + printf '%s\n' "${green}OK${reset} libtest cancelled the case (shell watchdog not needed)"; \ + else + printf '%s\n' "${red}FAIL${reset} expected a libtest timeout before the shell watchdog (status=$status)" >&2 + rc=1 + fi + ;; + shell) + if [ "$status" -eq 143 ]; then + printf '%s\n' "${green}OK${reset} shell watchdog killed the run after {{shell_timeout}}s (status 143; libtest timeout bypassed)"; \ + else + printf '%s\n' "${red}FAIL${reset} expected the shell watchdog to kill the run (status=$status)" >&2 + rc=1 + fi + ;; + *) + printf '%s\n' "${red}FAIL${reset} unknown expectation '{{expect}}'" >&2 + rc=1 + ;; + esac + if [ "$rc" -ne 0 ]; then printf '%s\n' "$combined" >&2; fi + rm -f "$tmp" + rm -rf "$data_dir" + exit "$rc" + +# Prove the libtest (Lua) timeout layer: a 5s case is cancelled at the 3s +# default, before the generous 30s shell watchdog can fire. +test-timeout-5s: (_timeout-fixture "/tests/harness/timeout-5s.lua" "30" "" "lua") + +# Prove the shell watchdog backstop: a 10s case runs with the libtest timeout +# bypassed (--no-timeout), so the 5s shell watchdog kills the whole process. +test-timeout-10s: (_timeout-fixture "/tests/harness/timeout-10s.lua" "5" "'--no-timeout'," "shell") + # Lint all Lua source with luacheck. check: check-luacheck luacheck . diff --git a/apis/libtest.lua b/apis/libtest.lua index c0fd9ca..1df7211 100644 --- a/apis/libtest.lua +++ b/apis/libtest.lua @@ -1,4 +1,6 @@ -local _VERSION = "1.4.0" +local _VERSION = "1.5.0" + +local DEFAULT_TIMEOUT_SECONDS = 3 local function createLibTest(args) local api = {} @@ -7,6 +9,7 @@ local function createLibTest(args) local verbose = false local reportPath = nil local printMarker = true + local timeoutSeconds = DEFAULT_TIMEOUT_SECONDS local i = 1 while i <= #(args or {}) do @@ -21,6 +24,11 @@ local function createLibTest(args) i = i + 1 elseif arg == "--no-marker" then printMarker = false + elseif arg == "--timeout" then + timeoutSeconds = tonumber(args[i + 1]) + i = i + 1 + elseif arg == "--no-timeout" then + timeoutSeconds = nil end i = i + 1 end @@ -84,11 +92,36 @@ local function createLibTest(args) end end + local function runCase(fn) + if not timeoutSeconds then + return pcall(fn) + end + + local ok, err + local finished = false + local function runner() + ok, err = pcall(fn) + finished = true + end + local function timer() + sleep(timeoutSeconds) + end + + parallel.waitForAny(runner, timer) + if not finished then + return false, "libtest timeout after " .. timeoutSeconds .. "s", true + end + return ok, err + end + function api.run() for _, t in ipairs(tests) do api.log("RUN " .. t.name) - local ok, err = pcall(t.fn) + local ok, err, timedOut = runCase(t.fn) if not ok then + if timedOut then + api.log("TIMEOUT " .. t.name .. " after " .. timeoutSeconds .. "s (libtest)") + end writeReport("KO " .. t.name .. ": " .. tostring(err)) if not pretty then print("FAIL " .. t.name .. ": " .. tostring(err)) diff --git a/docs/adrs/README.md b/docs/adrs/README.md index 7235a15..544d395 100644 --- a/docs/adrs/README.md +++ b/docs/adrs/README.md @@ -16,3 +16,4 @@ Future ADRs can reuse the shape of the existing files when it is useful. - [`adr-0006-simplify-periphemu-bootstrap.md`](adr-0006-simplify-periphemu-bootstrap.md) - Simplify periphemu bootstrap. - [`adr-0007-use-libtest-for-craftos-tests.md`](adr-0007-use-libtest-for-craftos-tests.md) - Use libtest for CraftOS tests. - [`adr-0008-keep-tests-runnable-in-craftos-and-in-game.md`](adr-0008-keep-tests-runnable-in-craftos-and-in-game.md) - Keep tests runnable in CraftOS and in-game. +- [`adr-0009-layered-test-timeouts.md`](adr-0009-layered-test-timeouts.md) - Layered test timeouts (libtest per-case + shell watchdog). diff --git a/docs/adrs/adr-0009-layered-test-timeouts.md b/docs/adrs/adr-0009-layered-test-timeouts.md new file mode 100644 index 0000000..2a510f0 --- /dev/null +++ b/docs/adrs/adr-0009-layered-test-timeouts.md @@ -0,0 +1,69 @@ +# ADR 0009: Layered Test Timeouts + +## Status + +Accepted + +## Date + +2026-06-08 + +## Context + +ADR 0005 made CraftOS-PC the local harness and ADR 0007 split `libtest` (cases and +assertions) from `runtest` (suite orchestration). The only timeout in that harness was the +shell watchdog in the `Justfile` `test:` recipe: it `kill -TERM`s the whole CraftOS-PC +process after `TRAP_CCLIBS_TEST_TIMEOUT_SECONDS`. + +That single layer is coarse. It kills the entire process, so it cannot say *which* case +hung, it produces one generic message, and it cannot tell a cooperatively-blocked event +loop (the common failure: waiting on an event or `sleep` that never resolves) apart from a +genuinely wedged process. A per-case timeout inside Lua is both finer and faster, but the +shell watchdog is still needed for the cases Lua cannot interrupt. + +## Decision + +Run two independent timeout layers, ordered so the finer one fires first. + +**Layer 1 — `libtest` per-case timeout (primary).** `/apis/libtest.lua` races each test +case against a timer with `parallel.waitForAny(runner, timer)`. The default is +`DEFAULT_TIMEOUT_SECONDS = 3`. When the timer wins, the case fails with a distinct message +containing the token `libtest timeout` and, in `--verbose`, an extra `TIMEOUT … (libtest)` +diagnostic. `--timeout ` overrides the default; `--no-timeout` disables the layer. +`/programs/runtest.lua` forwards both flags to each case script. This only interrupts cases +that yield (the usual hang); a non-yielding CPU loop cannot be preempted in ComputerCraft. + +**Layer 2 — shell watchdog (backstop).** The `Justfile` `test:` recipe keeps its existing +`TRAP_CCLIBS_TEST_TIMEOUT_SECONDS` watchdog unchanged, as an independent double-check. Its +default sits *above* the libtest default (`.env.sample` ships `7`; the recipe falls back to +`7`) so libtest fires first in normal runs and the watchdog only catches what Lua cannot — +a non-yielding loop, a wedged libtest, or a deliberately bypassed case. Its SIGTERM message +is worded differently from the `libtest timeout` message, so the two layers are never +confused. + +## How To Write Tests Properly + +- Normal tests live in `tests/*.lua`, use `/apis/libtest.lua`, and must finish under the + libtest timeout. `runtest` auto-discovers them; `just test` runs the suite. +- Never commit a hanging or intentionally-slow test to `tests/`: it would fail every run. +- Intentionally-slow fixtures that exercise the harness itself live in `tests/harness/`. + `runtest` discovery skips subdirectories, so they never run with the normal suite; they + are driven only by dedicated recipes (`just test-timeout-5s`, `just test-timeout-10s`). +- Use `--no-timeout` only for harness fixtures that must outlive the libtest layer to prove + the shell watchdog, never for ordinary tests. + +## Consequences + +- A hung case now fails in ~3s with a per-case message instead of taking down the whole + process anonymously. +- The two `test-timeout-*` recipes are self-asserting harness tests: `test-timeout-5s` + proves Layer 1 (libtest cancels a 5s case before the watchdog), `test-timeout-10s` proves + Layer 2 (the watchdog kills a 10s case with libtest bypassed). They are intentionally + excluded from `ci`/`test`. +- `libtest` stays a normal ComputerCraft program: `parallel` and `sleep` are sandbox + globals, so the timeout works in CraftOS-PC and in-game alike. + +## Future Work + +- Per-case timing in `--verbose` output if slow-but-passing cases become hard to spot. +- A `libtest`-level marker for "expected timeout" if more harness fixtures appear. diff --git a/programs/runtest.lua b/programs/runtest.lua index 8e41aed..683c311 100644 --- a/programs/runtest.lua +++ b/programs/runtest.lua @@ -1,4 +1,4 @@ -local _VERSION = "1.0.0" +local _VERSION = "1.1.0" local SUCCESS_MARKER = "__TRAPOS_TEST_OK__" local DEFAULT_REPORT_PATH = "/trapos-test-report" @@ -6,7 +6,7 @@ local DEFAULT_REPORT_PATH = "/trapos-test-report" local function printUsage() print("runtest usage:") print() - print("\t\truntest [--pretty] [--verbose] [--output ] [--shutdown] [test ...]") + print("\t\truntest [--pretty] [--verbose] [--output ] [--timeout ] [--no-timeout] [--shutdown] [test ...]") print("\t\truntest --version") print("\t\truntest --help") end @@ -17,6 +17,8 @@ local function parseArgs(args) verbose = false, shutdown = false, outputPath = nil, + timeout = nil, + noTimeout = false, tests = {}, } @@ -37,6 +39,11 @@ local function parseArgs(args) elseif arg == "--output" then opts.outputPath = args[i + 1] i = i + 1 + elseif arg == "--timeout" then + opts.timeout = args[i + 1] + i = i + 1 + elseif arg == "--no-timeout" then + opts.noTimeout = true elseif arg == "--shutdown" then opts.shutdown = true elseif string.sub(arg, 1, 1) == "-" then @@ -176,14 +183,20 @@ local suiteOk = true for _, script in ipairs(tests) do fs.delete(DEFAULT_REPORT_PATH) - local ok + local scriptArgs = { "--no-marker", "--report", DEFAULT_REPORT_PATH } if opts.verbose then - ok = shell.run(script, "--no-marker", "--report", DEFAULT_REPORT_PATH, "--verbose") + scriptArgs[#scriptArgs + 1] = "--verbose" elseif opts.pretty then - ok = shell.run(script, "--no-marker", "--report", DEFAULT_REPORT_PATH, "--pretty") - else - ok = shell.run(script, "--no-marker", "--report", DEFAULT_REPORT_PATH) + scriptArgs[#scriptArgs + 1] = "--pretty" end + if opts.noTimeout then + scriptArgs[#scriptArgs + 1] = "--no-timeout" + elseif opts.timeout then + scriptArgs[#scriptArgs + 1] = "--timeout" + scriptArgs[#scriptArgs + 1] = opts.timeout + end + + local ok = shell.run(script, table.unpack(scriptArgs)) local reportLines = readLines(DEFAULT_REPORT_PATH) if opts.pretty then diff --git a/tests/harness/timeout-10s.lua b/tests/harness/timeout-10s.lua new file mode 100644 index 0000000..3e6b154 --- /dev/null +++ b/tests/harness/timeout-10s.lua @@ -0,0 +1,14 @@ +-- Harness fixture (NOT auto-discovered: lives under tests/harness/). +-- Sleeps 10s with the libtest timeout bypassed (--no-timeout), proving the +-- shell watchdog is still the backstop. Driven by `just test-timeout-10s`. +local createLibTest = require('/apis/libtest'); + +local testlib = createLibTest({ ... }); + +testlib.test('sleeps past the shell watchdog', function() + testlib.log('about to sleep 10s with the libtest timeout disabled'); + sleep(10); + testlib.assertTrue(true); -- never reached: shell watchdog kills the process +end); + +testlib.run(); diff --git a/tests/harness/timeout-5s.lua b/tests/harness/timeout-5s.lua new file mode 100644 index 0000000..0cdf1c3 --- /dev/null +++ b/tests/harness/timeout-5s.lua @@ -0,0 +1,14 @@ +-- Harness fixture (NOT auto-discovered: lives under tests/harness/). +-- Sleeps past the 3s libtest default timeout but under the 7s shell watchdog, +-- proving the libtest layer cancels the case first. Driven by `just test-timeout-5s`. +local createLibTest = require('/apis/libtest'); + +local testlib = createLibTest({ ... }); + +testlib.test('sleeps past the libtest timeout', function() + testlib.log('about to sleep 5s; libtest should cancel this case at 3s'); + sleep(5); + testlib.assertTrue(true); -- never reached: libtest cancels first +end); + +testlib.run();