test(craftos): add layered test timeouts

This commit is contained in:
Guillaume ARM 2026-06-08 05:37:49 +02:00
parent c7ba641e2d
commit e5d8e16c5a
10 changed files with 225 additions and 12 deletions

View File

@ -1 +1 @@
TRAP_CCLIBS_TEST_TIMEOUT_SECONDS=3
TRAP_CCLIBS_TEST_TIMEOUT_SECONDS=7

View File

@ -14,6 +14,7 @@ Use `docs/README.md` as the entrypoint for CC:Tweaked, CraftOS-PC, Advanced Peri
- Do not run `just repl` as an LLM agent; it is a human-only interactive CraftOS-PC wrapper. Use `just craftos --headless ...` for automated probes.
- When changing behavior, add as many useful CraftOS-PC tests as practical. It is acceptable to skip tests that require human-only validation, such as complex turtle motion, in-game UX feel, or visual approval, but still add unit-style non-regression tests for deterministic parts when possible.
- Use `/apis/libtest.lua` for test scripts under `tests/`; `/programs/runtest.lua` prints `__TRAPOS_TEST_OK__` only after the suite passes.
- `libtest` cancels each case after `3`s (`--timeout <s>` / `--no-timeout` to override); never commit a hanging test to `tests/`. Slow harness fixtures go in `tests/harness/` behind dedicated recipes. See `docs/adrs/adr-0009-layered-test-timeouts.md`.
- After editing Lua, run `just check` and fix all `luacheck` warnings.
- Use 2-space indent, semicolons, and `local function`.
- `require` paths are absolute ComputerCraft paths, for example `require('/apis/net')()`.

View File

@ -18,4 +18,9 @@ This creates `.env` from `.env.sample` when needed and installs the local Git ho
Tests live under `tests/` and run inside CraftOS-PC through `just test`, which launches `/programs/runtest.lua`. API-level tests should use `require('/apis/libtest')({ ... })`, register cases with `testlib.test(name, fn)`, and call `testlib.run()` at the end. `libtest` tests remain normal ComputerCraft programs; the runner handles suite discovery, grouped output, and the `__TRAPOS_TEST_OK__` shell success contract. Pass `--pretty` to `just test` for grouped colored `[OK]`/`[KO]` statuses, or `--verbose` for additional runner/debug detail.
Each CraftOS-PC test process is killed if it does not finish within `TRAP_CCLIBS_TEST_TIMEOUT_SECONDS`, defaulting to `3`. Override this in `.env` for slower local probes.
Test timeouts run in two independent layers (see [`docs/adrs/adr-0009-layered-test-timeouts.md`](docs/adrs/adr-0009-layered-test-timeouts.md)):
- **libtest per-case timeout (primary).** Each `libtest` case is cancelled after `3` seconds by default, failing with a distinct `libtest timeout` message. Override with `--timeout <seconds>`, or disable with `--no-timeout` (both forwarded by `runtest` to each case). This catches a single hung case quickly without taking down the whole run.
- **Shell watchdog (backstop).** The whole CraftOS-PC process is killed if it does not finish within `TRAP_CCLIBS_TEST_TIMEOUT_SECONDS` (`.env.sample` ships `7`; the recipe falls back to `7`). Keep it above the `3`s libtest default so libtest fires first; the watchdog only catches what Lua cannot interrupt. Override in `.env` for slower local probes.
`just test-timeout-5s` and `just test-timeout-10s` are self-asserting harness checks for these layers (fixtures under `tests/harness/`, not part of `just test`): the first proves libtest cancels a 5s case at 3s, the second proves the shell watchdog kills a 10s case run with `--no-timeout`.

View File

@ -106,7 +106,7 @@ test *args:
@if [ -f .env ]; then set -a; . ./.env; set +a; fi; \
pretty=0; \
verbose=0; \
timeout_seconds="${TRAP_CCLIBS_TEST_TIMEOUT_SECONDS:-3}"; \
timeout_seconds="${TRAP_CCLIBS_TEST_TIMEOUT_SECONDS:-7}"; \
case "$timeout_seconds" in ''|*[!0-9]*) printf '%s\n' 'TRAP_CCLIBS_TEST_TIMEOUT_SECONDS must be a positive integer' >&2; exit 1 ;; esac; \
if [ "$timeout_seconds" -lt 1 ]; then printf '%s\n' 'TRAP_CCLIBS_TEST_TIMEOUT_SECONDS must be >= 1' >&2; exit 1; fi; \
for a in {{args}}; do case "$a" in --pretty) pretty=1 ;; --verbose|-v) pretty=1; verbose=1 ;; esac; done; \
@ -149,6 +149,69 @@ test *args:
fi; \
printf '%s\n' 'OK: CraftOS integration tests passed'
# Harness self-test: run a tests/harness fixture and assert which timeout layer
# caught it. `expect` is "lua" (libtest cancels the case) or "shell" (the shell
# watchdog kills the whole process). Not part of `ci`/`test`: these exercise the
# failure paths on purpose.
_timeout-fixture script shell_timeout extra_flag expect: check-install
#!/usr/bin/env bash
set -uo pipefail
repo='{{justfile_directory()}}'
rom_arg=""
if [ "$(uname -s)" = "Darwin" ]; then
rom_arg="--rom /Applications/CraftOS-PC.app/Contents/Resources"
fi
mount_arg="--mount-ro /apis=$repo/apis --mount-ro /programs=$repo/programs --mount-ro /tests=$repo/tests"
tmp="$(mktemp)"
data_dir="$(mktemp -d)"
output_path="$data_dir/computer/0/trapos-test-output"
exec_code="shell.run('/programs/runtest.lua', '{{script}}', '--verbose', '--output', '/trapos-test-output', {{extra_flag}} '--shutdown')"
craftos --directory "$data_dir" --headless $rom_arg $mount_arg --exec "$exec_code" >"$tmp" 2>&1 &
pid="$!"
( sleep {{shell_timeout}}; kill -TERM "$pid" >/dev/null 2>&1 ) &
watchdog="$!"
wait "$pid" >/dev/null 2>&1
status="$?"
kill "$watchdog" >/dev/null 2>&1 || true
wait "$watchdog" >/dev/null 2>&1 || true
combined="$(cat "$tmp"; [ -f "$output_path" ] && cat "$output_path")"
red=$(printf '\033[31m'); green=$(printf '\033[32m'); reset=$(printf '\033[0m')
rc=0
case "{{expect}}" in
lua)
if printf '%s\n' "$combined" | grep -q 'libtest timeout' && [ "$status" -ne 143 ]; then
printf '%s\n' "${green}OK${reset} libtest cancelled the case (shell watchdog not needed)"; \
else
printf '%s\n' "${red}FAIL${reset} expected a libtest timeout before the shell watchdog (status=$status)" >&2
rc=1
fi
;;
shell)
if [ "$status" -eq 143 ]; then
printf '%s\n' "${green}OK${reset} shell watchdog killed the run after {{shell_timeout}}s (status 143; libtest timeout bypassed)"; \
else
printf '%s\n' "${red}FAIL${reset} expected the shell watchdog to kill the run (status=$status)" >&2
rc=1
fi
;;
*)
printf '%s\n' "${red}FAIL${reset} unknown expectation '{{expect}}'" >&2
rc=1
;;
esac
if [ "$rc" -ne 0 ]; then printf '%s\n' "$combined" >&2; fi
rm -f "$tmp"
rm -rf "$data_dir"
exit "$rc"
# Prove the libtest (Lua) timeout layer: a 5s case is cancelled at the 3s
# default, before the generous 30s shell watchdog can fire.
test-timeout-5s: (_timeout-fixture "/tests/harness/timeout-5s.lua" "30" "" "lua")
# Prove the shell watchdog backstop: a 10s case runs with the libtest timeout
# bypassed (--no-timeout), so the 5s shell watchdog kills the whole process.
test-timeout-10s: (_timeout-fixture "/tests/harness/timeout-10s.lua" "5" "'--no-timeout'," "shell")
# Lint all Lua source with luacheck.
check: check-luacheck
luacheck .

View File

@ -1,4 +1,6 @@
local _VERSION = "1.4.0"
local _VERSION = "1.5.0"
local DEFAULT_TIMEOUT_SECONDS = 3
local function createLibTest(args)
local api = {}
@ -7,6 +9,7 @@ local function createLibTest(args)
local verbose = false
local reportPath = nil
local printMarker = true
local timeoutSeconds = DEFAULT_TIMEOUT_SECONDS
local i = 1
while i <= #(args or {}) do
@ -21,6 +24,11 @@ local function createLibTest(args)
i = i + 1
elseif arg == "--no-marker" then
printMarker = false
elseif arg == "--timeout" then
timeoutSeconds = tonumber(args[i + 1])
i = i + 1
elseif arg == "--no-timeout" then
timeoutSeconds = nil
end
i = i + 1
end
@ -84,11 +92,36 @@ local function createLibTest(args)
end
end
local function runCase(fn)
if not timeoutSeconds then
return pcall(fn)
end
local ok, err
local finished = false
local function runner()
ok, err = pcall(fn)
finished = true
end
local function timer()
sleep(timeoutSeconds)
end
parallel.waitForAny(runner, timer)
if not finished then
return false, "libtest timeout after " .. timeoutSeconds .. "s", true
end
return ok, err
end
function api.run()
for _, t in ipairs(tests) do
api.log("RUN " .. t.name)
local ok, err = pcall(t.fn)
local ok, err, timedOut = runCase(t.fn)
if not ok then
if timedOut then
api.log("TIMEOUT " .. t.name .. " after " .. timeoutSeconds .. "s (libtest)")
end
writeReport("KO " .. t.name .. ": " .. tostring(err))
if not pretty then
print("FAIL " .. t.name .. ": " .. tostring(err))

View File

@ -16,3 +16,4 @@ Future ADRs can reuse the shape of the existing files when it is useful.
- [`adr-0006-simplify-periphemu-bootstrap.md`](adr-0006-simplify-periphemu-bootstrap.md) - Simplify periphemu bootstrap.
- [`adr-0007-use-libtest-for-craftos-tests.md`](adr-0007-use-libtest-for-craftos-tests.md) - Use libtest for CraftOS tests.
- [`adr-0008-keep-tests-runnable-in-craftos-and-in-game.md`](adr-0008-keep-tests-runnable-in-craftos-and-in-game.md) - Keep tests runnable in CraftOS and in-game.
- [`adr-0009-layered-test-timeouts.md`](adr-0009-layered-test-timeouts.md) - Layered test timeouts (libtest per-case + shell watchdog).

View File

@ -0,0 +1,69 @@
# ADR 0009: Layered Test Timeouts
## Status
Accepted
## Date
2026-06-08
## Context
ADR 0005 made CraftOS-PC the local harness and ADR 0007 split `libtest` (cases and
assertions) from `runtest` (suite orchestration). The only timeout in that harness was the
shell watchdog in the `Justfile` `test:` recipe: it `kill -TERM`s the whole CraftOS-PC
process after `TRAP_CCLIBS_TEST_TIMEOUT_SECONDS`.
That single layer is coarse. It kills the entire process, so it cannot say *which* case
hung, it produces one generic message, and it cannot tell a cooperatively-blocked event
loop (the common failure: waiting on an event or `sleep` that never resolves) apart from a
genuinely wedged process. A per-case timeout inside Lua is both finer and faster, but the
shell watchdog is still needed for the cases Lua cannot interrupt.
## Decision
Run two independent timeout layers, ordered so the finer one fires first.
**Layer 1 — `libtest` per-case timeout (primary).** `/apis/libtest.lua` races each test
case against a timer with `parallel.waitForAny(runner, timer)`. The default is
`DEFAULT_TIMEOUT_SECONDS = 3`. When the timer wins, the case fails with a distinct message
containing the token `libtest timeout` and, in `--verbose`, an extra `TIMEOUT … (libtest)`
diagnostic. `--timeout <seconds>` overrides the default; `--no-timeout` disables the layer.
`/programs/runtest.lua` forwards both flags to each case script. This only interrupts cases
that yield (the usual hang); a non-yielding CPU loop cannot be preempted in ComputerCraft.
**Layer 2 — shell watchdog (backstop).** The `Justfile` `test:` recipe keeps its existing
`TRAP_CCLIBS_TEST_TIMEOUT_SECONDS` watchdog unchanged, as an independent double-check. Its
default sits *above* the libtest default (`.env.sample` ships `7`; the recipe falls back to
`7`) so libtest fires first in normal runs and the watchdog only catches what Lua cannot —
a non-yielding loop, a wedged libtest, or a deliberately bypassed case. Its SIGTERM message
is worded differently from the `libtest timeout` message, so the two layers are never
confused.
## How To Write Tests Properly
- Normal tests live in `tests/*.lua`, use `/apis/libtest.lua`, and must finish under the
libtest timeout. `runtest` auto-discovers them; `just test` runs the suite.
- Never commit a hanging or intentionally-slow test to `tests/`: it would fail every run.
- Intentionally-slow fixtures that exercise the harness itself live in `tests/harness/`.
`runtest` discovery skips subdirectories, so they never run with the normal suite; they
are driven only by dedicated recipes (`just test-timeout-5s`, `just test-timeout-10s`).
- Use `--no-timeout` only for harness fixtures that must outlive the libtest layer to prove
the shell watchdog, never for ordinary tests.
## Consequences
- A hung case now fails in ~3s with a per-case message instead of taking down the whole
process anonymously.
- The two `test-timeout-*` recipes are self-asserting harness tests: `test-timeout-5s`
proves Layer 1 (libtest cancels a 5s case before the watchdog), `test-timeout-10s` proves
Layer 2 (the watchdog kills a 10s case with libtest bypassed). They are intentionally
excluded from `ci`/`test`.
- `libtest` stays a normal ComputerCraft program: `parallel` and `sleep` are sandbox
globals, so the timeout works in CraftOS-PC and in-game alike.
## Future Work
- Per-case timing in `--verbose` output if slow-but-passing cases become hard to spot.
- A `libtest`-level marker for "expected timeout" if more harness fixtures appear.

View File

@ -1,4 +1,4 @@
local _VERSION = "1.0.0"
local _VERSION = "1.1.0"
local SUCCESS_MARKER = "__TRAPOS_TEST_OK__"
local DEFAULT_REPORT_PATH = "/trapos-test-report"
@ -6,7 +6,7 @@ local DEFAULT_REPORT_PATH = "/trapos-test-report"
local function printUsage()
print("runtest usage:")
print()
print("\t\truntest [--pretty] [--verbose] [--output <path>] [--shutdown] [test ...]")
print("\t\truntest [--pretty] [--verbose] [--output <path>] [--timeout <seconds>] [--no-timeout] [--shutdown] [test ...]")
print("\t\truntest --version")
print("\t\truntest --help")
end
@ -17,6 +17,8 @@ local function parseArgs(args)
verbose = false,
shutdown = false,
outputPath = nil,
timeout = nil,
noTimeout = false,
tests = {},
}
@ -37,6 +39,11 @@ local function parseArgs(args)
elseif arg == "--output" then
opts.outputPath = args[i + 1]
i = i + 1
elseif arg == "--timeout" then
opts.timeout = args[i + 1]
i = i + 1
elseif arg == "--no-timeout" then
opts.noTimeout = true
elseif arg == "--shutdown" then
opts.shutdown = true
elseif string.sub(arg, 1, 1) == "-" then
@ -176,14 +183,20 @@ local suiteOk = true
for _, script in ipairs(tests) do
fs.delete(DEFAULT_REPORT_PATH)
local ok
local scriptArgs = { "--no-marker", "--report", DEFAULT_REPORT_PATH }
if opts.verbose then
ok = shell.run(script, "--no-marker", "--report", DEFAULT_REPORT_PATH, "--verbose")
scriptArgs[#scriptArgs + 1] = "--verbose"
elseif opts.pretty then
ok = shell.run(script, "--no-marker", "--report", DEFAULT_REPORT_PATH, "--pretty")
else
ok = shell.run(script, "--no-marker", "--report", DEFAULT_REPORT_PATH)
scriptArgs[#scriptArgs + 1] = "--pretty"
end
if opts.noTimeout then
scriptArgs[#scriptArgs + 1] = "--no-timeout"
elseif opts.timeout then
scriptArgs[#scriptArgs + 1] = "--timeout"
scriptArgs[#scriptArgs + 1] = opts.timeout
end
local ok = shell.run(script, table.unpack(scriptArgs))
local reportLines = readLines(DEFAULT_REPORT_PATH)
if opts.pretty then

View File

@ -0,0 +1,14 @@
-- Harness fixture (NOT auto-discovered: lives under tests/harness/).
-- Sleeps 10s with the libtest timeout bypassed (--no-timeout), proving the
-- shell watchdog is still the backstop. Driven by `just test-timeout-10s`.
local createLibTest = require('/apis/libtest');
local testlib = createLibTest({ ... });
testlib.test('sleeps past the shell watchdog', function()
testlib.log('about to sleep 10s with the libtest timeout disabled');
sleep(10);
testlib.assertTrue(true); -- never reached: shell watchdog kills the process
end);
testlib.run();

View File

@ -0,0 +1,14 @@
-- Harness fixture (NOT auto-discovered: lives under tests/harness/).
-- Sleeps past the 3s libtest default timeout but under the 7s shell watchdog,
-- proving the libtest layer cancels the case first. Driven by `just test-timeout-5s`.
local createLibTest = require('/apis/libtest');
local testlib = createLibTest({ ... });
testlib.test('sleeps past the libtest timeout', function()
testlib.log('about to sleep 5s; libtest should cancel this case at 3s');
sleep(5);
testlib.assertTrue(true); -- never reached: libtest cancels first
end);
testlib.run();