test(craftos): add layered test timeouts
This commit is contained in:
parent
c7ba641e2d
commit
e5d8e16c5a
@ -1 +1 @@
|
||||
TRAP_CCLIBS_TEST_TIMEOUT_SECONDS=3
|
||||
TRAP_CCLIBS_TEST_TIMEOUT_SECONDS=7
|
||||
|
||||
@ -14,6 +14,7 @@ Use `docs/README.md` as the entrypoint for CC:Tweaked, CraftOS-PC, Advanced Peri
|
||||
- Do not run `just repl` as an LLM agent; it is a human-only interactive CraftOS-PC wrapper. Use `just craftos --headless ...` for automated probes.
|
||||
- When changing behavior, add as many useful CraftOS-PC tests as practical. It is acceptable to skip tests that require human-only validation, such as complex turtle motion, in-game UX feel, or visual approval, but still add unit-style non-regression tests for deterministic parts when possible.
|
||||
- Use `/apis/libtest.lua` for test scripts under `tests/`; `/programs/runtest.lua` prints `__TRAPOS_TEST_OK__` only after the suite passes.
|
||||
- `libtest` cancels each case after `3`s (`--timeout <s>` / `--no-timeout` to override); never commit a hanging test to `tests/`. Slow harness fixtures go in `tests/harness/` behind dedicated recipes. See `docs/adrs/adr-0009-layered-test-timeouts.md`.
|
||||
- After editing Lua, run `just check` and fix all `luacheck` warnings.
|
||||
- Use 2-space indent, semicolons, and `local function`.
|
||||
- `require` paths are absolute ComputerCraft paths, for example `require('/apis/net')()`.
|
||||
|
||||
@ -18,4 +18,9 @@ This creates `.env` from `.env.sample` when needed and installs the local Git ho
|
||||
|
||||
Tests live under `tests/` and run inside CraftOS-PC through `just test`, which launches `/programs/runtest.lua`. API-level tests should use `require('/apis/libtest')({ ... })`, register cases with `testlib.test(name, fn)`, and call `testlib.run()` at the end. `libtest` tests remain normal ComputerCraft programs; the runner handles suite discovery, grouped output, and the `__TRAPOS_TEST_OK__` shell success contract. Pass `--pretty` to `just test` for grouped colored `[OK]`/`[KO]` statuses, or `--verbose` for additional runner/debug detail.
|
||||
|
||||
Each CraftOS-PC test process is killed if it does not finish within `TRAP_CCLIBS_TEST_TIMEOUT_SECONDS`, defaulting to `3`. Override this in `.env` for slower local probes.
|
||||
Test timeouts run in two independent layers (see [`docs/adrs/adr-0009-layered-test-timeouts.md`](docs/adrs/adr-0009-layered-test-timeouts.md)):
|
||||
|
||||
- **libtest per-case timeout (primary).** Each `libtest` case is cancelled after `3` seconds by default, failing with a distinct `libtest timeout` message. Override with `--timeout <seconds>`, or disable with `--no-timeout` (both forwarded by `runtest` to each case). This catches a single hung case quickly without taking down the whole run.
|
||||
- **Shell watchdog (backstop).** The whole CraftOS-PC process is killed if it does not finish within `TRAP_CCLIBS_TEST_TIMEOUT_SECONDS` (`.env.sample` ships `7`; the recipe falls back to `7`). Keep it above the `3`s libtest default so libtest fires first; the watchdog only catches what Lua cannot interrupt. Override in `.env` for slower local probes.
|
||||
|
||||
`just test-timeout-5s` and `just test-timeout-10s` are self-asserting harness checks for these layers (fixtures under `tests/harness/`, not part of `just test`): the first proves libtest cancels a 5s case at 3s, the second proves the shell watchdog kills a 10s case run with `--no-timeout`.
|
||||
|
||||
65
Justfile
65
Justfile
@ -106,7 +106,7 @@ test *args:
|
||||
@if [ -f .env ]; then set -a; . ./.env; set +a; fi; \
|
||||
pretty=0; \
|
||||
verbose=0; \
|
||||
timeout_seconds="${TRAP_CCLIBS_TEST_TIMEOUT_SECONDS:-3}"; \
|
||||
timeout_seconds="${TRAP_CCLIBS_TEST_TIMEOUT_SECONDS:-7}"; \
|
||||
case "$timeout_seconds" in ''|*[!0-9]*) printf '%s\n' 'TRAP_CCLIBS_TEST_TIMEOUT_SECONDS must be a positive integer' >&2; exit 1 ;; esac; \
|
||||
if [ "$timeout_seconds" -lt 1 ]; then printf '%s\n' 'TRAP_CCLIBS_TEST_TIMEOUT_SECONDS must be >= 1' >&2; exit 1; fi; \
|
||||
for a in {{args}}; do case "$a" in --pretty) pretty=1 ;; --verbose|-v) pretty=1; verbose=1 ;; esac; done; \
|
||||
@ -149,6 +149,69 @@ test *args:
|
||||
fi; \
|
||||
printf '%s\n' 'OK: CraftOS integration tests passed'
|
||||
|
||||
# Harness self-test: run a tests/harness fixture and assert which timeout layer
|
||||
# caught it. `expect` is "lua" (libtest cancels the case) or "shell" (the shell
|
||||
# watchdog kills the whole process). Not part of `ci`/`test`: these exercise the
|
||||
# failure paths on purpose.
|
||||
_timeout-fixture script shell_timeout extra_flag expect: check-install
|
||||
#!/usr/bin/env bash
|
||||
set -uo pipefail
|
||||
repo='{{justfile_directory()}}'
|
||||
rom_arg=""
|
||||
if [ "$(uname -s)" = "Darwin" ]; then
|
||||
rom_arg="--rom /Applications/CraftOS-PC.app/Contents/Resources"
|
||||
fi
|
||||
mount_arg="--mount-ro /apis=$repo/apis --mount-ro /programs=$repo/programs --mount-ro /tests=$repo/tests"
|
||||
tmp="$(mktemp)"
|
||||
data_dir="$(mktemp -d)"
|
||||
output_path="$data_dir/computer/0/trapos-test-output"
|
||||
exec_code="shell.run('/programs/runtest.lua', '{{script}}', '--verbose', '--output', '/trapos-test-output', {{extra_flag}} '--shutdown')"
|
||||
craftos --directory "$data_dir" --headless $rom_arg $mount_arg --exec "$exec_code" >"$tmp" 2>&1 &
|
||||
pid="$!"
|
||||
( sleep {{shell_timeout}}; kill -TERM "$pid" >/dev/null 2>&1 ) &
|
||||
watchdog="$!"
|
||||
wait "$pid" >/dev/null 2>&1
|
||||
status="$?"
|
||||
kill "$watchdog" >/dev/null 2>&1 || true
|
||||
wait "$watchdog" >/dev/null 2>&1 || true
|
||||
combined="$(cat "$tmp"; [ -f "$output_path" ] && cat "$output_path")"
|
||||
red=$(printf '\033[31m'); green=$(printf '\033[32m'); reset=$(printf '\033[0m')
|
||||
rc=0
|
||||
case "{{expect}}" in
|
||||
lua)
|
||||
if printf '%s\n' "$combined" | grep -q 'libtest timeout' && [ "$status" -ne 143 ]; then
|
||||
printf '%s\n' "${green}OK${reset} libtest cancelled the case (shell watchdog not needed)"; \
|
||||
else
|
||||
printf '%s\n' "${red}FAIL${reset} expected a libtest timeout before the shell watchdog (status=$status)" >&2
|
||||
rc=1
|
||||
fi
|
||||
;;
|
||||
shell)
|
||||
if [ "$status" -eq 143 ]; then
|
||||
printf '%s\n' "${green}OK${reset} shell watchdog killed the run after {{shell_timeout}}s (status 143; libtest timeout bypassed)"; \
|
||||
else
|
||||
printf '%s\n' "${red}FAIL${reset} expected the shell watchdog to kill the run (status=$status)" >&2
|
||||
rc=1
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
printf '%s\n' "${red}FAIL${reset} unknown expectation '{{expect}}'" >&2
|
||||
rc=1
|
||||
;;
|
||||
esac
|
||||
if [ "$rc" -ne 0 ]; then printf '%s\n' "$combined" >&2; fi
|
||||
rm -f "$tmp"
|
||||
rm -rf "$data_dir"
|
||||
exit "$rc"
|
||||
|
||||
# Prove the libtest (Lua) timeout layer: a 5s case is cancelled at the 3s
|
||||
# default, before the generous 30s shell watchdog can fire.
|
||||
test-timeout-5s: (_timeout-fixture "/tests/harness/timeout-5s.lua" "30" "" "lua")
|
||||
|
||||
# Prove the shell watchdog backstop: a 10s case runs with the libtest timeout
|
||||
# bypassed (--no-timeout), so the 5s shell watchdog kills the whole process.
|
||||
test-timeout-10s: (_timeout-fixture "/tests/harness/timeout-10s.lua" "5" "'--no-timeout'," "shell")
|
||||
|
||||
# Lint all Lua source with luacheck.
|
||||
check: check-luacheck
|
||||
luacheck .
|
||||
|
||||
@ -1,4 +1,6 @@
|
||||
local _VERSION = "1.4.0"
|
||||
local _VERSION = "1.5.0"
|
||||
|
||||
local DEFAULT_TIMEOUT_SECONDS = 3
|
||||
|
||||
local function createLibTest(args)
|
||||
local api = {}
|
||||
@ -7,6 +9,7 @@ local function createLibTest(args)
|
||||
local verbose = false
|
||||
local reportPath = nil
|
||||
local printMarker = true
|
||||
local timeoutSeconds = DEFAULT_TIMEOUT_SECONDS
|
||||
|
||||
local i = 1
|
||||
while i <= #(args or {}) do
|
||||
@ -21,6 +24,11 @@ local function createLibTest(args)
|
||||
i = i + 1
|
||||
elseif arg == "--no-marker" then
|
||||
printMarker = false
|
||||
elseif arg == "--timeout" then
|
||||
timeoutSeconds = tonumber(args[i + 1])
|
||||
i = i + 1
|
||||
elseif arg == "--no-timeout" then
|
||||
timeoutSeconds = nil
|
||||
end
|
||||
i = i + 1
|
||||
end
|
||||
@ -84,11 +92,36 @@ local function createLibTest(args)
|
||||
end
|
||||
end
|
||||
|
||||
local function runCase(fn)
|
||||
if not timeoutSeconds then
|
||||
return pcall(fn)
|
||||
end
|
||||
|
||||
local ok, err
|
||||
local finished = false
|
||||
local function runner()
|
||||
ok, err = pcall(fn)
|
||||
finished = true
|
||||
end
|
||||
local function timer()
|
||||
sleep(timeoutSeconds)
|
||||
end
|
||||
|
||||
parallel.waitForAny(runner, timer)
|
||||
if not finished then
|
||||
return false, "libtest timeout after " .. timeoutSeconds .. "s", true
|
||||
end
|
||||
return ok, err
|
||||
end
|
||||
|
||||
function api.run()
|
||||
for _, t in ipairs(tests) do
|
||||
api.log("RUN " .. t.name)
|
||||
local ok, err = pcall(t.fn)
|
||||
local ok, err, timedOut = runCase(t.fn)
|
||||
if not ok then
|
||||
if timedOut then
|
||||
api.log("TIMEOUT " .. t.name .. " after " .. timeoutSeconds .. "s (libtest)")
|
||||
end
|
||||
writeReport("KO " .. t.name .. ": " .. tostring(err))
|
||||
if not pretty then
|
||||
print("FAIL " .. t.name .. ": " .. tostring(err))
|
||||
|
||||
@ -16,3 +16,4 @@ Future ADRs can reuse the shape of the existing files when it is useful.
|
||||
- [`adr-0006-simplify-periphemu-bootstrap.md`](adr-0006-simplify-periphemu-bootstrap.md) - Simplify periphemu bootstrap.
|
||||
- [`adr-0007-use-libtest-for-craftos-tests.md`](adr-0007-use-libtest-for-craftos-tests.md) - Use libtest for CraftOS tests.
|
||||
- [`adr-0008-keep-tests-runnable-in-craftos-and-in-game.md`](adr-0008-keep-tests-runnable-in-craftos-and-in-game.md) - Keep tests runnable in CraftOS and in-game.
|
||||
- [`adr-0009-layered-test-timeouts.md`](adr-0009-layered-test-timeouts.md) - Layered test timeouts (libtest per-case + shell watchdog).
|
||||
|
||||
69
docs/adrs/adr-0009-layered-test-timeouts.md
Normal file
69
docs/adrs/adr-0009-layered-test-timeouts.md
Normal file
@ -0,0 +1,69 @@
|
||||
# ADR 0009: Layered Test Timeouts
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Date
|
||||
|
||||
2026-06-08
|
||||
|
||||
## Context
|
||||
|
||||
ADR 0005 made CraftOS-PC the local harness and ADR 0007 split `libtest` (cases and
|
||||
assertions) from `runtest` (suite orchestration). The only timeout in that harness was the
|
||||
shell watchdog in the `Justfile` `test:` recipe: it `kill -TERM`s the whole CraftOS-PC
|
||||
process after `TRAP_CCLIBS_TEST_TIMEOUT_SECONDS`.
|
||||
|
||||
That single layer is coarse. It kills the entire process, so it cannot say *which* case
|
||||
hung, it produces one generic message, and it cannot tell a cooperatively-blocked event
|
||||
loop (the common failure: waiting on an event or `sleep` that never resolves) apart from a
|
||||
genuinely wedged process. A per-case timeout inside Lua is both finer and faster, but the
|
||||
shell watchdog is still needed for the cases Lua cannot interrupt.
|
||||
|
||||
## Decision
|
||||
|
||||
Run two independent timeout layers, ordered so the finer one fires first.
|
||||
|
||||
**Layer 1 — `libtest` per-case timeout (primary).** `/apis/libtest.lua` races each test
|
||||
case against a timer with `parallel.waitForAny(runner, timer)`. The default is
|
||||
`DEFAULT_TIMEOUT_SECONDS = 3`. When the timer wins, the case fails with a distinct message
|
||||
containing the token `libtest timeout` and, in `--verbose`, an extra `TIMEOUT … (libtest)`
|
||||
diagnostic. `--timeout <seconds>` overrides the default; `--no-timeout` disables the layer.
|
||||
`/programs/runtest.lua` forwards both flags to each case script. This only interrupts cases
|
||||
that yield (the usual hang); a non-yielding CPU loop cannot be preempted in ComputerCraft.
|
||||
|
||||
**Layer 2 — shell watchdog (backstop).** The `Justfile` `test:` recipe keeps its existing
|
||||
`TRAP_CCLIBS_TEST_TIMEOUT_SECONDS` watchdog unchanged, as an independent double-check. Its
|
||||
default sits *above* the libtest default (`.env.sample` ships `7`; the recipe falls back to
|
||||
`7`) so libtest fires first in normal runs and the watchdog only catches what Lua cannot —
|
||||
a non-yielding loop, a wedged libtest, or a deliberately bypassed case. Its SIGTERM message
|
||||
is worded differently from the `libtest timeout` message, so the two layers are never
|
||||
confused.
|
||||
|
||||
## How To Write Tests Properly
|
||||
|
||||
- Normal tests live in `tests/*.lua`, use `/apis/libtest.lua`, and must finish under the
|
||||
libtest timeout. `runtest` auto-discovers them; `just test` runs the suite.
|
||||
- Never commit a hanging or intentionally-slow test to `tests/`: it would fail every run.
|
||||
- Intentionally-slow fixtures that exercise the harness itself live in `tests/harness/`.
|
||||
`runtest` discovery skips subdirectories, so they never run with the normal suite; they
|
||||
are driven only by dedicated recipes (`just test-timeout-5s`, `just test-timeout-10s`).
|
||||
- Use `--no-timeout` only for harness fixtures that must outlive the libtest layer to prove
|
||||
the shell watchdog, never for ordinary tests.
|
||||
|
||||
## Consequences
|
||||
|
||||
- A hung case now fails in ~3s with a per-case message instead of taking down the whole
|
||||
process anonymously.
|
||||
- The two `test-timeout-*` recipes are self-asserting harness tests: `test-timeout-5s`
|
||||
proves Layer 1 (libtest cancels a 5s case before the watchdog), `test-timeout-10s` proves
|
||||
Layer 2 (the watchdog kills a 10s case with libtest bypassed). They are intentionally
|
||||
excluded from `ci`/`test`.
|
||||
- `libtest` stays a normal ComputerCraft program: `parallel` and `sleep` are sandbox
|
||||
globals, so the timeout works in CraftOS-PC and in-game alike.
|
||||
|
||||
## Future Work
|
||||
|
||||
- Per-case timing in `--verbose` output if slow-but-passing cases become hard to spot.
|
||||
- A `libtest`-level marker for "expected timeout" if more harness fixtures appear.
|
||||
@ -1,4 +1,4 @@
|
||||
local _VERSION = "1.0.0"
|
||||
local _VERSION = "1.1.0"
|
||||
|
||||
local SUCCESS_MARKER = "__TRAPOS_TEST_OK__"
|
||||
local DEFAULT_REPORT_PATH = "/trapos-test-report"
|
||||
@ -6,7 +6,7 @@ local DEFAULT_REPORT_PATH = "/trapos-test-report"
|
||||
local function printUsage()
|
||||
print("runtest usage:")
|
||||
print()
|
||||
print("\t\truntest [--pretty] [--verbose] [--output <path>] [--shutdown] [test ...]")
|
||||
print("\t\truntest [--pretty] [--verbose] [--output <path>] [--timeout <seconds>] [--no-timeout] [--shutdown] [test ...]")
|
||||
print("\t\truntest --version")
|
||||
print("\t\truntest --help")
|
||||
end
|
||||
@ -17,6 +17,8 @@ local function parseArgs(args)
|
||||
verbose = false,
|
||||
shutdown = false,
|
||||
outputPath = nil,
|
||||
timeout = nil,
|
||||
noTimeout = false,
|
||||
tests = {},
|
||||
}
|
||||
|
||||
@ -37,6 +39,11 @@ local function parseArgs(args)
|
||||
elseif arg == "--output" then
|
||||
opts.outputPath = args[i + 1]
|
||||
i = i + 1
|
||||
elseif arg == "--timeout" then
|
||||
opts.timeout = args[i + 1]
|
||||
i = i + 1
|
||||
elseif arg == "--no-timeout" then
|
||||
opts.noTimeout = true
|
||||
elseif arg == "--shutdown" then
|
||||
opts.shutdown = true
|
||||
elseif string.sub(arg, 1, 1) == "-" then
|
||||
@ -176,14 +183,20 @@ local suiteOk = true
|
||||
for _, script in ipairs(tests) do
|
||||
fs.delete(DEFAULT_REPORT_PATH)
|
||||
|
||||
local ok
|
||||
local scriptArgs = { "--no-marker", "--report", DEFAULT_REPORT_PATH }
|
||||
if opts.verbose then
|
||||
ok = shell.run(script, "--no-marker", "--report", DEFAULT_REPORT_PATH, "--verbose")
|
||||
scriptArgs[#scriptArgs + 1] = "--verbose"
|
||||
elseif opts.pretty then
|
||||
ok = shell.run(script, "--no-marker", "--report", DEFAULT_REPORT_PATH, "--pretty")
|
||||
else
|
||||
ok = shell.run(script, "--no-marker", "--report", DEFAULT_REPORT_PATH)
|
||||
scriptArgs[#scriptArgs + 1] = "--pretty"
|
||||
end
|
||||
if opts.noTimeout then
|
||||
scriptArgs[#scriptArgs + 1] = "--no-timeout"
|
||||
elseif opts.timeout then
|
||||
scriptArgs[#scriptArgs + 1] = "--timeout"
|
||||
scriptArgs[#scriptArgs + 1] = opts.timeout
|
||||
end
|
||||
|
||||
local ok = shell.run(script, table.unpack(scriptArgs))
|
||||
local reportLines = readLines(DEFAULT_REPORT_PATH)
|
||||
|
||||
if opts.pretty then
|
||||
|
||||
14
tests/harness/timeout-10s.lua
Normal file
14
tests/harness/timeout-10s.lua
Normal file
@ -0,0 +1,14 @@
|
||||
-- Harness fixture (NOT auto-discovered: lives under tests/harness/).
|
||||
-- Sleeps 10s with the libtest timeout bypassed (--no-timeout), proving the
|
||||
-- shell watchdog is still the backstop. Driven by `just test-timeout-10s`.
|
||||
local createLibTest = require('/apis/libtest');
|
||||
|
||||
local testlib = createLibTest({ ... });
|
||||
|
||||
testlib.test('sleeps past the shell watchdog', function()
|
||||
testlib.log('about to sleep 10s with the libtest timeout disabled');
|
||||
sleep(10);
|
||||
testlib.assertTrue(true); -- never reached: shell watchdog kills the process
|
||||
end);
|
||||
|
||||
testlib.run();
|
||||
14
tests/harness/timeout-5s.lua
Normal file
14
tests/harness/timeout-5s.lua
Normal file
@ -0,0 +1,14 @@
|
||||
-- Harness fixture (NOT auto-discovered: lives under tests/harness/).
|
||||
-- Sleeps past the 3s libtest default timeout but under the 7s shell watchdog,
|
||||
-- proving the libtest layer cancels the case first. Driven by `just test-timeout-5s`.
|
||||
local createLibTest = require('/apis/libtest');
|
||||
|
||||
local testlib = createLibTest({ ... });
|
||||
|
||||
testlib.test('sleeps past the libtest timeout', function()
|
||||
testlib.log('about to sleep 5s; libtest should cancel this case at 3s');
|
||||
sleep(5);
|
||||
testlib.assertTrue(true); -- never reached: libtest cancels first
|
||||
end);
|
||||
|
||||
testlib.run();
|
||||
Loading…
Reference in New Issue
Block a user