docs(adr): prefer eventloop timeouts

This commit is contained in:
Guillaume ARM 2026-06-09 20:10:30 +02:00
parent c61254d702
commit 8e9958b1c3
8 changed files with 277 additions and 18 deletions

View File

@ -155,7 +155,7 @@ local function createAi(opts)
local httpLib = opts.http or http;
local settingsLib = opts.settings or settings;
local sleepFunc = opts.sleep or sleep;
local eventloopFactory = opts.eventloop or require('/apis/eventloop');
local nowFunc = opts.now or nowSeconds;
local api = {};
@ -267,27 +267,44 @@ local function createAi(opts)
local doPost;
local function pollMessage(cfg, sessionId, messageId, persist)
local loop = eventloopFactory();
local deadline = nowFunc() + cfg.pollTimeoutSeconds;
while true do
local resultOk, resultValue;
local function finish(ok, value)
resultOk, resultValue = ok, value;
loop.stopLoop();
end
local function attempt()
local body, code = doGet(cfg, '/session/' .. sessionId .. '/message');
if not body then return false, code; end
if code == 404 then return handleMissingSession(persist); end
if not body then return finish(false, code); end
if code == 404 then
local ok, value = handleMissingSession(persist);
return finish(ok, value);
end
if code and code ~= 200 then
return false, 'erreur message: HTTP ' .. tostring(code);
return finish(false, 'erreur message: HTTP ' .. tostring(code));
end
local messages = textutils.unserializeJSON(body);
if type(messages) ~= 'table' then return false, 'reponse message invalide'; end
if type(messages) ~= 'table' then
return finish(false, 'reponse message invalide');
end
local decoded = findAssistantMessage(messages, messageId);
local reply = decoded and extractTextParts(decoded.parts) or '';
if decoded and reply ~= '' and isMessageComplete(decoded) then
return true, { reply = reply, sessionId = sessionId, messageId = messageId };
return finish(true, { reply = reply, sessionId = sessionId, messageId = messageId });
end
if nowFunc() >= deadline then
return false, 'delai depasse en attendant la reponse AI';
return finish(false, 'delai depasse en attendant la reponse AI');
end
sleepFunc(cfg.pollIntervalSeconds);
loop.setTimeout(attempt, cfg.pollIntervalSeconds);
end
loop.setTimeout(attempt, 0);
loop.runLoop();
return resultOk, resultValue;
end
local function buildHeaders(cfg)

View File

@ -21,3 +21,4 @@ Future ADRs can reuse the shape of the existing files when it is useful.
- [`adr-0011-git-hooks-own-commit-push-verification.md`](adr-0011-git-hooks-own-commit-push-verification.md) - Git hooks own commit/push verification.
- [`adr-0012-headless-craftos-pc-as-hypothesis-probe.md`](adr-0012-headless-craftos-pc-as-hypothesis-probe.md) - Headless CraftOS-PC as the canonical hypothesis probe (rename `just craftos``just trapos`, add vanilla `just craftos`).
- [`adr-0013-markdown-link-syntax-for-cross-references.md`](adr-0013-markdown-link-syntax-for-cross-references.md) - Cross-reference markdown files with `[]()` syntax (so lychee can validate them).
- [`adr-0014-prefer-eventloop-settimeout-over-os-sleep.md`](adr-0014-prefer-eventloop-settimeout-over-os-sleep.md) - Prefer `eventloop.setTimeout` over `os.sleep` in application code.

View File

@ -0,0 +1,87 @@
# ADR 0014: Prefer `eventloop.setTimeout` Over `os.sleep` In Application Code
## Status
Accepted
## Date
2026-06-09
## Context
[ADR-0002](adr-0002-use-eventloop-for-async-code.md) made [`apis/eventloop.lua`](../../apis/eventloop.lua) the
default substrate for async behavior. The eventloop drives a single
`os.pullEventRaw` loop and dispatches every event to registered handlers and
timer callbacks.
`os.sleep` looks innocent but breaks that contract. Its CC:Tweaked
implementation yields the coroutine via `os.pullEvent("timer")`. While the
sleep is in flight:
- The enclosing `os.pullEventRaw` of the eventloop is paused; nothing else
runs in that coroutine.
- Non-`timer` events that arrive are silently discarded by `os.pullEvent`
so handlers registered through the eventloop miss them entirely.
- Even `eventloop.setTimeout` callbacks scheduled before the sleep cannot
fire until the sleep returns, because their timer events are consumed by
the sleep filter.
This bit `apis/libai.lua` `pollMessage`, which used a sleep-based throttle
between HTTP polls. The function looked synchronous and stand-alone, but
the moment a caller invoked `libai.ask` from inside a handler — exactly the
composition [ADR-0002](adr-0002-use-eventloop-for-async-code.md)
encourages — the whole event loop froze for the duration of every poll
interval.
[`apis/net.lua`](../../apis/net.lua) `sendRequest` already shows the right
pattern: create a private eventloop, schedule the wait through
`setTimeout`, then `runLoop` until the work resolves. From the caller's
perspective the function is still synchronous; internally, the dispatcher
of timer events stays alive and arbitrary other handlers can be composed
around it via `parallel.waitForAll`.
## Decision
In library, server, and program code that may run inside an eventloop
(directly or transitively), use `eventloop.setTimeout` for any waiting,
throttling, polling, or retry-with-delay. Libraries that need to temporize
must take an eventloop factory through their constructor (the way
`apis/net` does) rather than baking a hardcoded sleep call.
`os.sleep` remains acceptable only in narrow cases:
1. One-shot programs that are purely sequential and register no event
handlers — a `programs/foo.lua` that prints, sleeps, prints again, and
exits.
2. `parallel.waitForAny(task, function() sleep(t); end)` used as an
isolated guard to bound an inner task (e.g. the AI Lua-exec sandbox in
`apis/libai.lua` and the `parallel.waitForAny`-driven per-case timer in
`apis/libtest.lua`). The guard sleep is private to its own coroutine
group; it does not block anything external.
3. Tests that are themselves driven by `libtest`'s per-case timeout (see
[ADR-0009](adr-0009-layered-test-timeouts.md)).
New code must not expose a `sleep` injection point on its constructor. If
a wait is needed, accept an `eventloop` factory and schedule through
`setTimeout`. Tests substitute a synchronous deterministic eventloop fake
the same way they substitute `http` or `settings`.
## Consequences
- Slightly more ceremony in "synchronous-looking" functions that wait: a
private eventloop plus a small `attempt`/`finish` pair. The benefit is
that the function composes cleanly with any caller's eventloop.
- Test fakes shift from a `sleep` stub to a synchronous eventloop double.
Ergonomics are comparable; the eventloop fake additionally lets tests
observe `pending` and `stopped` state, catching leaks the sleep stub
would have missed.
- Existing call sites are migrated opportunistically when they cause
observable bugs. The first migration is `apis/libai.lua`.
## References
- [ADR-0002](adr-0002-use-eventloop-for-async-code.md) — use eventloop for async code.
- [ADR-0009](adr-0009-layered-test-timeouts.md) — layered test timeouts (the `parallel.waitForAny` guard exception).
- [`apis/net.lua`](../../apis/net.lua) `sendRequest` — canonical private-eventloop pattern.
- [`apis/libai.lua`](../../apis/libai.lua) `pollMessage` — first migration.

View File

@ -1,6 +1,6 @@
{
"name": "TrapOS",
"version": "0.6.4",
"version": "0.7.0",
"branch": "next",
"packages": [
"trapos"

View File

@ -5,8 +5,8 @@
"trapos-boot": "0.2.2",
"trapos-net": "0.2.1",
"trapos-ui": "0.2.2",
"trapos-ai": "0.5.3",
"trapos-ai": "0.6.0",
"trapos-sandbox": "0.1.0",
"trapos": "0.6.4"
"trapos": "0.7.0"
}
}

View File

@ -1,6 +1,6 @@
{
"name": "trapos-ai",
"version": "0.5.3",
"version": "0.6.0",
"description": "TrapOS AI client for opencode serve",
"dependencies": ["trapos-core"],
"files": [

View File

@ -1,6 +1,6 @@
{
"name": "trapos",
"version": "0.6.4",
"version": "0.7.0",
"description": "TrapOS full install meta-package",
"dependencies": ["trapos-boot", "trapos-net", "trapos-ui", "trapos-test", "trapos-ai"],
"files": [],

View File

@ -66,6 +66,41 @@ local function httpError(code, body)
end;
end
-- Synchronous deterministic eventloop double for tests.
-- setTimeout drains FIFO; runLoop runs until pending is empty or stopLoop fires.
-- Returns (factory, state). state.sleeps accumulates every delay passed across
-- all loops; state.lastLoop exposes the most recent loop for pending/stopped
-- assertions.
local function fakeEventloopFactory()
local state = { sleeps = {}, lastLoop = nil };
local function factory()
local pending = {};
local stopped = false;
local api = {};
function api.setTimeout(fn, delay)
state.sleeps[#state.sleeps + 1] = delay;
pending[#pending + 1] = fn;
return function() end;
end
function api.runLoop()
stopped = false;
while not stopped and #pending > 0 do
local fn = table.remove(pending, 1);
fn();
end
end
function api.stopLoop() stopped = true; end
function api.onStart(fn) fn(); end
function api.onStop() return function() end; end
function api.inspect()
return { pending = pending, stopped = stopped };
end
state.lastLoop = api;
return api;
end
return factory, state;
end
local function sessionResp(id)
return response(200, textutils.serializeJSON({ id = id, title = 'cc-ai' }));
end
@ -307,12 +342,12 @@ testlib.test('ask polls async message until completion', function()
}
);
local settingsStub = fakeSettings({ ['opencc.server_url'] = 'http://host' });
local sleeps = {};
local elFactory, elState = fakeEventloopFactory();
local ai = createAi({
http = httpStub,
settings = settingsStub,
now = function() return 10; end,
sleep = function(n) sleeps[#sleeps + 1] = n; end,
eventloop = elFactory,
});
local ok, result = ai.ask('hello', { messageId = 'msg_1', pollIntervalSeconds = 3 });
@ -322,7 +357,9 @@ testlib.test('ask polls async message until completion', function()
testlib.assertEquals(result.messageId, 'msg_1');
testlib.assertEquals(#httpStub.getCalls, 2);
testlib.assertTrue(string.find(httpStub.getCalls[1].url, '/session/ses_1/message', 1, true) ~= nil);
testlib.assertEquals(sleeps[1], 3);
-- First setTimeout fires the initial attempt (delay 0); second waits the poll interval.
testlib.assertEquals(elState.sleeps[1], 0);
testlib.assertEquals(elState.sleeps[2], 3);
end);
testlib.test('ask polling times out', function()
@ -335,11 +372,22 @@ testlib.test('ask polling times out', function()
);
local settingsStub = fakeSettings({ ['opencc.server_url'] = 'http://host' });
local now = 0;
local elFactory, elState = fakeEventloopFactory();
-- Advance virtual time on every scheduled delay so the deadline is reached.
local advancingFactory = function()
local loop = elFactory();
local origSet = loop.setTimeout;
loop.setTimeout = function(fn, delay)
now = now + (delay or 0);
return origSet(fn, delay);
end
return loop;
end;
local ai = createAi({
http = httpStub,
settings = settingsStub,
now = function() return now; end,
sleep = function(n) now = now + n; end,
eventloop = advancingFactory,
});
local ok, err = ai.ask('hello', {
@ -351,6 +399,112 @@ testlib.test('ask polling times out', function()
testlib.assertTrue(not ok);
testlib.assertTrue(string.find(err, 'delai depasse', 1, true) ~= nil);
testlib.assertEquals(#httpStub.getCalls, 2);
testlib.assertTrue(elState.lastLoop.inspect().stopped);
testlib.assertEquals(#elState.lastLoop.inspect().pending, 0);
end);
testlib.test('ask polling does not call os.sleep', function()
local httpStub = fakeHttp(
{ sessionResp('ses_1'), asyncResp() },
{
messageListResp({ userMessage('msg_1', 'hello'), assistantMessage('msg_2', 'partial', false) }),
messageListResp({ userMessage('msg_1', 'hello'), assistantMessage('msg_2', 'reply', true) }),
}
);
local settingsStub = fakeSettings({ ['opencc.server_url'] = 'http://host' });
local elFactory = fakeEventloopFactory();
local originalSleep = _G.sleep;
local sleepCalls = 0;
_G.sleep = function(n) sleepCalls = sleepCalls + 1; originalSleep(n); end
local ai = createAi({
http = httpStub,
settings = settingsStub,
now = function() return 10; end,
eventloop = elFactory,
});
local ok = ai.ask('hello', { messageId = 'msg_1', pollIntervalSeconds = 3 });
_G.sleep = originalSleep;
testlib.assertTrue(ok);
testlib.assertEquals(sleepCalls, 0);
end);
testlib.test('pollMessage stops the private loop on success', function()
local httpStub = fakeHttp(
{ sessionResp('ses_1'), asyncResp() },
{
messageListResp({ userMessage('msg_1', 'hi'), assistantMessage('msg_2', 'reply', true) }),
}
);
local settingsStub = fakeSettings({ ['opencc.server_url'] = 'http://host' });
local elFactory, elState = fakeEventloopFactory();
local ai = createAi({
http = httpStub,
settings = settingsStub,
now = function() return 0; end,
eventloop = elFactory,
});
local ok = ai.ask('hi', { messageId = 'msg_1' });
testlib.assertTrue(ok);
testlib.assertTrue(elState.lastLoop.inspect().stopped);
testlib.assertEquals(#elState.lastLoop.inspect().pending, 0);
end);
testlib.test('pollMessage stops cleanly on HTTP error mid-poll', function()
local httpStub = fakeHttp(
{ sessionResp('ses_1'), asyncResp() },
{
messageListResp({ userMessage('msg_1', 'hi'), assistantMessage('msg_2', 'partial', false) }),
httpError(500, '{}'),
}
);
local settingsStub = fakeSettings({ ['opencc.server_url'] = 'http://host' });
local elFactory, elState = fakeEventloopFactory();
local ai = createAi({
http = httpStub,
settings = settingsStub,
now = function() return 0; end,
eventloop = elFactory,
});
local ok, err = ai.ask('hi', { messageId = 'msg_1', pollIntervalSeconds = 1, pollTimeoutSeconds = 60 });
testlib.assertTrue(not ok);
testlib.assertTrue(string.find(err, 'HTTP 500', 1, true) ~= nil);
testlib.assertTrue(elState.lastLoop.inspect().stopped);
testlib.assertEquals(#elState.lastLoop.inspect().pending, 0);
end);
testlib.test('pollMessage stops cleanly on 404 mid-poll', function()
local httpStub = fakeHttp(
{ asyncResp() },
{
messageListResp({ userMessage('msg_1', 'hi'), assistantMessage('msg_2', 'partial', false) }),
response(404, '{}'),
}
);
local settingsStub = fakeSettings({
['opencc.server_url'] = 'http://host',
['opencc.session_id'] = 'ses_1',
});
local elFactory, elState = fakeEventloopFactory();
local ai = createAi({
http = httpStub,
settings = settingsStub,
now = function() return 0; end,
eventloop = elFactory,
});
local ok, err = ai.ask('hi', { messageId = 'msg_1', pollIntervalSeconds = 1, pollTimeoutSeconds = 60 });
testlib.assertTrue(not ok);
testlib.assertTrue(string.find(err, 'session introuvable', 1, true) ~= nil);
testlib.assertEquals(settingsStub.values['opencc.session_id'], nil);
testlib.assertTrue(elState.lastLoop.inspect().stopped);
testlib.assertEquals(#elState.lastLoop.inspect().pending, 0);
end);
testlib.test('ask rejects missing prompt without HTTP calls', function()