diff --git a/apis/libai.lua b/apis/libai.lua index 2ecb08e..42147ba 100644 --- a/apis/libai.lua +++ b/apis/libai.lua @@ -155,7 +155,7 @@ local function createAi(opts) local httpLib = opts.http or http; local settingsLib = opts.settings or settings; - local sleepFunc = opts.sleep or sleep; + local eventloopFactory = opts.eventloop or require('/apis/eventloop'); local nowFunc = opts.now or nowSeconds; local api = {}; @@ -267,27 +267,44 @@ local function createAi(opts) local doPost; local function pollMessage(cfg, sessionId, messageId, persist) + local loop = eventloopFactory(); local deadline = nowFunc() + cfg.pollTimeoutSeconds; - while true do + local resultOk, resultValue; + + local function finish(ok, value) + resultOk, resultValue = ok, value; + loop.stopLoop(); + end + + local function attempt() local body, code = doGet(cfg, '/session/' .. sessionId .. '/message'); - if not body then return false, code; end - if code == 404 then return handleMissingSession(persist); end + if not body then return finish(false, code); end + if code == 404 then + local ok, value = handleMissingSession(persist); + return finish(ok, value); + end if code and code ~= 200 then - return false, 'erreur message: HTTP ' .. tostring(code); + return finish(false, 'erreur message: HTTP ' .. tostring(code)); end local messages = textutils.unserializeJSON(body); - if type(messages) ~= 'table' then return false, 'reponse message invalide'; end + if type(messages) ~= 'table' then + return finish(false, 'reponse message invalide'); + end local decoded = findAssistantMessage(messages, messageId); local reply = decoded and extractTextParts(decoded.parts) or ''; if decoded and reply ~= '' and isMessageComplete(decoded) then - return true, { reply = reply, sessionId = sessionId, messageId = messageId }; + return finish(true, { reply = reply, sessionId = sessionId, messageId = messageId }); end if nowFunc() >= deadline then - return false, 'delai depasse en attendant la reponse AI'; + return finish(false, 'delai depasse en attendant la reponse AI'); end - sleepFunc(cfg.pollIntervalSeconds); + loop.setTimeout(attempt, cfg.pollIntervalSeconds); end + + loop.setTimeout(attempt, 0); + loop.runLoop(); + return resultOk, resultValue; end local function buildHeaders(cfg) diff --git a/docs/adrs/README.md b/docs/adrs/README.md index 3e466d6..70ee1d2 100644 --- a/docs/adrs/README.md +++ b/docs/adrs/README.md @@ -21,3 +21,4 @@ Future ADRs can reuse the shape of the existing files when it is useful. - [`adr-0011-git-hooks-own-commit-push-verification.md`](adr-0011-git-hooks-own-commit-push-verification.md) - Git hooks own commit/push verification. - [`adr-0012-headless-craftos-pc-as-hypothesis-probe.md`](adr-0012-headless-craftos-pc-as-hypothesis-probe.md) - Headless CraftOS-PC as the canonical hypothesis probe (rename `just craftos` → `just trapos`, add vanilla `just craftos`). - [`adr-0013-markdown-link-syntax-for-cross-references.md`](adr-0013-markdown-link-syntax-for-cross-references.md) - Cross-reference markdown files with `[]()` syntax (so lychee can validate them). +- [`adr-0014-prefer-eventloop-settimeout-over-os-sleep.md`](adr-0014-prefer-eventloop-settimeout-over-os-sleep.md) - Prefer `eventloop.setTimeout` over `os.sleep` in application code. diff --git a/docs/adrs/adr-0014-prefer-eventloop-settimeout-over-os-sleep.md b/docs/adrs/adr-0014-prefer-eventloop-settimeout-over-os-sleep.md new file mode 100644 index 0000000..7004a18 --- /dev/null +++ b/docs/adrs/adr-0014-prefer-eventloop-settimeout-over-os-sleep.md @@ -0,0 +1,87 @@ +# ADR 0014: Prefer `eventloop.setTimeout` Over `os.sleep` In Application Code + +## Status + +Accepted + +## Date + +2026-06-09 + +## Context + +[ADR-0002](adr-0002-use-eventloop-for-async-code.md) made [`apis/eventloop.lua`](../../apis/eventloop.lua) the +default substrate for async behavior. The eventloop drives a single +`os.pullEventRaw` loop and dispatches every event to registered handlers and +timer callbacks. + +`os.sleep` looks innocent but breaks that contract. Its CC:Tweaked +implementation yields the coroutine via `os.pullEvent("timer")`. While the +sleep is in flight: + +- The enclosing `os.pullEventRaw` of the eventloop is paused; nothing else + runs in that coroutine. +- Non-`timer` events that arrive are silently discarded by `os.pullEvent` — + so handlers registered through the eventloop miss them entirely. +- Even `eventloop.setTimeout` callbacks scheduled before the sleep cannot + fire until the sleep returns, because their timer events are consumed by + the sleep filter. + +This bit `apis/libai.lua` `pollMessage`, which used a sleep-based throttle +between HTTP polls. The function looked synchronous and stand-alone, but +the moment a caller invoked `libai.ask` from inside a handler — exactly the +composition [ADR-0002](adr-0002-use-eventloop-for-async-code.md) +encourages — the whole event loop froze for the duration of every poll +interval. + +[`apis/net.lua`](../../apis/net.lua) `sendRequest` already shows the right +pattern: create a private eventloop, schedule the wait through +`setTimeout`, then `runLoop` until the work resolves. From the caller's +perspective the function is still synchronous; internally, the dispatcher +of timer events stays alive and arbitrary other handlers can be composed +around it via `parallel.waitForAll`. + +## Decision + +In library, server, and program code that may run inside an eventloop +(directly or transitively), use `eventloop.setTimeout` for any waiting, +throttling, polling, or retry-with-delay. Libraries that need to temporize +must take an eventloop factory through their constructor (the way +`apis/net` does) rather than baking a hardcoded sleep call. + +`os.sleep` remains acceptable only in narrow cases: + +1. One-shot programs that are purely sequential and register no event + handlers — a `programs/foo.lua` that prints, sleeps, prints again, and + exits. +2. `parallel.waitForAny(task, function() sleep(t); end)` used as an + isolated guard to bound an inner task (e.g. the AI Lua-exec sandbox in + `apis/libai.lua` and the `parallel.waitForAny`-driven per-case timer in + `apis/libtest.lua`). The guard sleep is private to its own coroutine + group; it does not block anything external. +3. Tests that are themselves driven by `libtest`'s per-case timeout (see + [ADR-0009](adr-0009-layered-test-timeouts.md)). + +New code must not expose a `sleep` injection point on its constructor. If +a wait is needed, accept an `eventloop` factory and schedule through +`setTimeout`. Tests substitute a synchronous deterministic eventloop fake +the same way they substitute `http` or `settings`. + +## Consequences + +- Slightly more ceremony in "synchronous-looking" functions that wait: a + private eventloop plus a small `attempt`/`finish` pair. The benefit is + that the function composes cleanly with any caller's eventloop. +- Test fakes shift from a `sleep` stub to a synchronous eventloop double. + Ergonomics are comparable; the eventloop fake additionally lets tests + observe `pending` and `stopped` state, catching leaks the sleep stub + would have missed. +- Existing call sites are migrated opportunistically when they cause + observable bugs. The first migration is `apis/libai.lua`. + +## References + +- [ADR-0002](adr-0002-use-eventloop-for-async-code.md) — use eventloop for async code. +- [ADR-0009](adr-0009-layered-test-timeouts.md) — layered test timeouts (the `parallel.waitForAny` guard exception). +- [`apis/net.lua`](../../apis/net.lua) `sendRequest` — canonical private-eventloop pattern. +- [`apis/libai.lua`](../../apis/libai.lua) `pollMessage` — first migration. diff --git a/manifest.json b/manifest.json index d6fab76..332ca56 100644 --- a/manifest.json +++ b/manifest.json @@ -1,6 +1,6 @@ { "name": "TrapOS", - "version": "0.6.4", + "version": "0.7.0", "branch": "next", "packages": [ "trapos" diff --git a/packages/index.json b/packages/index.json index be34388..5fa9258 100644 --- a/packages/index.json +++ b/packages/index.json @@ -5,8 +5,8 @@ "trapos-boot": "0.2.2", "trapos-net": "0.2.1", "trapos-ui": "0.2.2", - "trapos-ai": "0.5.3", + "trapos-ai": "0.6.0", "trapos-sandbox": "0.1.0", - "trapos": "0.6.4" + "trapos": "0.7.0" } } diff --git a/packages/trapos-ai/ccpm.json b/packages/trapos-ai/ccpm.json index 559803d..21b76a3 100644 --- a/packages/trapos-ai/ccpm.json +++ b/packages/trapos-ai/ccpm.json @@ -1,6 +1,6 @@ { "name": "trapos-ai", - "version": "0.5.3", + "version": "0.6.0", "description": "TrapOS AI client for opencode serve", "dependencies": ["trapos-core"], "files": [ diff --git a/packages/trapos/ccpm.json b/packages/trapos/ccpm.json index cabb747..2f437aa 100644 --- a/packages/trapos/ccpm.json +++ b/packages/trapos/ccpm.json @@ -1,6 +1,6 @@ { "name": "trapos", - "version": "0.6.4", + "version": "0.7.0", "description": "TrapOS full install meta-package", "dependencies": ["trapos-boot", "trapos-net", "trapos-ui", "trapos-test", "trapos-ai"], "files": [], diff --git a/tests/ai.lua b/tests/ai.lua index 3bdf116..b2ca2c0 100644 --- a/tests/ai.lua +++ b/tests/ai.lua @@ -66,6 +66,41 @@ local function httpError(code, body) end; end +-- Synchronous deterministic eventloop double for tests. +-- setTimeout drains FIFO; runLoop runs until pending is empty or stopLoop fires. +-- Returns (factory, state). state.sleeps accumulates every delay passed across +-- all loops; state.lastLoop exposes the most recent loop for pending/stopped +-- assertions. +local function fakeEventloopFactory() + local state = { sleeps = {}, lastLoop = nil }; + local function factory() + local pending = {}; + local stopped = false; + local api = {}; + function api.setTimeout(fn, delay) + state.sleeps[#state.sleeps + 1] = delay; + pending[#pending + 1] = fn; + return function() end; + end + function api.runLoop() + stopped = false; + while not stopped and #pending > 0 do + local fn = table.remove(pending, 1); + fn(); + end + end + function api.stopLoop() stopped = true; end + function api.onStart(fn) fn(); end + function api.onStop() return function() end; end + function api.inspect() + return { pending = pending, stopped = stopped }; + end + state.lastLoop = api; + return api; + end + return factory, state; +end + local function sessionResp(id) return response(200, textutils.serializeJSON({ id = id, title = 'cc-ai' })); end @@ -307,12 +342,12 @@ testlib.test('ask polls async message until completion', function() } ); local settingsStub = fakeSettings({ ['opencc.server_url'] = 'http://host' }); - local sleeps = {}; + local elFactory, elState = fakeEventloopFactory(); local ai = createAi({ http = httpStub, settings = settingsStub, now = function() return 10; end, - sleep = function(n) sleeps[#sleeps + 1] = n; end, + eventloop = elFactory, }); local ok, result = ai.ask('hello', { messageId = 'msg_1', pollIntervalSeconds = 3 }); @@ -322,7 +357,9 @@ testlib.test('ask polls async message until completion', function() testlib.assertEquals(result.messageId, 'msg_1'); testlib.assertEquals(#httpStub.getCalls, 2); testlib.assertTrue(string.find(httpStub.getCalls[1].url, '/session/ses_1/message', 1, true) ~= nil); - testlib.assertEquals(sleeps[1], 3); + -- First setTimeout fires the initial attempt (delay 0); second waits the poll interval. + testlib.assertEquals(elState.sleeps[1], 0); + testlib.assertEquals(elState.sleeps[2], 3); end); testlib.test('ask polling times out', function() @@ -335,11 +372,22 @@ testlib.test('ask polling times out', function() ); local settingsStub = fakeSettings({ ['opencc.server_url'] = 'http://host' }); local now = 0; + local elFactory, elState = fakeEventloopFactory(); + -- Advance virtual time on every scheduled delay so the deadline is reached. + local advancingFactory = function() + local loop = elFactory(); + local origSet = loop.setTimeout; + loop.setTimeout = function(fn, delay) + now = now + (delay or 0); + return origSet(fn, delay); + end + return loop; + end; local ai = createAi({ http = httpStub, settings = settingsStub, now = function() return now; end, - sleep = function(n) now = now + n; end, + eventloop = advancingFactory, }); local ok, err = ai.ask('hello', { @@ -351,6 +399,112 @@ testlib.test('ask polling times out', function() testlib.assertTrue(not ok); testlib.assertTrue(string.find(err, 'delai depasse', 1, true) ~= nil); testlib.assertEquals(#httpStub.getCalls, 2); + testlib.assertTrue(elState.lastLoop.inspect().stopped); + testlib.assertEquals(#elState.lastLoop.inspect().pending, 0); +end); + +testlib.test('ask polling does not call os.sleep', function() + local httpStub = fakeHttp( + { sessionResp('ses_1'), asyncResp() }, + { + messageListResp({ userMessage('msg_1', 'hello'), assistantMessage('msg_2', 'partial', false) }), + messageListResp({ userMessage('msg_1', 'hello'), assistantMessage('msg_2', 'reply', true) }), + } + ); + local settingsStub = fakeSettings({ ['opencc.server_url'] = 'http://host' }); + local elFactory = fakeEventloopFactory(); + local originalSleep = _G.sleep; + local sleepCalls = 0; + _G.sleep = function(n) sleepCalls = sleepCalls + 1; originalSleep(n); end + local ai = createAi({ + http = httpStub, + settings = settingsStub, + now = function() return 10; end, + eventloop = elFactory, + }); + + local ok = ai.ask('hello', { messageId = 'msg_1', pollIntervalSeconds = 3 }); + _G.sleep = originalSleep; + + testlib.assertTrue(ok); + testlib.assertEquals(sleepCalls, 0); +end); + +testlib.test('pollMessage stops the private loop on success', function() + local httpStub = fakeHttp( + { sessionResp('ses_1'), asyncResp() }, + { + messageListResp({ userMessage('msg_1', 'hi'), assistantMessage('msg_2', 'reply', true) }), + } + ); + local settingsStub = fakeSettings({ ['opencc.server_url'] = 'http://host' }); + local elFactory, elState = fakeEventloopFactory(); + local ai = createAi({ + http = httpStub, + settings = settingsStub, + now = function() return 0; end, + eventloop = elFactory, + }); + + local ok = ai.ask('hi', { messageId = 'msg_1' }); + + testlib.assertTrue(ok); + testlib.assertTrue(elState.lastLoop.inspect().stopped); + testlib.assertEquals(#elState.lastLoop.inspect().pending, 0); +end); + +testlib.test('pollMessage stops cleanly on HTTP error mid-poll', function() + local httpStub = fakeHttp( + { sessionResp('ses_1'), asyncResp() }, + { + messageListResp({ userMessage('msg_1', 'hi'), assistantMessage('msg_2', 'partial', false) }), + httpError(500, '{}'), + } + ); + local settingsStub = fakeSettings({ ['opencc.server_url'] = 'http://host' }); + local elFactory, elState = fakeEventloopFactory(); + local ai = createAi({ + http = httpStub, + settings = settingsStub, + now = function() return 0; end, + eventloop = elFactory, + }); + + local ok, err = ai.ask('hi', { messageId = 'msg_1', pollIntervalSeconds = 1, pollTimeoutSeconds = 60 }); + + testlib.assertTrue(not ok); + testlib.assertTrue(string.find(err, 'HTTP 500', 1, true) ~= nil); + testlib.assertTrue(elState.lastLoop.inspect().stopped); + testlib.assertEquals(#elState.lastLoop.inspect().pending, 0); +end); + +testlib.test('pollMessage stops cleanly on 404 mid-poll', function() + local httpStub = fakeHttp( + { asyncResp() }, + { + messageListResp({ userMessage('msg_1', 'hi'), assistantMessage('msg_2', 'partial', false) }), + response(404, '{}'), + } + ); + local settingsStub = fakeSettings({ + ['opencc.server_url'] = 'http://host', + ['opencc.session_id'] = 'ses_1', + }); + local elFactory, elState = fakeEventloopFactory(); + local ai = createAi({ + http = httpStub, + settings = settingsStub, + now = function() return 0; end, + eventloop = elFactory, + }); + + local ok, err = ai.ask('hi', { messageId = 'msg_1', pollIntervalSeconds = 1, pollTimeoutSeconds = 60 }); + + testlib.assertTrue(not ok); + testlib.assertTrue(string.find(err, 'session introuvable', 1, true) ~= nil); + testlib.assertEquals(settingsStub.values['opencc.session_id'], nil); + testlib.assertTrue(elState.lastLoop.inspect().stopped); + testlib.assertEquals(#elState.lastLoop.inspect().pending, 0); end); testlib.test('ask rejects missing prompt without HTTP calls', function()