fix(ai): retry transient network timeouts during poll
A single poll GET that hit a network-level failure was treated as fatal, aborting the whole poll after the ~60s per-call timeout instead of retrying until poll_timeout. Network failures during polling are now treated as transient: each one is logged and the next attempt is rescheduled until the deadline passes. HTTP 404 and other non-200 codes stay fatal.
This commit is contained in:
parent
79677e2742
commit
3b647090fa
@ -386,7 +386,13 @@ local function createAi(opts)
|
||||
local function attempt()
|
||||
attemptCount = attemptCount + 1;
|
||||
local body, code = doGet(cfg, '/session/' .. sessionId .. '/message');
|
||||
if not body then return finish(false, code); end
|
||||
if not body then
|
||||
log('poll #' .. tostring(attemptCount) .. ': transient error: ' .. tostring(code));
|
||||
if nowFunc() >= deadline then
|
||||
return finish(false, code);
|
||||
end
|
||||
return loop.setTimeout(attempt, cfg.pollIntervalSeconds);
|
||||
end
|
||||
if code == 404 then
|
||||
local ok, value = handleMissingSession(persist, sessionSettingKey);
|
||||
return finish(ok, value);
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "TrapOS",
|
||||
"version": "0.8.13",
|
||||
"version": "0.8.14",
|
||||
"branch": "next",
|
||||
"packages": [
|
||||
"trapos"
|
||||
|
||||
@ -5,8 +5,8 @@
|
||||
"trapos-boot": "0.3.2",
|
||||
"trapos-net": "0.3.0",
|
||||
"trapos-ui": "0.2.2",
|
||||
"trapos-ai": "0.6.11",
|
||||
"trapos-ai": "0.6.12",
|
||||
"trapos-sandbox": "0.2.2",
|
||||
"trapos": "0.8.13"
|
||||
"trapos": "0.8.14"
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "trapos-ai",
|
||||
"version": "0.6.11",
|
||||
"version": "0.6.12",
|
||||
"description": "TrapOS AI client for opencode serve",
|
||||
"dependencies": ["trapos-core"],
|
||||
"files": [
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "trapos",
|
||||
"version": "0.8.13",
|
||||
"version": "0.8.14",
|
||||
"description": "TrapOS full install meta-package",
|
||||
"dependencies": [
|
||||
"trapos-boot",
|
||||
|
||||
67
tests/ai.lua
67
tests/ai.lua
@ -85,6 +85,14 @@ local function httpError(code, body)
|
||||
end;
|
||||
end
|
||||
|
||||
-- Fake HTTP responder for a true network-level failure: it yields no response
-- handle at all (timeout / unreachable), only a failure message.
-- Drives callHttp's `not response` path -> 'serveur injoignable: <message>'.
-- `message` is the failure text to report; it falls back to 'Timed out'.
local function httpTimeout(message)
    local reason = message or 'Timed out';
    local function responder()
        -- nil in the first slot is what marks "no response handle".
        return nil, reason;
    end
    return responder;
end
|
||||
|
||||
-- Synchronous deterministic eventloop double for tests.
|
||||
-- setTimeout drains FIFO; runLoop runs until pending is empty or stopLoop fires.
|
||||
-- Returns (factory, state). state.sleeps accumulates every delay passed across
|
||||
@ -1023,6 +1031,65 @@ testlib.test('pollMessage stops the private loop on success', function()
|
||||
testlib.assertEquals(#elState.lastLoop.inspect().pending, 0);
|
||||
end);
|
||||
|
||||
-- A network-level timeout in the middle of polling must not abort the poll:
-- the scripted second message fetch fails with no response, and the third
-- returns the final reply.
testlib.test('pollMessage retries transient network timeout then succeeds', function()
    -- First script: session + async responses (presumably the POST side of
    -- fakeHttp — confirm against its definition).
    local setupScript = { sessionResp('ses_1'), asyncResp() };

    -- Second script: one entry per message fetch, consumed in order.
    -- NOTE(review): the boolean passed to assistantMessage looks like a
    -- "completed" flag — confirm against the helper.
    local stillRunning = messageListResp({
        userMessage('msg_1', 'hi'),
        assistantMessage('msg_2', 'partial', false),
    });
    local networkDrop = httpTimeout('Timed out');
    local finished = messageListResp({
        userMessage('msg_1', 'hi'),
        assistantMessage('msg_2', 'reply', true),
    });

    local http = fakeHttp(setupScript, { stillRunning, networkDrop, finished });
    local settings = fakeAsyncSettings();
    local loopFactory, loopState = fakeEventloopFactory();

    -- Frozen clock: the deadline can never be reached, so only a successful
    -- retry can end this poll.
    local function frozenClock()
        return 0;
    end

    local ai = createAi({
        http = http,
        settings = settings,
        now = frozenClock,
        eventloop = loopFactory,
    });

    local askOpts = { messageId = 'msg_1', pollIntervalSeconds = 1, pollTimeoutSeconds = 60 };
    local ok, result = ai.ask('hi', askOpts);

    testlib.assertTrue(ok);
    testlib.assertEquals(result.reply, 'reply');
    -- All three scripted fetches were consumed: partial, timeout, final.
    testlib.assertEquals(#http.getCalls, 3);
    -- The private loop is stopped and has nothing left scheduled.
    testlib.assertTrue(loopState.lastLoop.inspect().stopped);
    testlib.assertEquals(#loopState.lastLoop.inspect().pending, 0);
end);
|
||||
|
||||
-- When every message fetch times out, the poll keeps retrying and only
-- reports failure once the (fake) clock has advanced past the deadline.
testlib.test('pollMessage fails on persistent timeout only after deadline', function()
    -- First script: session + async responses (presumably the POST side of
    -- fakeHttp — confirm against its definition).
    local setupScript = { sessionResp('ses_1'), asyncResp() };

    -- Second script: nothing but network-level timeouts.
    local drops = {
        httpTimeout('Timed out'),
        httpTimeout('Timed out'),
        httpTimeout('Timed out'),
    };

    local http = fakeHttp(setupScript, drops);
    local settings = fakeAsyncSettings();
    local loopFactory, loopState = fakeEventloopFactory();

    -- Fake clock: each read returns the current time, then jumps 30s ahead,
    -- so the 60s poll timeout is crossed after a few reads.
    local elapsed = 0;
    local function steppingClock()
        local reading = elapsed;
        elapsed = reading + 30;
        return reading;
    end

    local ai = createAi({
        http = http,
        settings = settings,
        now = steppingClock,
        eventloop = loopFactory,
    });

    local askOpts = { messageId = 'msg_1', pollIntervalSeconds = 1, pollTimeoutSeconds = 60 };
    local ok, err = ai.ask('hi', askOpts);

    testlib.assertTrue(not ok);
    -- Plain (non-pattern) search for the "unreachable" marker in the error.
    testlib.assertTrue(string.find(err, 'injoignable', 1, true) ~= nil);
    -- More than one fetch proves the first timeout was retried, not fatal.
    testlib.assertTrue(#http.getCalls > 1);
    -- The private loop is stopped and has nothing left scheduled.
    testlib.assertTrue(loopState.lastLoop.inspect().stopped);
    testlib.assertEquals(#loopState.lastLoop.inspect().pending, 0);
end);
|
||||
|
||||
testlib.test('pollMessage stops cleanly on HTTP error mid-poll', function()
|
||||
local httpStub = fakeHttp(
|
||||
{ sessionResp('ses_1'), asyncResp() },
|
||||
|
||||
Loading…
Reference in New Issue
Block a user