fix(ai): retry transient network timeouts during poll
Previously, a single poll GET that hit a network-level failure (no response at all) was treated as fatal: the whole poll aborted after the ~60s per-call timeout instead of retrying until poll_timeout elapsed. Network failures during polling are now treated as transient: each one is logged and the next attempt is rescheduled after the poll interval, and the poll fails only once the deadline has passed. HTTP 404 and other non-200 responses remain fatal.
This commit is contained in:
parent
79677e2742
commit
3b647090fa
@ -386,7 +386,13 @@ local function createAi(opts)
|
|||||||
local function attempt()
|
local function attempt()
|
||||||
attemptCount = attemptCount + 1;
|
attemptCount = attemptCount + 1;
|
||||||
local body, code = doGet(cfg, '/session/' .. sessionId .. '/message');
|
local body, code = doGet(cfg, '/session/' .. sessionId .. '/message');
|
||||||
if not body then return finish(false, code); end
|
if not body then
|
||||||
|
log('poll #' .. tostring(attemptCount) .. ': transient error: ' .. tostring(code));
|
||||||
|
if nowFunc() >= deadline then
|
||||||
|
return finish(false, code);
|
||||||
|
end
|
||||||
|
return loop.setTimeout(attempt, cfg.pollIntervalSeconds);
|
||||||
|
end
|
||||||
if code == 404 then
|
if code == 404 then
|
||||||
local ok, value = handleMissingSession(persist, sessionSettingKey);
|
local ok, value = handleMissingSession(persist, sessionSettingKey);
|
||||||
return finish(ok, value);
|
return finish(ok, value);
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "TrapOS",
|
"name": "TrapOS",
|
||||||
"version": "0.8.13",
|
"version": "0.8.14",
|
||||||
"branch": "next",
|
"branch": "next",
|
||||||
"packages": [
|
"packages": [
|
||||||
"trapos"
|
"trapos"
|
||||||
|
|||||||
@ -5,8 +5,8 @@
|
|||||||
"trapos-boot": "0.3.2",
|
"trapos-boot": "0.3.2",
|
||||||
"trapos-net": "0.3.0",
|
"trapos-net": "0.3.0",
|
||||||
"trapos-ui": "0.2.2",
|
"trapos-ui": "0.2.2",
|
||||||
"trapos-ai": "0.6.11",
|
"trapos-ai": "0.6.12",
|
||||||
"trapos-sandbox": "0.2.2",
|
"trapos-sandbox": "0.2.2",
|
||||||
"trapos": "0.8.13"
|
"trapos": "0.8.14"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "trapos-ai",
|
"name": "trapos-ai",
|
||||||
"version": "0.6.11",
|
"version": "0.6.12",
|
||||||
"description": "TrapOS AI client for opencode serve",
|
"description": "TrapOS AI client for opencode serve",
|
||||||
"dependencies": ["trapos-core"],
|
"dependencies": ["trapos-core"],
|
||||||
"files": [
|
"files": [
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "trapos",
|
"name": "trapos",
|
||||||
"version": "0.8.13",
|
"version": "0.8.14",
|
||||||
"description": "TrapOS full install meta-package",
|
"description": "TrapOS full install meta-package",
|
||||||
"dependencies": [
|
"dependencies": [
|
||||||
"trapos-boot",
|
"trapos-boot",
|
||||||
|
|||||||
67
tests/ai.lua
67
tests/ai.lua
@ -85,6 +85,14 @@ local function httpError(code, body)
|
|||||||
end;
|
end;
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- True network-level failure: no response handle at all (timeout / unreachable).
|
||||||
|
-- Drives callHttp's `not response` path -> 'serveur injoignable: <message>'.
|
||||||
|
local function httpTimeout(message)
|
||||||
|
return function()
|
||||||
|
return nil, message or 'Timed out';
|
||||||
|
end;
|
||||||
|
end
|
||||||
|
|
||||||
-- Synchronous deterministic eventloop double for tests.
|
-- Synchronous deterministic eventloop double for tests.
|
||||||
-- setTimeout drains FIFO; runLoop runs until pending is empty or stopLoop fires.
|
-- setTimeout drains FIFO; runLoop runs until pending is empty or stopLoop fires.
|
||||||
-- Returns (factory, state). state.sleeps accumulates every delay passed across
|
-- Returns (factory, state). state.sleeps accumulates every delay passed across
|
||||||
@ -1023,6 +1031,65 @@ testlib.test('pollMessage stops the private loop on success', function()
|
|||||||
testlib.assertEquals(#elState.lastLoop.inspect().pending, 0);
|
testlib.assertEquals(#elState.lastLoop.inspect().pending, 0);
|
||||||
end);
|
end);
|
||||||
|
|
||||||
|
testlib.test('pollMessage retries transient network timeout then succeeds', function()
|
||||||
|
local httpStub = fakeHttp(
|
||||||
|
{ sessionResp('ses_1'), asyncResp() },
|
||||||
|
{
|
||||||
|
messageListResp({ userMessage('msg_1', 'hi'), assistantMessage('msg_2', 'partial', false) }),
|
||||||
|
httpTimeout('Timed out'),
|
||||||
|
messageListResp({ userMessage('msg_1', 'hi'), assistantMessage('msg_2', 'reply', true) }),
|
||||||
|
}
|
||||||
|
);
|
||||||
|
local settingsStub = fakeAsyncSettings();
|
||||||
|
local elFactory, elState = fakeEventloopFactory();
|
||||||
|
local ai = createAi({
|
||||||
|
http = httpStub,
|
||||||
|
settings = settingsStub,
|
||||||
|
now = function() return 0; end,
|
||||||
|
eventloop = elFactory,
|
||||||
|
});
|
||||||
|
|
||||||
|
local ok, result = ai.ask('hi', { messageId = 'msg_1', pollIntervalSeconds = 1, pollTimeoutSeconds = 60 });
|
||||||
|
|
||||||
|
testlib.assertTrue(ok);
|
||||||
|
testlib.assertEquals(result.reply, 'reply');
|
||||||
|
testlib.assertEquals(#httpStub.getCalls, 3);
|
||||||
|
testlib.assertTrue(elState.lastLoop.inspect().stopped);
|
||||||
|
testlib.assertEquals(#elState.lastLoop.inspect().pending, 0);
|
||||||
|
end);
|
||||||
|
|
||||||
|
testlib.test('pollMessage fails on persistent timeout only after deadline', function()
|
||||||
|
local httpStub = fakeHttp(
|
||||||
|
{ sessionResp('ses_1'), asyncResp() },
|
||||||
|
{
|
||||||
|
httpTimeout('Timed out'),
|
||||||
|
httpTimeout('Timed out'),
|
||||||
|
httpTimeout('Timed out'),
|
||||||
|
}
|
||||||
|
);
|
||||||
|
local settingsStub = fakeAsyncSettings();
|
||||||
|
local elFactory, elState = fakeEventloopFactory();
|
||||||
|
local clock = 0;
|
||||||
|
local ai = createAi({
|
||||||
|
http = httpStub,
|
||||||
|
settings = settingsStub,
|
||||||
|
now = function()
|
||||||
|
local t = clock;
|
||||||
|
clock = clock + 30;
|
||||||
|
return t;
|
||||||
|
end,
|
||||||
|
eventloop = elFactory,
|
||||||
|
});
|
||||||
|
|
||||||
|
local ok, err = ai.ask('hi', { messageId = 'msg_1', pollIntervalSeconds = 1, pollTimeoutSeconds = 60 });
|
||||||
|
|
||||||
|
testlib.assertTrue(not ok);
|
||||||
|
testlib.assertTrue(string.find(err, 'injoignable', 1, true) ~= nil);
|
||||||
|
testlib.assertTrue(#httpStub.getCalls > 1);
|
||||||
|
testlib.assertTrue(elState.lastLoop.inspect().stopped);
|
||||||
|
testlib.assertEquals(#elState.lastLoop.inspect().pending, 0);
|
||||||
|
end);
|
||||||
|
|
||||||
testlib.test('pollMessage stops cleanly on HTTP error mid-poll', function()
|
testlib.test('pollMessage stops cleanly on HTTP error mid-poll', function()
|
||||||
local httpStub = fakeHttp(
|
local httpStub = fakeHttp(
|
||||||
{ sessionResp('ses_1'), asyncResp() },
|
{ sessionResp('ses_1'), asyncResp() },
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user