chore: test deploy snapshot

This commit is contained in:
2026-05-27 10:43:01 +09:00
parent c1d0f4c1db
commit 4c4b3c8d2c
78 changed files with 10392 additions and 2301 deletions

View File

@@ -1,11 +1,16 @@
import { execFile, spawn } from 'node:child_process';
import fs from 'node:fs';
import http from 'node:http';
import { readFile, rm, stat } from 'node:fs/promises';
import { mkdir, open, readFile, rm, stat } from 'node:fs/promises';
import path from 'node:path';
import { promisify } from 'node:util';
import { env } from '../config/env.js';
import { resolveMainProjectRoot } from './main-project-root-service.js';
import {
readTestServerDeploymentState,
startTestServerDeployment,
type TestServerDeploymentSnapshot,
} from './test-server-deployment-service.js';
import {
getRuntimeWorkServerBuildInfo,
readLatestWorkServerBuildInfo,
@@ -66,12 +71,21 @@ export type ServerCommandSnapshot = {
commandScript: string;
commandWorkingDirectory: string;
errorMessage: string | null;
deployment: WorkServerDeploymentSnapshot | null;
};
/**
 * Result handed back to callers after a restart or deploy command was requested.
 */
export type ServerCommandRestartResult = {
// Snapshot of the server the command was issued for.
server: ServerCommandSnapshot;
// Human-readable text for the request: a preview of script output or an acceptance message.
commandOutput: string | null;
// 'completed' when the script finished inside the request; 'accepted' when the work continues afterwards.
restartState: 'completed' | 'accepted';
// Work-server deployment state; the visible producers set it to null for other servers.
deployment?: WorkServerDeploymentSnapshot | null;
// Test-server deployment state; populated by deployTestServerCommand.
testDeployment?: TestServerDeploymentSnapshot | null;
};
/**
 * Optional overrides accepted by executeServerCommandScript.
 */
type ServerCommandScriptExecutionOptions = {
// Script to run instead of the definition's own commandScript.
commandScript?: string;
// Extra environment variables; applied last, so they win over process.env and the definition's environment.
environment?: Record<string, string>;
// Execution timeout in milliseconds; executeServerCommandScript falls back to 30000 when omitted.
timeoutMs?: number;
};
type ExecFileFailure = Error & {
@@ -119,6 +133,56 @@ type BuildInspectionResult = {
// Identifier of one of the two work-server slots a deployment can refer to.
type WorkServerSlot = 'blue' | 'green';
/**
 * Keys of the individual steps tracked for a work-server deployment.
 * Mirrored at runtime by WORK_SERVER_DEPLOYMENT_STEP_KEYS, which uses the same order.
 */
export type WorkServerDeploymentStepKey =
| 'build-target-slot'
| 'verify-target-health'
| 'switch-proxy'
| 'drain-previous-slot'
| 'rebuild-previous-slot'
| 'recover-interrupted-chat';
// Progress state of a single deployment step; unknown values are normalized to 'pending'.
export type WorkServerDeploymentStepStatus = 'pending' | 'running' | 'completed' | 'failed';
/**
 * Normalized state of one deployment step.
 */
export type WorkServerDeploymentStepSnapshot = {
// Which step this entry describes.
key: WorkServerDeploymentStepKey;
// Current progress of the step.
status: WorkServerDeploymentStepStatus;
// Free-form text taken from the state file; null when absent or not a string.
detail: string | null;
// ISO-8601 timestamp of the last update; null when absent or unparsable.
updatedAt: string | null;
};
/**
 * Overall phase of a work-server deployment: every step key plus the
 * 'idle', 'completed' and 'failed' states. Unknown values are normalized to 'idle'.
 */
export type WorkServerDeploymentPhase =
| 'idle'
| 'build-target-slot'
| 'verify-target-health'
| 'switch-proxy'
| 'drain-previous-slot'
| 'rebuild-previous-slot'
| 'recover-interrupted-chat'
| 'completed'
| 'failed';
/**
 * Normalized view of the work-server deployment state file, as produced by
 * normalizeWorkServerDeploymentSnapshot. Every field falls back to null (or
 * 'idle') when the file omits it or stores a value of the wrong type.
 */
export type WorkServerDeploymentSnapshot = {
// Overall deployment status; 'idle' is the fallback for unknown values.
status: 'idle' | 'running' | 'completed' | 'failed';
// Current phase; 'idle' is the fallback for unknown values.
phase: WorkServerDeploymentPhase;
summary: string | null;
// Timestamps are ISO-8601 strings after normalization.
startedAt: string | null;
updatedAt: string | null;
completedAt: string | null;
// Slot fields only ever hold 'blue' or 'green'.
activeSlot: WorkServerSlot | null;
targetSlot: WorkServerSlot | null;
previousSlot: WorkServerSlot | null;
targetContainer: string | null;
previousContainer: string | null;
// Counters are kept only when they are finite numbers.
// NOTE(review): presumably chat requests still active/queued on the previous slot — confirm with the writer of the state file.
previousSlotActiveChatRequestCount: number | null;
previousSlotQueuedChatRequestCount: number | null;
recoveredSessionCount: number | null;
recoveredRestartedCount: number | null;
recoveredRequeuedCount: number | null;
// Surfaced in the server snapshot's errorMessage when status is 'failed'.
lastError: string | null;
logExcerpt: string | null;
// After normalization: exactly one entry per step key, in WORK_SERVER_DEPLOYMENT_STEP_KEYS order.
steps: WorkServerDeploymentStepSnapshot[];
};
const RUNNER_HEARTBEAT_FRESHNESS_MS = 30_000;
const DEFERRED_RESTART_DELAY_MS = 2_000;
const DEFERRED_RESTART_CONFIRM_TIMEOUT_MS = 4_500;
@@ -141,6 +205,35 @@ const APP_BUILD_INFO_FILE_CANDIDATES = [
const APP_BUILD_STAMP_RELATIVE_PATH = '.server-command-test-app-built-at';
const APP_SOURCE_EXCLUDED_PREFIXES = ['public/.codex_chat/'] as const;
const APP_SOURCE_EXCLUDED_FILE_PATTERNS = [/\.test\.[^/]+$/i, /\.spec\.[^/]+$/i] as const;
// Age (20 minutes, in milliseconds) after which an existing restart lock file is treated as stale and removed.
const WORK_SERVER_RESTART_LOCK_STALE_MS = 20 * 60 * 1000;
/**
 * JSON payload stored in the work-server restart lock file.
 */
type WorkServerRestartLockPayload = {
// ISO-8601 time at which the lock was taken; used to judge staleness.
startedAt: string;
// Server the lock belongs to.
key: ServerCommandKey;
// Process id of the process that wrote the lock.
pid: number;
};
/**
 * Raw shape of the parsed deployment state file before validation.
 * Every field is `unknown` because the file content is not trusted;
 * normalizeWorkServerDeploymentSnapshot narrows each one individually.
 */
type WorkServerDeploymentStateFilePayload = {
status?: unknown;
phase?: unknown;
summary?: unknown;
startedAt?: unknown;
updatedAt?: unknown;
completedAt?: unknown;
activeSlot?: unknown;
targetSlot?: unknown;
previousSlot?: unknown;
targetContainer?: unknown;
previousContainer?: unknown;
previousSlotActiveChatRequestCount?: unknown;
previousSlotQueuedChatRequestCount?: unknown;
recoveredSessionCount?: unknown;
recoveredRestartedCount?: unknown;
recoveredRequeuedCount?: unknown;
lastError?: unknown;
logExcerpt?: unknown;
steps?: unknown;
};
export async function readAppBuildTimestamp(definition: ServerDefinition, options?: { allowLocal?: boolean }) {
const allowLocal = options?.allowLocal ?? false;
@@ -642,8 +735,8 @@ function getServerDefinitions(): ServerDefinition[] {
return [
{
key: 'test',
label: 'TEST',
summary: '메인 프로젝트의 테스트 앱 컨테이너',
label: 'PREVIEW',
summary: 'preview.sm-home.cloud 테스트 앱 컨테이너',
environment: 'test',
publicUrl: normalizeUrl(env.SERVER_COMMAND_TEST_URL),
checkUrl: normalizeUrl(env.SERVER_COMMAND_TEST_CHECK_URL || env.SERVER_COMMAND_TEST_URL),
@@ -751,6 +844,25 @@ function getServerDefinition(key: ServerCommandKey) {
return definition;
}
/**
 * Runs a server command script through `sh` and resolves with its captured output.
 *
 * @param definition Server whose working directory, environment and default script are used.
 * @param options Optional overrides for the script path, extra environment and timeout.
 * @returns The promise produced by `execFileAsync` (stdout/stderr of the script).
 */
async function executeServerCommandScript(
  definition: ServerDefinition,
  options: ServerCommandScriptExecutionOptions = {},
) {
  const scriptPath = options.commandScript ?? definition.commandScript;
  const timeout = options.timeoutMs ?? 30000;

  // Later spreads win: caller-supplied variables override the definition's,
  // which in turn override the inherited process environment.
  const mergedEnvironment = {
    ...process.env,
    ...definition.commandEnvironment,
    ...options.environment,
  };

  return execFileAsync('sh', [scriptPath], {
    cwd: definition.commandWorkingDirectory,
    timeout,
    maxBuffer: 1024 * 1024,
    env: mergedEnvironment,
  });
}
function trimPreview(value: string | null | undefined, maxLength = 220) {
const normalized = value?.replace(/\s+/g, ' ').trim() ?? '';
@@ -772,6 +884,209 @@ function normalizeDateTimeValue(value: string | null | undefined) {
return Number.isNaN(parsed.getTime()) ? null : parsed.toISOString();
}
/**
 * Path of the lock file that marks a work-server restart as in progress.
 */
function getWorkServerRestartLockPath() {
  const runtimeDirectory = path.join(
    resolveMainProjectRoot(),
    'etc',
    'servers',
    'work-server',
    '.docker',
    'runtime',
  );
  return path.join(runtimeDirectory, 'restart-in-progress.json');
}
/**
 * Path of the JSON file holding the work-server deployment state.
 */
function getWorkServerDeploymentStatePath() {
  const runtimeDirectory = path.join(
    resolveMainProjectRoot(),
    'etc',
    'servers',
    'work-server',
    '.docker',
    'runtime',
  );
  return path.join(runtimeDirectory, 'deployment-state.json');
}
// Ordered list of all deployment step keys. Used both to validate keys read
// from the state file and to fix the order of the normalized `steps` array.
const WORK_SERVER_DEPLOYMENT_STEP_KEYS: WorkServerDeploymentStepKey[] = [
'build-target-slot',
'verify-target-health',
'switch-proxy',
'drain-previous-slot',
'rebuild-previous-slot',
'recover-interrupted-chat',
];
/**
 * Returns `value` as a step key when it is one of the known keys, otherwise null.
 */
function normalizeWorkServerDeploymentStepKey(value: unknown): WorkServerDeploymentStepKey | null {
  const knownKey = WORK_SERVER_DEPLOYMENT_STEP_KEYS.find((stepKey) => stepKey === value);
  return knownKey ?? null;
}
/**
 * Accepts only the literal slot names 'blue' and 'green'; anything else becomes null.
 */
function normalizeWorkServerSlotValue(value: unknown): WorkServerSlot | null {
  if (value === 'blue' || value === 'green') {
    return value;
  }
  return null;
}
/**
 * Maps an untrusted value onto a deployment phase.
 * Anything that is not a recognized non-idle phase collapses to 'idle'.
 */
function normalizeWorkServerDeploymentPhase(value: unknown): WorkServerDeploymentPhase {
  const recognizedPhases: readonly WorkServerDeploymentPhase[] = [
    'build-target-slot',
    'verify-target-health',
    'switch-proxy',
    'drain-previous-slot',
    'rebuild-previous-slot',
    'recover-interrupted-chat',
    'completed',
    'failed',
  ];
  const matchedPhase = recognizedPhases.find((phase) => phase === value);
  return matchedPhase ?? 'idle';
}
/**
 * Maps an untrusted value onto a deployment status; unrecognized values become 'idle'.
 */
function normalizeWorkServerDeploymentStatus(value: unknown): WorkServerDeploymentSnapshot['status'] {
  if (value === 'running' || value === 'completed' || value === 'failed') {
    return value;
  }
  return 'idle';
}
/**
 * Keeps finite numbers as they are; every other value (including NaN and ±Infinity) becomes null.
 */
function normalizeNumberOrNull(value: unknown) {
  if (typeof value !== 'number') {
    return null;
  }
  return Number.isFinite(value) ? value : null;
}
/**
 * Creates a fresh "nothing has happened yet" deployment snapshot:
 * idle status and phase, every nullable field null, and one pending step per known step key.
 */
function buildEmptyWorkServerDeploymentSnapshot(): WorkServerDeploymentSnapshot {
  const pendingSteps: WorkServerDeploymentStepSnapshot[] = [];
  for (const key of WORK_SERVER_DEPLOYMENT_STEP_KEYS) {
    pendingSteps.push({ key, status: 'pending', detail: null, updatedAt: null });
  }

  const emptySnapshot: WorkServerDeploymentSnapshot = {
    status: 'idle',
    phase: 'idle',
    summary: null,
    startedAt: null,
    updatedAt: null,
    completedAt: null,
    activeSlot: null,
    targetSlot: null,
    previousSlot: null,
    targetContainer: null,
    previousContainer: null,
    previousSlotActiveChatRequestCount: null,
    previousSlotQueuedChatRequestCount: null,
    recoveredSessionCount: null,
    recoveredRestartedCount: null,
    recoveredRequeuedCount: null,
    lastError: null,
    logExcerpt: null,
    steps: pendingSteps,
  };
  return emptySnapshot;
}
/**
 * Normalizes the untrusted `steps` value from the deployment state file.
 *
 * Always returns exactly one entry per known step key, in canonical order.
 * Entries that are not objects or carry an unknown key are dropped; when a key
 * occurs more than once the last occurrence wins; missing keys fall back to a
 * pending placeholder.
 */
function normalizeWorkServerDeploymentSteps(value: unknown) {
  const defaults = buildEmptyWorkServerDeploymentSnapshot().steps;
  if (!Array.isArray(value)) {
    return defaults;
  }

  const parsedSteps = new Map<WorkServerDeploymentStepKey, WorkServerDeploymentStepSnapshot>();
  for (const entry of value) {
    if (!entry || typeof entry !== 'object') {
      continue;
    }

    const record = entry as Record<string, unknown>;
    const key = normalizeWorkServerDeploymentStepKey(record.key);
    if (!key) {
      continue;
    }

    // Unknown or missing statuses degrade to 'pending'.
    const rawStatus = record.status;
    let status: WorkServerDeploymentStepStatus = 'pending';
    if (rawStatus === 'running' || rawStatus === 'completed' || rawStatus === 'failed') {
      status = rawStatus;
    }

    const detail = typeof record.detail === 'string' ? record.detail : null;
    const rawUpdatedAt = typeof record.updatedAt === 'string' ? record.updatedAt : null;
    parsedSteps.set(key, {
      key,
      status,
      detail,
      updatedAt: normalizeDateTimeValue(rawUpdatedAt),
    });
  }

  return WORK_SERVER_DEPLOYMENT_STEP_KEYS.map(
    (key) => parsedSteps.get(key) ?? defaults.find((placeholder) => placeholder.key === key)!,
  );
}
/**
 * Converts the parsed (untrusted) deployment state file content into a
 * well-typed snapshot. Non-object input yields the empty snapshot; individual
 * fields with an unexpected type are replaced by their null/'idle' fallback.
 */
function normalizeWorkServerDeploymentSnapshot(value: unknown): WorkServerDeploymentSnapshot {
  if (typeof value !== 'object' || value === null) {
    return buildEmptyWorkServerDeploymentSnapshot();
  }

  const raw = value as WorkServerDeploymentStateFilePayload;
  const textOrNull = (input: unknown) => (typeof input === 'string' ? input : null);
  const timestampOrNull = (input: unknown) => normalizeDateTimeValue(textOrNull(input));

  return {
    status: normalizeWorkServerDeploymentStatus(raw.status),
    phase: normalizeWorkServerDeploymentPhase(raw.phase),
    summary: textOrNull(raw.summary),
    startedAt: timestampOrNull(raw.startedAt),
    updatedAt: timestampOrNull(raw.updatedAt),
    completedAt: timestampOrNull(raw.completedAt),
    activeSlot: normalizeWorkServerSlotValue(raw.activeSlot),
    targetSlot: normalizeWorkServerSlotValue(raw.targetSlot),
    previousSlot: normalizeWorkServerSlotValue(raw.previousSlot),
    targetContainer: textOrNull(raw.targetContainer),
    previousContainer: textOrNull(raw.previousContainer),
    previousSlotActiveChatRequestCount: normalizeNumberOrNull(raw.previousSlotActiveChatRequestCount),
    previousSlotQueuedChatRequestCount: normalizeNumberOrNull(raw.previousSlotQueuedChatRequestCount),
    recoveredSessionCount: normalizeNumberOrNull(raw.recoveredSessionCount),
    recoveredRestartedCount: normalizeNumberOrNull(raw.recoveredRestartedCount),
    recoveredRequeuedCount: normalizeNumberOrNull(raw.recoveredRequeuedCount),
    lastError: textOrNull(raw.lastError),
    logExcerpt: textOrNull(raw.logExcerpt),
    steps: normalizeWorkServerDeploymentSteps(raw.steps),
  };
}
/**
 * Reads and normalizes the work-server deployment state file.
 *
 * Best-effort by design: any failure (missing file, unreadable file, invalid
 * JSON, normalization error) results in `null` instead of an exception.
 *
 * @returns The normalized snapshot, or null when the state cannot be read.
 */
export async function readWorkServerDeploymentState(): Promise<WorkServerDeploymentSnapshot | null> {
  try {
    const statePath = getWorkServerDeploymentStatePath();
    const fileContent = await readFile(statePath, 'utf8');
    const parsedContent: unknown = JSON.parse(fileContent);
    return normalizeWorkServerDeploymentSnapshot(parsedContent);
  } catch {
    return null;
  }
}
/**
 * Claims the exclusive WORK-SERVER restart lock file and returns its path.
 *
 * The file is created with the exclusive `wx` flag, so exactly one caller can
 * win. When the file already exists its age is checked: a lock older than
 * WORK_SERVER_RESTART_LOCK_STALE_MS is removed and the claim is retried once.
 *
 * Compared with a plain "parse or give up" approach this also recovers from
 * lock files that are empty or contain invalid JSON (their mtime is used to
 * judge staleness), never leaves behind a lock file whose payload could not be
 * written, and bounds the number of retries so a lock that cannot be deleted
 * ends in a conflict error instead of an endless retry loop.
 *
 * @returns Path of the lock file now owned by this process.
 * @throws Error with `statusCode = 409` when another restart holds the lock.
 * @throws The underlying filesystem error for failures other than `EEXIST`.
 */
async function acquireWorkServerRestartLock() {
  const lockPath = getWorkServerRestartLockPath();
  await mkdir(path.dirname(lockPath), { recursive: true });

  // First attempt plus one retry after clearing a stale lock.
  const maxAttempts = 2;
  let existingStartedAt: string | null = null;

  for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
    if (await tryCreateWorkServerRestartLockFile(lockPath)) {
      return lockPath;
    }

    const existingLock = await inspectExistingWorkServerRestartLock(lockPath);
    existingStartedAt = existingLock.startedAt;
    if (!existingLock.stale) {
      break;
    }

    // Best-effort removal; if it fails the next attempt hits EEXIST again and
    // the loop ends with the conflict error below.
    await rm(lockPath, { force: true }).catch(() => undefined);
  }

  const conflictError = new Error(
    existingStartedAt
      ? "WORK-SERVER 무중단 재기동이 이미 진행 중입니다. 시작 시각 " + existingStartedAt
      : "WORK-SERVER 무중단 재기동이 이미 진행 중입니다.",
  );
  (conflictError as Error & { statusCode?: number }).statusCode = 409;
  throw conflictError;
}

/** Creates the lock file exclusively; resolves to false when it already exists. */
async function tryCreateWorkServerRestartLockFile(lockPath: string): Promise<boolean> {
  let handle: Awaited<ReturnType<typeof open>>;
  try {
    handle = await open(lockPath, 'wx');
  } catch (error) {
    if ((error as NodeJS.ErrnoException).code === 'EEXIST') {
      return false;
    }
    throw error;
  }

  try {
    const payload = { startedAt: new Date().toISOString(), key: 'work-server', pid: process.pid };
    await handle.writeFile(JSON.stringify(payload) + '\n', 'utf8');
  } catch (error) {
    // Do not leave an empty or partial lock file behind; it would block later restarts.
    await rm(lockPath, { force: true }).catch(() => undefined);
    throw error;
  } finally {
    await handle.close();
  }
  return true;
}

/** Reads an existing lock file and decides whether it is stale. */
async function inspectExistingWorkServerRestartLock(
  lockPath: string,
): Promise<{ startedAt: string | null; stale: boolean }> {
  let startedAt: string | null = null;
  try {
    const parsed = JSON.parse(await readFile(lockPath, 'utf8')) as Partial<WorkServerRestartLockPayload> | null;
    startedAt = normalizeDateTimeValue(typeof parsed?.startedAt === 'string' ? parsed.startedAt : null);
  } catch (error) {
    if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
      // The holder released the lock between our create attempt and this read.
      return { startedAt: null, stale: true };
    }
    // Unreadable or corrupt content: fall through and judge by the file's mtime.
  }

  const lockStat = await stat(lockPath).catch(() => null);
  const referenceMs = startedAt ? Date.parse(startedAt) : lockStat?.mtimeMs ?? Number.NaN;
  const stale =
    !Number.isFinite(referenceMs) || Date.now() - referenceMs > WORK_SERVER_RESTART_LOCK_STALE_MS;
  return { startedAt, stale };
}
/**
 * Builds the human-readable shell command shown for a server's restart script.
 */
function buildRestartCommandPreview(definition: ServerDefinition) {
  const scriptPath = definition.commandScript;
  return 'sh ' + scriptPath;
}
@@ -817,6 +1132,7 @@ function buildAcceptedRestartSnapshot(definition: ServerDefinition): ServerComma
commandScript: definition.commandScript,
commandWorkingDirectory: definition.commandWorkingDirectory,
errorMessage: null,
deployment: definition.key === 'work-server' ? buildEmptyWorkServerDeploymentSnapshot() : null,
};
}
@@ -969,6 +1285,7 @@ async function waitForDeferredRestartResult(
async function restartServerCommandDeferred(definition: ServerDefinition): Promise<ServerCommandRestartResult> {
const { logPath, statusPath } = buildDeferredRestartProbePaths(definition);
const workServerLockPath = definition.key === "work-server" ? await acquireWorkServerRestartLock() : null;
const shellCommand = [
`sleep ${Math.ceil(DEFERRED_RESTART_DELAY_MS / 1000)}`,
`sh ${JSON.stringify(definition.commandScript)} >${JSON.stringify(logPath)} 2>&1`,
@@ -976,7 +1293,8 @@ async function restartServerCommandDeferred(definition: ServerDefinition): Promi
`printf '%s' \"$status\" >${JSON.stringify(statusPath)}`,
].join('; ');
await new Promise<void>((resolve, reject) => {
try {
await new Promise<void>((resolve, reject) => {
const child = spawn('sh', ['-c', shellCommand], {
cwd: definition.commandWorkingDirectory,
detached: true,
@@ -984,21 +1302,30 @@ async function restartServerCommandDeferred(definition: ServerDefinition): Promi
env: {
...process.env,
...definition.commandEnvironment,
...(workServerLockPath ? { WORK_SERVER_RESTART_LOCK_FILE: workServerLockPath } : {}),
},
});
child.once('error', reject);
child.once('spawn', () => {
child.unref();
resolve();
child.once('spawn', () => {
child.unref();
resolve();
});
});
});
} catch (error) {
if (workServerLockPath) {
await rm(workServerLockPath, { force: true }).catch(() => undefined);
}
throw error;
}
if (definition.deferredResponseMode === 'accept-immediately') {
return {
server: buildAcceptedRestartSnapshot(definition),
commandOutput: `${definition.label} 재기동 요청을 접수했습니다. 잠시 후 상태를 다시 확인해 주세요.`,
restartState: 'accepted',
deployment: definition.key === 'work-server' ? await readWorkServerDeploymentState() : null,
};
}
@@ -1008,6 +1335,7 @@ async function restartServerCommandDeferred(definition: ServerDefinition): Promi
server: buildAcceptedRestartSnapshot(definition),
commandOutput: commandOutput ?? `${definition.label} 재기동 요청을 접수했습니다. 잠시 후 상태를 다시 확인해 주세요.`,
restartState: 'accepted',
deployment: definition.key === 'work-server' ? await readWorkServerDeploymentState() : null,
};
}
@@ -1463,7 +1791,12 @@ async function inspectBuild(definition: ServerDefinition): Promise<BuildInspecti
? !latestBuild?.builtAt || latestSourceChangedAt > latestBuild.builtAt
: false;
const updateAvailable =
Boolean(runningBuild?.buildId) && Boolean(latestBuild?.buildId) && runningBuild?.buildId !== latestBuild?.buildId;
!buildRequired &&
Boolean(runningBuild?.builtAt) &&
Boolean(latestBuild?.builtAt) &&
Boolean(latestSourceChangedAt) &&
runningBuild!.builtAt < latestBuild!.builtAt &&
runningBuild!.builtAt < latestSourceChangedAt!;
return {
runningVersion: runningBuild?.buildId ?? null,
@@ -1519,6 +1852,7 @@ async function checkServer(definition: ServerDefinition): Promise<ServerCommandS
const runtimeInfo = await inspectRuntime(definition);
const buildInfo = await inspectBuild(definition);
const deployment = definition.key === 'work-server' ? await readWorkServerDeploymentState() : null;
const fallbackAttempt = selectedAttempt.url !== definition.checkUrl ? `fallback health check succeeded via ${selectedAttempt.url}` : null;
const collectedErrors = attempts
.filter((attempt) => attempt.errorMessage)
@@ -1557,12 +1891,26 @@ async function checkServer(definition: ServerDefinition): Promise<ServerCommandS
updateAvailable: buildInfo.updateAvailable,
updateSummary: buildInfo.updateSummary,
responseTimeMs: Date.now() - startedAt,
composeStatus: runtimeInfo.composeStatus,
composeDetails: runtimeInfo.composeDetails,
composeStatus:
definition.key === 'work-server' && deployment?.status === 'running'
? 'deploying'
: runtimeInfo.composeStatus,
composeDetails:
definition.key === 'work-server' && deployment
? appendComposeDetails([
runtimeInfo.composeDetails,
deployment.status !== 'idle'
? `deploy:${deployment.status}${deployment.targetSlot ? `:${deployment.targetSlot}` : ''}`
: null,
])
: runtimeInfo.composeDetails,
lastCommand: buildRestartCommandPreview(definition),
commandScript: definition.commandScript,
commandWorkingDirectory: definition.commandWorkingDirectory,
errorMessage,
errorMessage: deployment?.status === 'failed' && deployment.lastError
? trimPreview([deployment.lastError, errorMessage].filter(Boolean).join(' | '), 400)
: errorMessage,
deployment,
};
}
@@ -1591,15 +1939,7 @@ export async function restartServerCommand(key: ServerCommandKey): Promise<Serve
}
try {
const commandResult = await execFileAsync('sh', [definition.commandScript], {
cwd: definition.commandWorkingDirectory,
timeout: 30000,
maxBuffer: 1024 * 1024,
env: {
...process.env,
...definition.commandEnvironment,
},
});
const commandResult = await executeServerCommandScript(definition);
stdout = commandResult.stdout;
stderr = commandResult.stderr;
} catch (error) {
@@ -1626,5 +1966,23 @@ export async function restartServerCommand(key: ServerCommandKey): Promise<Serve
server,
commandOutput: trimPreview([stdout, stderr].filter(Boolean).join('\n'), 400),
restartState: 'completed',
deployment: server.deployment,
};
}
/**
 * Deploys the work server by delegating to the regular restart command for 'work-server'.
 */
export async function deployWorkServerCommand(): Promise<ServerCommandRestartResult> {
  const restartResult = await restartServerCommand('work-server');
  return restartResult;
}
/**
 * Starts a test-server deployment and reports it as accepted.
 *
 * The server definition is resolved first, then the deployment is started and
 * the server's current state is checked. If starting the deployment yields no
 * snapshot, the persisted test deployment state is read instead.
 */
export async function deployTestServerCommand(): Promise<ServerCommandRestartResult> {
  const definition = getServerDefinition('test');
  const startedDeployment = await startTestServerDeployment();
  const server = await checkServer(definition);
  const testDeployment = startedDeployment ?? (await readTestServerDeploymentState());

  return {
    server,
    commandOutput: 'TEST 배포를 시작했습니다. origin/main 푸시, 테스트 빌드, 테스트 배포 과정을 확인합니다.',
    restartState: 'accepted',
    testDeployment,
  };
}