第12章:测试、部署和运维
2025/9/1 · 阅读时间约 12 分钟
第12章:测试、部署和运维
学习目标
- 建立完整的测试体系(单元测试、集成测试)
- 掌握MCP Server的打包和部署策略
- 学习容器化部署和编排
- 实现监控、告警和故障恢复
- 掌握版本管理和持续集成/部署
12.1 测试体系建设
12.1.1 单元测试框架
// tests/unit/tools.test.ts
import { describe, it, expect, beforeEach, afterEach, jest } from '@jest/globals';
import { FileSystemMCPServer } from '../../src/projects/filesystem/FileSystemServer';
// Unit tests for the file-system tools of FileSystemMCPServer:
// fs_read_file (success, read failure, path restriction) and fs_search_files.
describe('FileSystem Tools', () => {
let server: FileSystemMCPServer;
let mockFs: any;
beforeEach(async () => {
// Mocked file-system functions that the assertions below configure and inspect.
// NOTE(review): nothing visible in this suite passes mockFs to the server or
// registers it as a module mock — confirm how the server is made to use it.
mockFs = {
readFile: jest.fn(),
writeFile: jest.fn(),
readdir: jest.fn(),
stat: jest.fn(),
};
// Create the server instance under test. The second constructor argument is
// presumably the list of allowed root directories — verify against the class.
const config = {
server: { name: 'test-server', version: '1.0.0' },
capabilities: { tools: {} },
runtime: {},
security: {},
};
server = new FileSystemMCPServer(config, ['/tmp/test']);
await server.start();
});
afterEach(async () => {
await server.stop();
jest.clearAllMocks();
});
describe('fs_read_file', () => {
it('should read file successfully', async () => {
// Arrange: the mocked read returns fixed content, the mocked stat matching metadata.
const testContent = 'Hello, World!';
mockFs.readFile.mockResolvedValue(testContent);
mockFs.stat.mockResolvedValue({
size: testContent.length,
mtime: new Date(),
});
// Act
const result = await server.handleToolCall('fs_read_file', {
path: '/tmp/test/example.txt'
});
// Assert
expect(result).toHaveProperty('content', testContent);
expect(result).toHaveProperty('metadata');
expect(mockFs.readFile).toHaveBeenCalledWith('/tmp/test/example.txt', 'utf8');
});
it('should handle file not found error', async () => {
// The low-level ENOENT error is expected to surface as 'Failed to read file'.
mockFs.readFile.mockRejectedValue(new Error('ENOENT: no such file or directory'));
await expect(
server.handleToolCall('fs_read_file', {
path: '/tmp/test/nonexistent.txt'
})
).rejects.toThrow('Failed to read file');
});
it('should validate file path permissions', async () => {
// '/etc/passwd' lies outside '/tmp/test', so the call is expected to be rejected.
await expect(
server.handleToolCall('fs_read_file', {
path: '/etc/passwd'
})
).rejects.toThrow('Access denied');
});
});
describe('fs_search_files', () => {
it('should search files by pattern', async () => {
const mockFiles = [
'/tmp/test/file1.js',
'/tmp/test/file2.ts',
'/tmp/test/subdir/file3.js'
];
// NOTE(review): jest.doMock only affects modules loaded after this call, and the
// server module is imported at the top of the file — verify this mock takes effect.
jest.doMock('glob', () => ({
sync: jest.fn().mockReturnValue(mockFiles)
}));
const result = await server.handleToolCall('fs_search_files', {
path: '/tmp/test',
pattern: '**/*.js'
});
// Two of the three mocked paths end in '.js'.
expect(result.results).toHaveLength(2);
expect(result.results[0].path).toMatch(/\.js$/);
});
});
});
// tests/unit/database.test.ts
/**
 * Unit tests for the `db_query` tool of DatabaseMCPServer.
 *
 * The real database connection is replaced by a jest mock that is injected
 * directly into the server instance, so no database is needed to run the suite.
 */
describe('Database Tools', () => {
  let server: DatabaseMCPServer;
  let mockConnection: any;

  beforeEach(async () => {
    mockConnection = {
      query: jest.fn(),
      execute: jest.fn(),
      connect: jest.fn(),
      end: jest.fn(),
    };
    // Configure connect() only after the object exists. Referencing
    // `mockConnection` inside its own object literal would capture the value
    // from before this assignment (undefined, or the previous test's mock).
    mockConnection.connect.mockResolvedValue(mockConnection);

    const dbConfig = {
      type: 'postgresql' as const,
      connection: {
        host: 'localhost',
        database: 'test',
        username: 'test',
        password: 'test',
      },
    };

    server = new DatabaseMCPServer(
      {
        server: { name: 'db-server', version: '1.0.0' },
        capabilities: { tools: {} },
        runtime: {},
        security: {},
      },
      dbConfig,
    );

    // Inject the mock connection, bypassing the server's own connection setup.
    (server as any).connection = mockConnection;
  });

  describe('db_query', () => {
    it('should execute SELECT query successfully', async () => {
      const mockResult = {
        rows: [{ id: 1, name: 'Test User' }],
        rowCount: 1,
        fields: [{ name: 'id' }, { name: 'name' }],
      };
      mockConnection.query.mockResolvedValue(mockResult);

      const result = await server.handleToolCall('db_query', {
        sql: 'SELECT * FROM users WHERE id = $1',
        params: ['1'],
      });

      expect(result.rows).toEqual(mockResult.rows);
      expect(result.rowCount).toBe(1);
      // The tool is expected to append a row limit to the submitted statement.
      expect(mockConnection.query).toHaveBeenCalledWith(
        'SELECT * FROM users WHERE id = $1 LIMIT 100',
        ['1'],
      );
    });

    it('should handle SQL syntax errors', async () => {
      mockConnection.query.mockRejectedValue(
        new Error('syntax error at or near "SELEC"'),
      );

      await expect(
        server.handleToolCall('db_query', {
          sql: 'SELEC * FROM users',
        }),
      ).rejects.toThrow('Query execution failed');
    });

    it('should use query cache', async () => {
      const mockResult = { rows: [{ count: 5 }], rowCount: 1 };
      mockConnection.query.mockResolvedValue(mockResult);

      // First call populates the cache; its result is not needed here.
      await server.handleToolCall('db_query', {
        sql: 'SELECT COUNT(*) FROM users',
        useCache: true,
      });

      // Second, identical call should be answered from the cache.
      const result2 = await server.handleToolCall('db_query', {
        sql: 'SELECT COUNT(*) FROM users',
        useCache: true,
      });

      expect(result2.fromCache).toBe(true);
      expect(mockConnection.query).toHaveBeenCalledTimes(1);
    });
  });
});
12.1.2 集成测试
// tests/integration/server.integration.test.ts
import { SecureMCPServer } from '../../src/SecureMCPServer';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
// Integration tests that drive SecureMCPServer through handleSecureRequest:
// a full request/response cycle, rate limiting, and input validation.
describe('MCP Server Integration Tests', () => {
let server: SecureMCPServer;
let transport: StdioServerTransport;
beforeEach(async () => {
// Authentication and rate limiting are disabled here; strict input validation
// and auditing are enabled.
const config = {
server: { name: 'integration-test', version: '1.0.0' },
capabilities: { tools: {}, resources: {}, prompts: {} },
runtime: { timeout: 5000 },
security: {
authentication: { required: false, methods: [], sessionTimeout: 3600000, maxLoginAttempts: 5, lockoutDuration: 900000 },
authorization: { model: 'none' as const, requireExplicitPermissions: false },
inputValidation: { enableStrictValidation: true, maxPayloadSize: 1024000, allowedContentTypes: ['application/json'], sanitizeInputs: true },
rateLimiting: { enabled: false, windowMs: 60000, maxRequests: 100 },
encryption: { algorithm: 'aes-256-gcm', keySize: 32, encryptSensitiveData: false },
audit: { enabled: true, logLevel: 'standard' as const, retentionDays: 30 },
cors: { enabled: false, allowedOrigins: [], allowedMethods: ['POST'], allowedHeaders: ['Content-Type'], credentials: false }
}
};
server = new SecureMCPServer(config);
// NOTE(review): transport is created but never used anywhere in this suite —
// confirm whether it should be connected to the server or removed.
transport = new StdioServerTransport();
await server.start();
});
afterEach(async () => {
await server.stop();
});
it('should handle complete request/response cycle', async () => {
// Simulate a client request by calling the secure request handler directly.
const request = {
jsonrpc: '2.0',
id: 1,
method: 'tools/list',
params: {}
};
const response = await server.handleSecureRequest(request, {
clientIP: '127.0.0.1',
userAgent: 'test-client'
});
expect(response.jsonrpc).toBe('2.0');
expect(response.id).toBe(1);
expect(response.result).toHaveProperty('tools');
});
it('should enforce rate limiting', async () => {
// Copy the shared config with rate limiting enabled and capped at 2 requests per window.
const configWithRateLimit = {
...server['config'],
security: {
...server['config'].security,
rateLimiting: {
enabled: true,
windowMs: 60000,
maxRequests: 2
}
}
};
// A second server instance is used so the shared one keeps its default config.
const limitedServer = new SecureMCPServer(configWithRateLimit);
await limitedServer.start();
try {
const request = {
jsonrpc: '2.0',
id: 1,
method: 'tools/list',
params: {}
};
const context = { clientIP: '127.0.0.1', userAgent: 'test' };
// The first two requests are within the limit.
await limitedServer.handleSecureRequest(request, context);
await limitedServer.handleSecureRequest(request, context);
// The third request should be rejected by the limiter.
const response = await limitedServer.handleSecureRequest(request, context);
expect(response.error?.code).toBe(-32006); // RATE_LIMIT_EXCEEDED
} finally {
// Always stop the extra server, even when an assertion fails.
await limitedServer.stop();
}
});
it('should validate input parameters', async () => {
const invalidRequest = {
jsonrpc: '2.0',
id: 1,
method: 'tools/call',
params: {
name: '', // invalid (empty) tool name
arguments: {}
}
};
const response = await server.handleSecureRequest(invalidRequest, {
clientIP: '127.0.0.1'
});
expect(response.error?.code).toBe(-32602); // INVALID_PARAMS
});
});
// tests/integration/end-to-end.test.ts
// End-to-end workflow test: starts a server process once for the suite, then walks
// through initialize -> tools/list -> tools/call -> resources/list via the test client.
describe('End-to-End Tests', () => {
let serverProcess: any;
let client: any;
beforeAll(async () => {
// Start the full server process and connect the client.
// NOTE(review): the assertions below depend on the response shapes returned by
// createTestClient(); verify it yields serverInfo / tools / resources as read here.
serverProcess = await startTestServer();
client = createTestClient();
await client.connect();
});
afterAll(async () => {
await client.disconnect();
await stopTestServer(serverProcess);
});
it('should handle complete workflow', async () => {
// 1. Initialize the connection
const initResponse = await client.send({
method: 'initialize',
params: {
protocolVersion: '2024-11-05',
clientInfo: { name: 'test-client', version: '1.0.0' },
capabilities: {}
}
});
expect(initResponse.result.serverInfo.name).toBeDefined();
// 2. List the available tools
const toolsResponse = await client.send({
method: 'tools/list'
});
expect(toolsResponse.result.tools).toBeInstanceOf(Array);
// 3. Call the first tool, if the server exposes any
if (toolsResponse.result.tools.length > 0) {
const tool = toolsResponse.result.tools[0];
const callResponse = await client.send({
method: 'tools/call',
params: {
name: tool.name,
arguments: {}
}
});
expect(callResponse.result).toBeDefined();
}
// 4. List the available resources
const resourcesResponse = await client.send({
method: 'resources/list'
});
expect(resourcesResponse.result.resources).toBeInstanceOf(Array);
});
});
// 测试辅助函数
/**
 * Spawns `node dist/test-server.js` with NODE_ENV=test and resolves with the
 * child process once it has printed "Server started" on stdout.
 *
 * @returns the spawned child process (stdio fully piped).
 * @throws Error('Server start timeout') if the marker is not seen within 10 s,
 *   an Error if the process exits before it is ready, or the spawn error itself.
 *   In every failure case the child process is killed before the error is rethrown,
 *   so a failed start does not leave a stray server behind.
 */
async function startTestServer(): Promise<any> {
  const { spawn } = require('child_process');
  const serverProcess = spawn('node', ['dist/test-server.js'], {
    stdio: ['pipe', 'pipe', 'pipe'],
    env: { ...process.env, NODE_ENV: 'test' }
  });

  // stderr is piped but not inspected; keep it flowing so a chatty server
  // cannot block on a full pipe buffer.
  serverProcess.stderr.resume();

  try {
    await new Promise<void>((resolve, reject) => {
      // Accumulate stdout: the marker may arrive split across several chunks.
      let output = '';

      const timeout = setTimeout(
        () => settle(new Error('Server start timeout')),
        10000
      );

      function onData(data: Buffer): void {
        output += data.toString();
        if (output.includes('Server started')) {
          settle();
        }
      }

      function onError(err: Error): void {
        settle(err);
      }

      function onExit(code: number | null, signal: string | null): void {
        settle(new Error(`Server exited before startup (code: ${code}, signal: ${signal})`));
      }

      // Single exit point: detach the startup listeners, then resolve or reject.
      function settle(err?: Error): void {
        clearTimeout(timeout);
        serverProcess.stdout.removeListener('data', onData);
        serverProcess.removeListener('error', onError);
        serverProcess.removeListener('exit', onExit);
        // Keep stdout draining after startup so the server never blocks on writes.
        serverProcess.stdout.resume();
        if (err) {
          reject(err);
        } else {
          resolve();
        }
      }

      serverProcess.stdout.on('data', onData);
      serverProcess.on('error', onError);
      serverProcess.on('exit', onExit);
    });
  } catch (err) {
    // Do not leak the child when startup failed.
    serverProcess.kill();
    throw err;
  }

  return serverProcess;
}
/**
 * Creates a mock MCP client for the end-to-end suite.
 *
 * `connect` and `disconnect` resolve immediately. `send` resolves with a canned
 * response chosen by `message.method`, shaped like the fields the end-to-end
 * test reads (`serverInfo`, `tools`, `resources`). Unknown methods get the
 * generic `{ success: true }` result.
 *
 * NOTE(review): this client never talks to the process returned by
 * startTestServer(); replace it with a real transport-backed client for a
 * genuine end-to-end check.
 */
function createTestClient(): any {
  const cannedResults: Record<string, unknown> = {
    initialize: {
      protocolVersion: '2024-11-05',
      serverInfo: { name: 'test-server', version: '1.0.0' },
      capabilities: {}
    },
    'tools/list': { tools: [] },
    'resources/list': { resources: [] }
  };

  return {
    connect: jest.fn().mockResolvedValue(undefined),
    disconnect: jest.fn().mockResolvedValue(undefined),
    send: jest.fn().mockImplementation(async (message: any) => {
      const method = message?.method;
      const known =
        typeof method === 'string' &&
        Object.prototype.hasOwnProperty.call(cannedResults, method);
      return { result: known ? cannedResults[method] : { success: true } };
    })
  };
}
/**
 * Stops a server process started for the tests.
 *
 * Calls `kill()` and resolves as soon as the process emits `exit`. If no exit is
 * observed (or the object cannot emit events), it resolves after the grace
 * period instead, so teardown never hangs.
 *
 * @param process the child process to stop; a falsy value is a no-op.
 * @param gracePeriodMs maximum time to wait for the exit event (default 1000 ms).
 */
async function stopTestServer(process: any, gracePeriodMs: number = 1000): Promise<void> {
  if (!process) {
    return;
  }
  // A child that already exited has a non-null exitCode or signalCode.
  if (process.exitCode != null || process.signalCode != null) {
    return;
  }

  const canObserveExit = typeof process.once === 'function';

  await new Promise<void>((resolve) => {
    const timer = setTimeout(() => {
      if (canObserveExit && typeof process.removeListener === 'function') {
        process.removeListener('exit', onExit);
      }
      resolve();
    }, gracePeriodMs);

    function onExit(): void {
      clearTimeout(timer);
      resolve();
    }

    // Subscribe before killing so a fast exit cannot be missed.
    if (canObserveExit) {
      process.once('exit', onExit);
    }
    process.kill();
  });
}
12.2 部署策略和容器化
12.2.1 Docker容器化
# Dockerfile
# Two-stage build: compile in `builder`, ship only build output and runtime
# dependencies in `production`.

# ---- Build stage ----
FROM node:18-alpine AS builder
WORKDIR /app

# Copy package manifests and compiler configuration first for better layer caching
COPY package*.json ./
COPY tsconfig*.json ./

# Install ALL dependencies: `npm run build` needs the devDependencies
# (a production-only install here would leave the build tooling missing).
RUN npm ci

# Copy sources and build
COPY src ./src
RUN npm run build

# Remove devDependencies so only runtime packages are copied to the final image
RUN npm prune --omit=dev && npm cache clean --force

# ---- Production image ----
FROM node:18-alpine AS production
WORKDIR /app

# dumb-init runs as PID 1 and forwards signals to the node process
RUN apk add --no-cache dumb-init

# Create a non-root user
RUN addgroup -g 1001 -S nodejs && \
    adduser -S mcpserver -u 1001

# Copy build output and runtime dependencies
COPY --from=builder /app/dist ./dist
COPY --from=builder /app/node_modules ./node_modules
COPY --from=builder /app/package*.json ./

# Create writable directories and hand /app to the runtime user
RUN mkdir -p /app/logs /app/data && \
    chown -R mcpserver:nodejs /app

# Environment
ENV NODE_ENV=production
ENV LOG_LEVEL=info
ENV PORT=3000

# Exposed port
EXPOSE 3000

# Drop privileges
USER mcpserver

# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD node dist/healthcheck.js

# Start command
ENTRYPOINT ["dumb-init", "--"]
CMD ["node", "dist/server.js"]
# docker-compose.yml
# The database password is read from the environment (or a local .env file next
# to this file) instead of being committed here; compose aborts with the message
# below when POSTGRES_PASSWORD is not set.
version: '3.8'

services:
  mcp-server:
    build: .
    ports:
      - "3000:3000"
    environment:
      - NODE_ENV=production
      - LOG_LEVEL=info
      - DB_HOST=postgres
      - DB_NAME=mcpserver
      - DB_USER=postgres
      # NOTE(review): the Kubernetes Deployment passes DB_PASSWORD, this file DB_PASS —
      # confirm which variable name the application actually reads.
      - DB_PASS=${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD in the environment or .env}
      - REDIS_URL=redis://redis:6379
    volumes:
      - ./data:/app/data
      - ./logs:/app/logs
      - ./config:/app/config:ro
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    restart: unless-stopped
    networks:
      - mcp-network

  postgres:
    image: postgres:15-alpine
    environment:
      - POSTGRES_DB=mcpserver
      - POSTGRES_USER=postgres
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD in the environment or .env}
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./init-db:/docker-entrypoint-initdb.d:ro
    ports:
      # Published on loopback only: other services reach it over mcp-network,
      # the host port exists purely for local debugging.
      - "127.0.0.1:5432:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - mcp-network

  redis:
    image: redis:7-alpine
    command: redis-server --appendonly yes
    volumes:
      - redis_data:/data
    ports:
      # Loopback only; this Redis instance has no authentication configured.
      - "127.0.0.1:6379:6379"
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 3s
      retries: 3
    networks:
      - mcp-network

  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./nginx/ssl:/etc/nginx/ssl:ro
    depends_on:
      - mcp-server
    restart: unless-stopped
    networks:
      - mcp-network

volumes:
  postgres_data:
  redis_data:

networks:
  mcp-network:
    driver: bridge
12.2.2 Kubernetes部署
# k8s/namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
name: mcp-server
labels:
name: mcp-server
---
# k8s/configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: mcp-server-config
namespace: mcp-server
data:
server.json: |
{
"server": {
"name": "production-mcp-server",
"version": "1.0.0"
},
"capabilities": {
"tools": { "listChanged": true },
"resources": { "subscribe": true, "listChanged": true },
"prompts": { "listChanged": true }
},
"runtime": {
"maxConnections": 100,
"timeout": 30000,
"enableMetrics": true
},
"security": {
"rateLimiting": {
"enabled": true,
"maxRequests": 1000,
"windowMs": 60000
}
}
}
---
# k8s/secret.yaml
# Secret consumed by the mcp-server Deployment (keys db-password and jwt-secret
# are referenced there via secretKeyRef).
# WARNING: base64 is an encoding, not encryption. The values below decode to
# trivial sample strings ("password123", "mysecretkey", "apikeysecret") and are
# placeholders only — never commit real credentials in this form.
apiVersion: v1
kind: Secret
metadata:
name: mcp-server-secrets
namespace: mcp-server
type: Opaque
data:
db-password: cGFzc3dvcmQxMjM= # base64 encoded
jwt-secret: bXlzZWNyZXRrZXk=
api-key-secret: YXBpa2V5c2VjcmV0
---
# k8s/deployment.yaml
# Deployment of the MCP server: 3 replicas, configuration mounted from the
# mcp-server-config ConfigMap, credentials injected from mcp-server-secrets,
# and a persistent data volume from the mcp-server-data claim.
apiVersion: apps/v1
kind: Deployment
metadata:
name: mcp-server
namespace: mcp-server
labels:
app: mcp-server
spec:
replicas: 3
selector:
matchLabels:
app: mcp-server
template:
metadata:
labels:
app: mcp-server
spec:
containers:
- name: mcp-server
# NOTE(review): a mutable "latest" tag makes rollouts and rollbacks
# non-reproducible; deployment tooling is expected to substitute a pinned tag.
image: mcp-server:latest
ports:
- containerPort: 3000
name: http
env:
- name: NODE_ENV
value: "production"
- name: PORT
value: "3000"
- name: DB_HOST
value: "postgres-service"
# Credentials come from the Secret rather than being inlined here.
- name: DB_PASSWORD
valueFrom:
secretKeyRef:
name: mcp-server-secrets
key: db-password
- name: JWT_SECRET
valueFrom:
secretKeyRef:
name: mcp-server-secrets
key: jwt-secret
volumeMounts:
- name: config
mountPath: /app/config
readOnly: true
- name: data
mountPath: /app/data
resources:
requests:
memory: "256Mi"
cpu: "100m"
limits:
memory: "512Mi"
cpu: "500m"
# Liveness and readiness both poll GET /health on the container port.
livenessProbe:
httpGet:
path: /health
port: 3000
initialDelaySeconds: 30
periodSeconds: 10
readinessProbe:
httpGet:
path: /health
port: 3000
initialDelaySeconds: 5
periodSeconds: 5
volumes:
- name: config
configMap:
name: mcp-server-config
# NOTE(review): all replicas mount the same claim, which is declared
# ReadWriteOnce in k8s/pvc.yaml; replicas scheduled on different nodes may
# fail to attach it — confirm the storage design (RWX volume or StatefulSet).
- name: data
persistentVolumeClaim:
claimName: mcp-server-data
---
# k8s/service.yaml
apiVersion: v1
kind: Service
metadata:
name: mcp-server-service
namespace: mcp-server
labels:
app: mcp-server
spec:
selector:
app: mcp-server
ports:
- port: 80
targetPort: 3000
protocol: TCP
name: http
type: ClusterIP
---
# k8s/ingress.yaml
# Routes https://mcp-server.example.com/* to mcp-server-service:80 with a
# certificate issued by cert-manager (letsencrypt-prod).
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: mcp-server-ingress
  namespace: mcp-server
  annotations:
    # Deliberately no nginx.ingress.kubernetes.io/rewrite-target here: combined
    # with the catch-all "/" prefix below it would rewrite every request URI to
    # "/", so paths such as /health and /metrics would never reach the server.
    cert-manager.io/cluster-issuer: letsencrypt-prod
spec:
  tls:
    - hosts:
        - mcp-server.example.com
      secretName: mcp-server-tls
  rules:
    - host: mcp-server.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: mcp-server-service
                port:
                  number: 80
---
# k8s/pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: mcp-server-data
namespace: mcp-server
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 10Gi
storageClassName: standard
---
# k8s/hpa.yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: mcp-server-hpa
namespace: mcp-server
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: mcp-server
minReplicas: 3
maxReplicas: 10
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 70
- type: Resource
resource:
name: memory
target:
type: Utilization
averageUtilization: 80
12.3 监控和可观测性
12.3.1 监控指标收集
// src/monitoring/MetricsExporter.ts
import { Registry, Counter, Histogram, Gauge, collectDefaultMetrics } from 'prom-client';
/**
 * Prometheus metrics for the MCP server.
 *
 * All metrics are registered on a private Registry (not the prom-client global
 * one), together with prom-client's default process/runtime metrics. Expose
 * them by serving the string returned from {@link PrometheusMetrics.getMetrics}.
 */
export class PrometheusMetrics {
  private readonly registry: Registry;
  private readonly httpRequests: Counter;
  private readonly httpDuration: Histogram;
  private readonly activeConnections: Gauge;
  private readonly toolExecutions: Counter;
  private readonly errorRate: Counter;

  constructor() {
    this.registry = new Registry();

    // HTTP request counter
    this.httpRequests = new Counter({
      name: 'mcp_http_requests_total',
      help: 'Total number of HTTP requests',
      labelNames: ['method', 'route', 'status_code'],
      registers: [this.registry]
    });

    // HTTP request duration (seconds)
    this.httpDuration = new Histogram({
      name: 'mcp_http_request_duration_seconds',
      help: 'HTTP request duration in seconds',
      labelNames: ['method', 'route'],
      buckets: [0.1, 0.5, 1, 2, 5, 10],
      registers: [this.registry]
    });

    // Number of active connections
    this.activeConnections = new Gauge({
      name: 'mcp_active_connections',
      help: 'Number of active MCP connections',
      registers: [this.registry]
    });

    // Tool execution counter
    this.toolExecutions = new Counter({
      name: 'mcp_tool_executions_total',
      help: 'Total number of tool executions',
      labelNames: ['tool_name', 'status'],
      registers: [this.registry]
    });

    // Error counter (alert rules derive the error rate from it)
    this.errorRate = new Counter({
      name: 'mcp_errors_total',
      help: 'Total number of errors',
      labelNames: ['type', 'severity'],
      registers: [this.registry]
    });

    // Default process/runtime metrics, collected into the same registry.
    // Uses the ES import instead of a runtime require() of the same package.
    collectDefaultMetrics({ register: this.registry });
  }

  /**
   * Records one finished HTTP request.
   *
   * @param method HTTP method label.
   * @param route route label.
   * @param statusCode response status; stored as a string label.
   * @param duration request duration; divided by 1000 before being observed in
   *   the seconds histogram, i.e. callers are expected to pass milliseconds.
   */
  recordHttpRequest(method: string, route: string, statusCode: number, duration: number): void {
    this.httpRequests.inc({
      method,
      route,
      status_code: statusCode.toString()
    });
    this.httpDuration.observe({ method, route }, duration / 1000);
  }

  /** Counts one tool execution, labelled 'success' or 'error'. */
  recordToolExecution(toolName: string, success: boolean): void {
    this.toolExecutions.inc({
      tool_name: toolName,
      status: success ? 'success' : 'error'
    });
  }

  /** Counts one error with the given type and severity labels. */
  recordError(type: string, severity: string): void {
    this.errorRate.inc({ type, severity });
  }

  /** Sets the active-connections gauge to an absolute value. */
  setActiveConnections(count: number): void {
    this.activeConnections.set(count);
  }

  /** Returns all registered metrics as produced by the registry's metrics(). */
  getMetrics(): Promise<string> {
    return this.registry.metrics();
  }

  /** Returns the underlying registry, e.g. to register additional metrics. */
  getRegistry(): Registry {
    return this.registry;
  }
}
// src/monitoring/HealthCheck.ts
/** Outcome reported by a single dependency probe. */
interface HealthCheckResult {
  status: 'healthy' | 'degraded' | 'unhealthy';
  message?: string;
  details?: any;
}

/** Asynchronous probe for one dependency (database, cache, upstream API, ...). */
interface HealthCheckFunction {
  (): Promise<HealthCheckResult>;
}

/**
 * Aggregates dependency probes and process statistics into one health report.
 *
 * Overall status: 'unhealthy' if any probe is unhealthy, throws, returns
 * nothing, or times out; otherwise 'degraded' if any probe is degraded;
 * otherwise 'healthy'.
 */
export class HealthCheckEndpoint {
  private dependencies: Map<string, HealthCheckFunction> = new Map();

  /**
   * @param checkTimeoutMs maximum time a single probe may take before it is
   *   reported as unhealthy with the message 'Health check timeout'.
   */
  constructor(private readonly checkTimeoutMs: number = 5000) {}

  /** Registers (or replaces) the probe for the named dependency. */
  addDependency(name: string, checkFn: HealthCheckFunction): void {
    this.dependencies.set(name, checkFn);
  }

  /**
   * Runs all registered probes and returns the aggregated report.
   *
   * Probes run concurrently, so the total latency is bounded by one timeout
   * rather than one timeout per dependency. This method never rejects because
   * of a failing probe; failures are reported in `dependencies`.
   *
   * @returns status, ISO timestamp, process uptime in seconds, per-dependency
   *   results, and system figures (memory in MiB, CPU time in ms, event-loop
   *   delay in ms).
   */
  async getHealth(): Promise<{
    status: 'healthy' | 'degraded' | 'unhealthy';
    timestamp: string;
    uptime: number;
    dependencies: Record<string, any>;
    system: {
      memory: any;
      cpu: any;
      eventLoop: any;
    };
  }> {
    const entries = Array.from(this.dependencies.entries());
    const outcomes = await Promise.all(
      entries.map(([, checkFn]) => this.runCheck(checkFn))
    );

    const results: Record<string, any> = {};
    let overallStatus: 'healthy' | 'degraded' | 'unhealthy' = 'healthy';
    for (let i = 0; i < entries.length; i++) {
      const outcome = outcomes[i];
      results[entries[i][0]] = outcome;
      if (outcome.status === 'unhealthy') {
        overallStatus = 'unhealthy';
      } else if (outcome.status === 'degraded' && overallStatus === 'healthy') {
        overallStatus = 'degraded';
      }
    }

    // System information
    const memUsage = process.memoryUsage();
    const cpuUsage = process.cpuUsage();
    const eventLoopDelay = await this.measureEventLoopDelay();

    return {
      status: overallStatus,
      timestamp: new Date().toISOString(),
      uptime: process.uptime(),
      dependencies: results,
      system: {
        memory: {
          heapUsed: Math.round(memUsage.heapUsed / 1024 / 1024),
          heapTotal: Math.round(memUsage.heapTotal / 1024 / 1024),
          rss: Math.round(memUsage.rss / 1024 / 1024)
        },
        cpu: {
          // process.cpuUsage() reports microseconds; convert to milliseconds.
          user: Math.round(cpuUsage.user / 1000),
          system: Math.round(cpuUsage.system / 1000)
        },
        eventLoop: {
          delay: Math.round(eventLoopDelay * 100) / 100
        }
      }
    };
  }

  /** Runs one probe with a timeout and converts every failure into an 'unhealthy' result. */
  private async runCheck(checkFn: HealthCheckFunction): Promise<HealthCheckResult> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeout = new Promise<never>((_, reject) => {
      timer = setTimeout(
        () => reject(new Error('Health check timeout')),
        this.checkTimeoutMs
      );
    });

    try {
      const result = await Promise.race([checkFn(), timeout]);
      if (result == null) {
        throw new Error('Health check returned no result');
      }
      return result;
    } catch (error) {
      return {
        status: 'unhealthy',
        message: error instanceof Error ? error.message : 'Unknown error'
      };
    } finally {
      // Always clear the timer so finished probes do not leave pending timeouts behind.
      if (timer !== undefined) {
        clearTimeout(timer);
      }
    }
  }

  /** Estimates event-loop delay (ms) as the time until a setImmediate callback runs. */
  private measureEventLoopDelay(): Promise<number> {
    const start = process.hrtime();
    return new Promise<number>((resolve) => {
      setImmediate(() => {
        const [seconds, nanoseconds] = process.hrtime(start);
        resolve(seconds * 1000 + nanoseconds / 1000000);
      });
    });
  }
}
12.3.2 监控配置
# monitoring/prometheus.yml
global:
scrape_interval: 15s
evaluation_interval: 15s
rule_files:
- "alert_rules.yml"
alerting:
alertmanagers:
- static_configs:
- targets:
- alertmanager:9093
scrape_configs:
- job_name: 'mcp-server'
static_configs:
- targets: ['mcp-server-service:80']
metrics_path: /metrics
scrape_interval: 10s
scrape_timeout: 5s
- job_name: 'node-exporter'
static_configs:
- targets: ['node-exporter:9100']
- job_name: 'postgres-exporter'
static_configs:
- targets: ['postgres-exporter:9187']
# monitoring/alert_rules.yml
groups:
- name: mcp-server-alerts
rules:
- alert: HighErrorRate
expr: rate(mcp_errors_total[5m]) > 0.1
for: 2m
labels:
severity: warning
annotations:
summary: High error rate detected
description: "Error rate is {{ $value }} errors per second"
- alert: HighResponseTime
expr: histogram_quantile(0.95, rate(mcp_http_request_duration_seconds_bucket[5m])) > 2
for: 5m
labels:
severity: warning
annotations:
summary: High response time
description: "95th percentile response time is {{ $value }}s"
- alert: HighMemoryUsage
expr: process_resident_memory_bytes / 1024 / 1024 > 500
for: 5m
labels:
severity: warning
annotations:
summary: High memory usage
description: "Memory usage is {{ $value }}MB"
- alert: ServiceDown
expr: up{job="mcp-server"} == 0
for: 1m
labels:
severity: critical
annotations:
summary: MCP Server is down
description: "MCP Server has been down for more than 1 minute"
# monitoring/grafana-dashboard.json
{
"dashboard": {
"title": "MCP Server Dashboard",
"panels": [
{
"title": "Request Rate",
"type": "graph",
"targets": [
{
"expr": "rate(mcp_http_requests_total[5m])",
"legendFormat": "{{method}} {{route}}"
}
]
},
{
"title": "Response Time",
"type": "graph",
"targets": [
{
"expr": "histogram_quantile(0.95, rate(mcp_http_request_duration_seconds_bucket[5m]))",
"legendFormat": "95th percentile"
},
{
"expr": "histogram_quantile(0.50, rate(mcp_http_request_duration_seconds_bucket[5m]))",
"legendFormat": "50th percentile"
}
]
},
{
"title": "Active Connections",
"type": "singlestat",
"targets": [
{
"expr": "mcp_active_connections",
"legendFormat": "Connections"
}
]
},
{
"title": "Memory Usage",
"type": "graph",
"targets": [
{
"expr": "process_resident_memory_bytes / 1024 / 1024",
"legendFormat": "RSS Memory (MB)"
},
{
"expr": "nodejs_heap_size_used_bytes / 1024 / 1024",
"legendFormat": "Heap Used (MB)"
}
]
},
{
"title": "Tool Executions",
"type": "graph",
"targets": [
{
"expr": "rate(mcp_tool_executions_total[5m])",
"legendFormat": "{{tool_name}} ({{status}})"
}
]
},
{
"title": "Error Rate",
"type": "graph",
"targets": [
{
"expr": "rate(mcp_errors_total[5m])",
"legendFormat": "{{type}} ({{severity}})"
}
]
}
]
}
}
12.4 CI/CD流水线
12.4.1 GitHub Actions配置
# .github/workflows/ci.yml
# Pipeline: test -> build (image pushed to GHCR) -> deploy (main only) -> notify.
name: CI/CD Pipeline

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main ]
  release:
    types: [ published ]

env:
  NODE_VERSION: '18'
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  test:
    runs-on: ubuntu-latest
    services:
      postgres:
        image: postgres:15
        env:
          POSTGRES_PASSWORD: postgres
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - 5432:5432
      redis:
        image: redis:7
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 3s
          --health-retries 5
        ports:
          - 6379:6379
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Setup Node.js
        uses: actions/setup-node@v3
        with:
          node-version: ${{ env.NODE_VERSION }}
          cache: 'npm'
      - name: Install dependencies
        run: npm ci
      - name: Run linting
        run: npm run lint
      - name: Run type checking
        run: npm run type-check
      - name: Run unit tests
        run: npm run test:unit
        env:
          NODE_ENV: test
      - name: Run integration tests
        run: npm run test:integration
        env:
          NODE_ENV: test
          DB_HOST: localhost
          DB_PORT: 5432
          REDIS_HOST: localhost
          REDIS_PORT: 6379
      - name: Generate coverage report
        run: npm run coverage
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
      - name: Build application
        run: npm run build
      - name: Run security audit
        run: npm audit --audit-level moderate

  build:
    needs: test
    runs-on: ubuntu-latest
    if: github.event_name != 'pull_request'
    # GITHUB_TOKEN needs write access to packages to push to ghcr.io.
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: Log in to Container Registry
        uses: docker/login-action@v2
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          # The raw commit-SHA tag is the one the deploy job rolls out.
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=raw,value=${{ github.sha }}
      - name: Build and push Docker image
        uses: docker/build-push-action@v4
        with:
          context: .
          platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

  deploy:
    needs: build
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/main'
    environment: production
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Setup kubectl
        uses: azure/setup-kubectl@v3
        with:
          version: 'v1.28.0'
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v2
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: us-west-2
      - name: Update kubeconfig
        run: |
          aws eks update-kubeconfig --region us-west-2 --name production-cluster
      - name: Deploy to Kubernetes
        run: |
          # Registry image references must be lowercase.
          IMAGE="${REGISTRY}/${IMAGE_NAME,,}:${GITHUB_SHA}"
          # Pin the manifest to the image built for this commit. Handles both the
          # checked-in "image: mcp-server:<tag>" value and an IMAGE_TAG token.
          sed -i -E \
            -e "s|^([[:space:]]*image:[[:space:]]*)mcp-server:.*\$|\1${IMAGE}|" \
            -e "s|IMAGE_TAG|${GITHUB_SHA}|g" \
            k8s/deployment.yaml
          grep -q "${GITHUB_SHA}" k8s/deployment.yaml || {
            echo "deployment.yaml was not pinned to ${GITHUB_SHA}" >&2
            exit 1
          }
          # The namespace must exist before the namespaced objects are applied.
          kubectl apply -f k8s/namespace.yaml
          kubectl apply -f k8s/
          kubectl rollout status deployment/mcp-server -n mcp-server
      - name: Setup Node.js
        uses: actions/setup-node@v3
        with:
          node-version: ${{ env.NODE_VERSION }}
          cache: 'npm'
      - name: Install dependencies
        run: npm ci
      - name: Run smoke tests
        run: |
          npm run test:smoke
        env:
          TEST_URL: https://mcp-server.example.com

  notify:
    needs: [test, build, deploy]
    runs-on: ubuntu-latest
    if: always()
    steps:
      - name: Notify Slack
        uses: 8398a7/action-slack@v3
        with:
          # job.status would describe this notify job itself; report the outcome
          # of the jobs it depends on instead.
          status: ${{ (contains(needs.*.result, 'failure') && 'failure') || (contains(needs.*.result, 'cancelled') && 'cancelled') || 'success' }}
          channel: '#deployments'
          fields: repo,message,commit,author,action,eventName,ref,workflow
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK }}
12.4.2 部署脚本
#!/bin/bash
# scripts/deploy.sh
#
# Deploys the MCP Server manifests to a per-environment namespace and verifies
# the rollout.
#
# Usage: deploy.sh [staging|production] [version]
#   $1  target environment (default: staging)
#   $2  image tag to deploy (default: latest)
# Environment:
#   SLACK_WEBHOOK           optional; when set, a message is posted after a
#                           successful production deployment.
#   HEALTHCHECK_LOCAL_PORT  optional local port for the health check (default 18080).
set -euo pipefail

ENVIRONMENT=${1:-staging}
VERSION=${2:-latest}

echo "🚀 Deploying MCP Server to $ENVIRONMENT"
echo "📦 Version: $VERSION"

# Validate the environment
if [[ ! "$ENVIRONMENT" =~ ^(staging|production)$ ]]; then
  echo "❌ Invalid environment. Use 'staging' or 'production'"
  exit 1
fi

# Derived settings
NAMESPACE="mcp-server-$ENVIRONMENT"
IMAGE="ghcr.io/company/mcp-server:$VERSION"

# Prints a manifest adapted to this deployment:
#  - the hard-coded "namespace: mcp-server" becomes $NAMESPACE (kubectl rejects
#    an object whose namespace differs from the -n flag),
#  - the container image becomes $IMAGE (IMAGE_PLACEHOLDER token or the
#    checked-in "mcp-server:<tag>" value).
render_manifest() {
  sed -E \
    -e "s|^([[:space:]]*namespace:[[:space:]]*)mcp-server[[:space:]]*\$|\1$NAMESPACE|" \
    -e "s|IMAGE_PLACEHOLDER|$IMAGE|g" \
    -e "s|^([[:space:]]*image:[[:space:]]*)mcp-server:.*\$|\1$IMAGE|" \
    "$1"
}

# Check the kubectl connection
echo "🔍 Checking Kubernetes connection..."
kubectl cluster-info || {
  echo "❌ Cannot connect to Kubernetes cluster"
  exit 1
}

# Create the namespace
echo "📂 Creating namespace if not exists..."
kubectl create namespace "$NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -

# Apply configuration
echo "⚙️ Applying configuration..."
render_manifest k8s/configmap.yaml | envsubst | kubectl apply -n "$NAMESPACE" -f -
render_manifest k8s/secret.yaml | envsubst | kubectl apply -n "$NAMESPACE" -f -
# The Deployment mounts the data claim and the health check below goes through
# the Service, so both must exist in a freshly created namespace.
render_manifest k8s/pvc.yaml | kubectl apply -n "$NAMESPACE" -f -
render_manifest k8s/service.yaml | kubectl apply -n "$NAMESPACE" -f -

# Update the deployment
echo "🔄 Updating deployment..."
render_manifest k8s/deployment.yaml | kubectl apply -n "$NAMESPACE" -f -

# Wait for the rollout
echo "⏳ Waiting for deployment to complete..."
kubectl rollout status deployment/mcp-server -n "$NAMESPACE" --timeout=300s

# Verify the deployment using the Deployment's own status (counting Running
# pods would also count old pods that are still terminating).
echo "✅ Verifying deployment..."
DESIRED_REPLICAS=$(kubectl get deployment mcp-server -n "$NAMESPACE" -o jsonpath='{.spec.replicas}')
READY_PODS=$(kubectl get deployment mcp-server -n "$NAMESPACE" -o jsonpath='{.status.readyReplicas}')
READY_PODS=${READY_PODS:-0}
if [[ "$READY_PODS" -ge "$DESIRED_REPLICAS" ]]; then
  echo "✅ Deployment successful! $READY_PODS/$DESIRED_REPLICAS pods ready"
else
  echo "❌ Deployment failed! Only $READY_PODS/$DESIRED_REPLICAS pods ready"
  exit 1
fi

# Health check. The Service is of type ClusterIP and has no external address,
# so it is reached through a temporary port-forward.
echo "🏥 Running health check..."
LOCAL_PORT=${HEALTHCHECK_LOCAL_PORT:-18080}
kubectl port-forward -n "$NAMESPACE" service/mcp-server-service "$LOCAL_PORT:80" > /dev/null 2>&1 &
PORT_FORWARD_PID=$!
trap 'kill "$PORT_FORWARD_PID" 2>/dev/null || true' EXIT

HEALTHY=false
for _ in $(seq 1 10); do
  if curl -f -s "http://127.0.0.1:$LOCAL_PORT/health" > /dev/null; then
    HEALTHY=true
    break
  fi
  sleep 1
done

if [[ "$HEALTHY" == "true" ]]; then
  echo "✅ Health check passed"
else
  echo "❌ Health check failed"
  exit 1
fi

echo "🎉 Deployment to $ENVIRONMENT completed successfully!"

# Send a notification (best effort: a missing webhook or a Slack outage must
# not turn a successful deployment into a failed script run).
if [[ "$ENVIRONMENT" == "production" ]]; then
  if [[ -n "${SLACK_WEBHOOK:-}" ]]; then
    curl -X POST "$SLACK_WEBHOOK" \
      -H 'Content-type: application/json' \
      --data "{\"text\":\"🚀 MCP Server v$VERSION deployed to production successfully!\"}" \
      || echo "⚠️ Slack notification failed"
  else
    echo "ℹ️ SLACK_WEBHOOK not set; skipping notification"
  fi
fi
#!/bin/bash
# scripts/rollback.sh
#
# Rolls the mcp-server Deployment back to an earlier revision.
#
# Usage: rollback.sh [environment] [revision]
#   $1  environment suffix of the namespace mcp-server-<environment> (default: staging)
#   $2  revision number to roll back to; when omitted, kubectl selects the
#       previous revision itself.
# Environment:
#   SLACK_WEBHOOK  optional; when set, a notification is posted after the rollback.
set -euo pipefail

ENVIRONMENT=${1:-staging}
REVISION=${2:-}

echo "🔄 Rolling back MCP Server in $ENVIRONMENT"
NAMESPACE="mcp-server-$ENVIRONMENT"

# Prints the revision the Deployment is currently on, read from the annotation
# maintained by the deployment controller (more robust than parsing the
# human-readable `rollout history` table).
current_revision() {
  kubectl get deployment mcp-server -n "$NAMESPACE" \
    -o jsonpath='{.metadata.annotations.deployment\.kubernetes\.io/revision}'
}

# Current revision
CURRENT_REVISION=$(current_revision)
echo "📍 Current revision: $CURRENT_REVISION"

# Determine the target and execute the rollback. Without an explicit revision,
# let kubectl pick the previous one: revision numbers are not guaranteed to be
# contiguous, so "current - 1" may not exist.
if [[ -z "$REVISION" ]]; then
  TARGET="the previous revision"
  echo "🔙 Rolling back to previous revision"
  echo "⏪ Executing rollback..."
  kubectl rollout undo deployment/mcp-server -n "$NAMESPACE"
else
  if [[ ! "$REVISION" =~ ^[0-9]+$ ]]; then
    echo "❌ Invalid revision '$REVISION'. Use a positive integer"
    exit 1
  fi
  TARGET="revision $REVISION"
  echo "🔙 Rolling back to specified revision: $REVISION"
  echo "⏪ Executing rollback..."
  kubectl rollout undo deployment/mcp-server -n "$NAMESPACE" --to-revision="$REVISION"
fi

# Wait for the rollback to finish
echo "⏳ Waiting for rollback to complete..."
kubectl rollout status deployment/mcp-server -n "$NAMESPACE" --timeout=300s

# Verify the rollback
echo "✅ Verifying rollback..."
NEW_REVISION=$(current_revision)
echo "📍 New revision: $NEW_REVISION"
echo "🎉 Rollback completed successfully!"

# Send a notification (best effort)
if [[ -n "${SLACK_WEBHOOK:-}" ]]; then
  curl -X POST "$SLACK_WEBHOOK" \
    -H 'Content-type: application/json' \
    --data "{\"text\":\"⚠️ MCP Server rolled back to $TARGET in $ENVIRONMENT\"}" \
    || echo "⚠️ Slack notification failed"
else
  echo "ℹ️ SLACK_WEBHOOK not set; skipping notification"
fi
本章总结
第12章全面覆盖了MCP Server的测试、部署和运维:
核心知识点
- 测试体系:建立了完整的单元测试、集成测试和端到端测试框架
- 容器化部署:实现了Docker容器化和Kubernetes编排部署
- 监控可观测性:构建了Prometheus指标收集和Grafana可视化监控
- CI/CD流水线:建立了自动化的持续集成和部署流程
- 运维自动化:实现了部署、回滚和故障恢复的自动化脚本
实践要点
- 建立多层次的测试策略确保代码质量
- 使用容器化技术实现环境一致性
- 构建全面的监控和告警体系
- 实现自动化的部署和回滚机制
- 建立完善的日志和调试体系
通过本章学习,掌握了MCP Server在生产环境中的完整部署和运维体系,确保服务的高可用性和稳定性。