Chapter 12: Testing, Deployment, and Operations

Haiyue
38min

Chapter 12: Testing, Deployment, and Operations

Learning Objectives

  1. Establish a complete testing system (unit tests, integration tests)
  2. Master MCP Server packaging and deployment strategies
  3. Learn containerized deployment and orchestration
  4. Implement monitoring, alerting, and fault recovery
  5. Master version management and continuous integration/deployment

12.1 Testing System Development

12.1.1 Unit Testing Framework

// tests/unit/tools.test.ts
import { describe, it, expect, beforeEach, afterEach, jest } from '@jest/globals';
import { FileSystemMCPServer } from '../../src/projects/filesystem/FileSystemServer';

describe('FileSystem Tools', () => {
  let server: FileSystemMCPServer;
  let mockFs: any;

  beforeEach(async () => {
    // Mock file system: only the operations exercised below are stubbed.
    mockFs = {
      readFile: jest.fn(),
      writeFile: jest.fn(),
      readdir: jest.fn(),
      stat: jest.fn(),
    };

    // Minimal config — only the tools capability is exercised here.
    const config = {
      server: { name: 'test-server', version: '1.0.0' },
      capabilities: { tools: {} },
      runtime: {},
      security: {},
    };

    server = new FileSystemMCPServer(config, ['/tmp/test']);

    // BUG FIX: the mock was created but never wired into the server, so the
    // stubs had no effect and the `mockFs.readFile` assertions below could
    // never pass. Inject it the same way database.test.ts injects its mock
    // connection. NOTE(review): the internal property name ('fs') must match
    // FileSystemMCPServer's implementation — confirm against the source.
    (server as any).fs = mockFs;

    await server.start();
  });

  afterEach(async () => {
    await server.stop();
    jest.clearAllMocks();
  });

  describe('fs_read_file', () => {
    it('should read file successfully', async () => {
      // Prepare test data
      const testContent = 'Hello, World!';
      mockFs.readFile.mockResolvedValue(testContent);
      mockFs.stat.mockResolvedValue({
        size: testContent.length,
        mtime: new Date(),
      });

      // Execute test
      const result = await server.handleToolCall('fs_read_file', {
        path: '/tmp/test/example.txt'
      });

      // Verify results: content plus file metadata, read as UTF-8 text.
      expect(result).toHaveProperty('content', testContent);
      expect(result).toHaveProperty('metadata');
      expect(mockFs.readFile).toHaveBeenCalledWith('/tmp/test/example.txt', 'utf8');
    });

    it('should handle file not found error', async () => {
      mockFs.readFile.mockRejectedValue(new Error('ENOENT: no such file or directory'));

      // The server is expected to wrap the raw fs error in its own message.
      await expect(
        server.handleToolCall('fs_read_file', {
          path: '/tmp/test/nonexistent.txt'
        })
      ).rejects.toThrow('Failed to read file');
    });

    it('should validate file path permissions', async () => {
      // /etc/passwd is outside the allowed root (/tmp/test) passed to the
      // constructor, so the sandbox check must reject it before any fs call.
      await expect(
        server.handleToolCall('fs_read_file', {
          path: '/etc/passwd'
        })
      ).rejects.toThrow('Access denied');
    });
  });

  describe('fs_search_files', () => {
    it('should search files by pattern', async () => {
      const mockFiles = [
        '/tmp/test/file1.js',
        '/tmp/test/file2.ts',
        '/tmp/test/subdir/file3.js'
      ];

      // NOTE(review): jest.doMock only affects modules require()d *after*
      // this call; if the server imported 'glob' at load time this stub is
      // a no-op. Prefer jest.mock('glob', ...) at the top of the file.
      jest.doMock('glob', () => ({
        sync: jest.fn().mockReturnValue(mockFiles)
      }));

      const result = await server.handleToolCall('fs_search_files', {
        path: '/tmp/test',
        pattern: '**/*.js'
      });

      // Only the two .js entries should match the pattern.
      expect(result.results).toHaveLength(2);
      expect(result.results[0].path).toMatch(/\.js$/);
    });
  });
});

// tests/unit/database.test.ts
// NOTE(review): as a standalone file this needs its own imports, e.g.
//   import { describe, it, expect, beforeEach, jest } from '@jest/globals';
//   import { DatabaseMCPServer } from '../../src/projects/database/DatabaseServer';
// — confirm the server's actual module path.
describe('Database Tools', () => {
  let server: DatabaseMCPServer;
  let mockConnection: any;

  beforeEach(async () => {
    // BUG FIX: the original wrote
    //   connect: jest.fn().mockResolvedValue(mockConnection)
    // inside the initializing object literal, where `mockConnection` is
    // still undefined — connect() resolved to undefined. Build the object
    // first, then wire the self-referencing mock.
    mockConnection = {
      query: jest.fn(),
      execute: jest.fn(),
      end: jest.fn(),
    };
    mockConnection.connect = jest.fn().mockResolvedValue(mockConnection);

    const dbConfig = {
      type: 'postgresql' as const,
      connection: {
        host: 'localhost',
        database: 'test',
        username: 'test',
        password: 'test'
      }
    };

    server = new DatabaseMCPServer({
      server: { name: 'db-server', version: '1.0.0' },
      capabilities: { tools: {} },
      runtime: {},
      security: {},
    }, dbConfig);

    // Inject mock connection so no real database is contacted.
    (server as any).connection = mockConnection;
  });

  describe('db_query', () => {
    it('should execute SELECT query successfully', async () => {
      const mockResult = {
        rows: [{ id: 1, name: 'Test User' }],
        rowCount: 1,
        fields: [{ name: 'id' }, { name: 'name' }]
      };

      mockConnection.query.mockResolvedValue(mockResult);

      const result = await server.handleToolCall('db_query', {
        sql: 'SELECT * FROM users WHERE id = $1',
        params: ['1']
      });

      expect(result.rows).toEqual(mockResult.rows);
      expect(result.rowCount).toBe(1);
      // The server is expected to append a LIMIT guard to unbounded SELECTs.
      expect(mockConnection.query).toHaveBeenCalledWith(
        'SELECT * FROM users WHERE id = $1 LIMIT 100',
        ['1']
      );
    });

    it('should handle SQL syntax errors', async () => {
      mockConnection.query.mockRejectedValue(
        new Error('syntax error at or near "SELEC"')
      );

      // Driver errors must surface as the server's own error message.
      await expect(
        server.handleToolCall('db_query', {
          sql: 'SELEC * FROM users'
        })
      ).rejects.toThrow('Query execution failed');
    });

    it('should use query cache', async () => {
      const mockResult = { rows: [{ count: 5 }], rowCount: 1 };
      mockConnection.query.mockResolvedValue(mockResult);

      // First query populates the cache
      const result1 = await server.handleToolCall('db_query', {
        sql: 'SELECT COUNT(*) FROM users',
        useCache: true
      });

      // Second, identical query should be served from cache
      const result2 = await server.handleToolCall('db_query', {
        sql: 'SELECT COUNT(*) FROM users',
        useCache: true
      });

      expect(result2.fromCache).toBe(true);
      expect(mockConnection.query).toHaveBeenCalledTimes(1);
    });
  });
});

12.1.2 Integration Testing

// tests/integration/server.integration.test.ts
import { SecureMCPServer } from '../../src/SecureMCPServer';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';

describe('MCP Server Integration Tests', () => {
  let server: SecureMCPServer;
  let transport: StdioServerTransport;

  /**
   * Baseline test configuration: authentication and rate limiting off,
   * strict input validation and auditing on.
   */
  function buildTestConfig() {
    return {
      server: { name: 'integration-test', version: '1.0.0' },
      capabilities: { tools: {}, resources: {}, prompts: {} },
      runtime: { timeout: 5000 },
      security: {
        authentication: { required: false, methods: [], sessionTimeout: 3600000, maxLoginAttempts: 5, lockoutDuration: 900000 },
        authorization: { model: 'none' as const, requireExplicitPermissions: false },
        inputValidation: { enableStrictValidation: true, maxPayloadSize: 1024000, allowedContentTypes: ['application/json'], sanitizeInputs: true },
        rateLimiting: { enabled: false, windowMs: 60000, maxRequests: 100 },
        encryption: { algorithm: 'aes-256-gcm', keySize: 32, encryptSensitiveData: false },
        audit: { enabled: true, logLevel: 'standard' as const, retentionDays: 30 },
        cors: { enabled: false, allowedOrigins: [], allowedMethods: ['POST'], allowedHeaders: ['Content-Type'], credentials: false }
      }
    };
  }

  beforeEach(async () => {
    server = new SecureMCPServer(buildTestConfig());
    transport = new StdioServerTransport();
    await server.start();
  });

  afterEach(async () => {
    await server.stop();
  });

  it('should handle complete request/response cycle', async () => {
    // A plain tools/list round trip through the secure request pipeline.
    const listRequest = {
      jsonrpc: '2.0',
      id: 1,
      method: 'tools/list',
      params: {}
    };

    const response = await server.handleSecureRequest(listRequest, {
      clientIP: '127.0.0.1',
      userAgent: 'test-client'
    });

    expect(response.jsonrpc).toBe('2.0');
    expect(response.id).toBe(1);
    expect(response.result).toHaveProperty('tools');
  });

  it('should enforce rate limiting', async () => {
    // Clone the running server's config but cap the window at 2 requests.
    const throttledConfig = {
      ...server['config'],
      security: {
        ...server['config'].security,
        rateLimiting: {
          enabled: true,
          windowMs: 60000,
          maxRequests: 2
        }
      }
    };

    const throttledServer = new SecureMCPServer(throttledConfig);
    await throttledServer.start();

    try {
      const listRequest = {
        jsonrpc: '2.0',
        id: 1,
        method: 'tools/list',
        params: {}
      };
      const context = { clientIP: '127.0.0.1', userAgent: 'test' };

      // Requests 1 and 2 fit within the window...
      await throttledServer.handleSecureRequest(listRequest, context);
      await throttledServer.handleSecureRequest(listRequest, context);

      // ...request 3 must be rejected.
      const limited = await throttledServer.handleSecureRequest(listRequest, context);
      expect(limited.error?.code).toBe(-32006); // RATE_LIMIT_EXCEEDED
    } finally {
      await throttledServer.stop();
    }
  });

  it('should validate input parameters', async () => {
    // An empty tool name must fail input validation before dispatch.
    const badRequest = {
      jsonrpc: '2.0',
      id: 1,
      method: 'tools/call',
      params: {
        name: '',
        arguments: {}
      }
    };

    const response = await server.handleSecureRequest(badRequest, {
      clientIP: '127.0.0.1'
    });

    expect(response.error?.code).toBe(-32602); // INVALID_PARAMS
  });
});

// tests/integration/end-to-end.test.ts
describe('End-to-End Tests', () => {
  let serverProc: any;
  let mcpClient: any;

  beforeAll(async () => {
    // Boot the real server binary once for the whole suite.
    serverProc = await startTestServer();
    mcpClient = createTestClient();
    await mcpClient.connect();
  });

  afterAll(async () => {
    await mcpClient.disconnect();
    await stopTestServer(serverProc);
  });

  it('should handle complete workflow', async () => {
    // Step 1: protocol handshake.
    const initResponse = await mcpClient.send({
      method: 'initialize',
      params: {
        protocolVersion: '2024-11-05',
        clientInfo: { name: 'test-client', version: '1.0.0' },
        capabilities: {}
      }
    });
    expect(initResponse.result.serverInfo.name).toBeDefined();

    // Step 2: tool discovery.
    const toolsResponse = await mcpClient.send({ method: 'tools/list' });
    expect(toolsResponse.result.tools).toBeInstanceOf(Array);

    // Step 3: invoke the first advertised tool, when one exists.
    if (toolsResponse.result.tools.length > 0) {
      const firstTool = toolsResponse.result.tools[0];
      const callResponse = await mcpClient.send({
        method: 'tools/call',
        params: {
          name: firstTool.name,
          arguments: {}
        }
      });
      expect(callResponse.result).toBeDefined();
    }

    // Step 4: resource discovery.
    const resourcesResponse = await mcpClient.send({ method: 'resources/list' });
    expect(resourcesResponse.result.resources).toBeInstanceOf(Array);
  });
});

// Test helper functions
/**
 * Spawn the built test server (dist/test-server.js) as a child process and
 * resolve with the ChildProcess once it logs its readiness banner.
 *
 * Fixes over the original: the child is killed when startup times out (it
 * previously leaked), and the timeout timer is cleared on a spawn error (it
 * previously kept running and rejected a settled promise).
 *
 * @returns the spawned ChildProcess, ready to serve requests
 * @throws if the process errors on spawn or does not start within 10s
 */
async function startTestServer(): Promise<any> {
  const { spawn } = require('child_process');

  const serverProcess = spawn('node', ['dist/test-server.js'], {
    stdio: ['pipe', 'pipe', 'pipe'],
    env: { ...process.env, NODE_ENV: 'test' }
  });

  // Wait until the server prints "Server started" on stdout.
  await new Promise((resolve, reject) => {
    const timeout = setTimeout(() => {
      serverProcess.kill(); // don't leak the child on a hung startup
      reject(new Error('Server start timeout'));
    }, 10000);

    serverProcess.stdout.on('data', (data: Buffer) => {
      if (data.toString().includes('Server started')) {
        clearTimeout(timeout);
        resolve(undefined);
      }
    });

    serverProcess.on('error', (err: Error) => {
      clearTimeout(timeout);
      reject(err);
    });
  });

  return serverProcess;
}

/**
 * Build a stubbed MCP client whose connect/disconnect/send are jest mocks.
 * send() always resolves with a generic success payload — no transport is
 * actually opened.
 */
function createTestClient(): any {
  const respond = async (_message: any) => {
    // Stubbed transport: every request "succeeds" with a generic payload.
    return { result: { success: true } };
  };

  return {
    connect: jest.fn().mockResolvedValue(undefined),
    disconnect: jest.fn().mockResolvedValue(undefined),
    send: jest.fn().mockImplementation(respond),
  };
}

/**
 * Stop a spawned test server process.
 *
 * Sends SIGTERM and waits for the child's 'exit' event, falling back to the
 * original fixed 1s delay when no event emitter is available. The parameter
 * was renamed from `process` to `child` because it shadowed the Node.js
 * global `process`.
 *
 * @param child - the ChildProcess returned by startTestServer (may be null/undefined)
 */
async function stopTestServer(child: any): Promise<void> {
  if (!child) {
    return;
  }

  child.kill();

  await new Promise<void>(resolve => {
    const fallback = setTimeout(resolve, 1000);
    if (typeof child.once === 'function') {
      child.once('exit', () => {
        clearTimeout(fallback);
        resolve();
      });
    }
  });
}

12.2 Deployment Strategy and Containerization

12.2.1 Docker Containerization

# Dockerfile
# Multi-stage build: compile TypeScript in a throwaway stage, ship a slim
# runtime image with production dependencies only.
FROM node:18-alpine AS builder

WORKDIR /app

# Copy package files first for layer caching
COPY package*.json ./
COPY tsconfig*.json ./

# BUG FIX: install ALL dependencies here (the original used
# --only=production, which strips the devDependencies — typescript etc. —
# that `npm run build` needs). Production deps are installed separately
# in the runtime stage below.
RUN npm ci

# Copy source code and build
COPY src ./src
RUN npm run build

# Production image
FROM node:18-alpine AS production

WORKDIR /app

# Install dumb-init for proper PID-1 signal handling
RUN apk add --no-cache dumb-init

# Create non-root user
RUN addgroup -g 1001 -S nodejs && \
    adduser -S mcpserver -u 1001

# Install runtime dependencies only (no devDependencies in the final image;
# --omit=dev is the npm 8+ spelling of --only=production)
COPY package*.json ./
RUN npm ci --omit=dev && npm cache clean --force

# Copy build artifacts from the builder stage
COPY --from=builder /app/dist ./dist

# Create writable directories owned by the runtime user
RUN mkdir -p /app/logs /app/data && \
    chown -R mcpserver:nodejs /app

# Set environment variables
ENV NODE_ENV=production
ENV LOG_LEVEL=info
ENV PORT=3000

# Expose port
EXPOSE 3000

# Switch to non-root user
USER mcpserver

# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
  CMD node dist/healthcheck.js

# Startup command: dumb-init forwards signals so SIGTERM reaches node
ENTRYPOINT ["dumb-init", "--"]
CMD ["node", "dist/server.js"]
# docker-compose.yml
# Local/dev stack: MCP server + Postgres + Redis behind an nginx proxy.
version: '3.8'

services:
  mcp-server:
    build: .
    ports:
      - "3000:3000"
    # NOTE(review): credentials are hardcoded for local development only —
    # move them to an .env file or secrets store for anything shared.
    environment:
      - NODE_ENV=production
      - LOG_LEVEL=info
      - DB_HOST=postgres
      - DB_NAME=mcpserver
      - DB_USER=postgres
      - DB_PASS=password
      - REDIS_URL=redis://redis:6379
    volumes:
      - ./data:/app/data
      - ./logs:/app/logs
      - ./config:/app/config:ro
    # Start only after both backing services report healthy.
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    restart: unless-stopped
    networks:
      - mcp-network

  postgres:
    image: postgres:15-alpine
    environment:
      - POSTGRES_DB=mcpserver
      - POSTGRES_USER=postgres
      - POSTGRES_PASSWORD=password
    volumes:
      - postgres_data:/var/lib/postgresql/data
      # SQL/shell scripts here run once on first container init.
      - ./init-db:/docker-entrypoint-initdb.d:ro
    ports:
      - "5432:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - mcp-network

  redis:
    image: redis:7-alpine
    # AOF persistence so cached data survives restarts.
    command: redis-server --appendonly yes
    volumes:
      - redis_data:/data
    ports:
      - "6379:6379"
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 3s
      retries: 3
    networks:
      - mcp-network

  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./nginx/ssl:/etc/nginx/ssl:ro
    depends_on:
      - mcp-server
    restart: unless-stopped
    networks:
      - mcp-network

volumes:
  postgres_data:
  redis_data:

networks:
  mcp-network:
    driver: bridge

12.2.2 Kubernetes Deployment

# k8s/namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
  name: mcp-server
  labels:
    name: mcp-server

---
# k8s/configmap.yaml
# Non-sensitive runtime configuration, mounted into the pod at /app/config.
apiVersion: v1
kind: ConfigMap
metadata:
  name: mcp-server-config
  namespace: mcp-server
data:
  server.json: |
    {
      "server": {
        "name": "production-mcp-server",
        "version": "1.0.0"
      },
      "capabilities": {
        "tools": { "listChanged": true },
        "resources": { "subscribe": true, "listChanged": true },
        "prompts": { "listChanged": true }
      },
      "runtime": {
        "maxConnections": 100,
        "timeout": 30000,
        "enableMetrics": true
      },
      "security": {
        "rateLimiting": {
          "enabled": true,
          "maxRequests": 1000,
          "windowMs": 60000
        }
      }
    }

---
# k8s/secret.yaml
# NOTE(review): base64 is encoding, not encryption — never commit real
# secret values in this file. Use sealed-secrets/external-secrets, or
# inject values at deploy time.
apiVersion: v1
kind: Secret
metadata:
  name: mcp-server-secrets
  namespace: mcp-server
type: Opaque
data:
  db-password: cGFzc3dvcmQxMjM=  # base64 encoded
  jwt-secret: bXlzZWNyZXRrZXk=
  api-key-secret: YXBpa2V5c2VjcmV0

---
# k8s/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: mcp-server
  namespace: mcp-server
  labels:
    app: mcp-server
spec:
  replicas: 3
  selector:
    matchLabels:
      app: mcp-server
  template:
    metadata:
      labels:
        app: mcp-server
    spec:
      containers:
      - name: mcp-server
        # NOTE(review): the CI pipeline rewrites the image tag with sed
        # (it searches for IMAGE_TAG; deploy.sh searches for
        # IMAGE_PLACEHOLDER), but this line contains neither token —
        # confirm which placeholder the pipeline actually expects.
        image: mcp-server:latest
        ports:
        - containerPort: 3000
          name: http
        env:
        - name: NODE_ENV
          value: "production"
        - name: PORT
          value: "3000"
        - name: DB_HOST
          value: "postgres-service"
        # Sensitive values come from the Secret above, not plain env.
        - name: DB_PASSWORD
          valueFrom:
            secretKeyRef:
              name: mcp-server-secrets
              key: db-password
        - name: JWT_SECRET
          valueFrom:
            secretKeyRef:
              name: mcp-server-secrets
              key: jwt-secret
        volumeMounts:
        - name: config
          mountPath: /app/config
          readOnly: true
        - name: data
          mountPath: /app/data
        resources:
          requests:
            memory: "256Mi"
            cpu: "100m"
          limits:
            memory: "512Mi"
            cpu: "500m"
        # Liveness and readiness both hit the same /health endpoint;
        # readiness starts probing sooner and more frequently.
        livenessProbe:
          httpGet:
            path: /health
            port: 3000
          initialDelaySeconds: 30
          periodSeconds: 10
        readinessProbe:
          httpGet:
            path: /health
            port: 3000
          initialDelaySeconds: 5
          periodSeconds: 5
      volumes:
      - name: config
        configMap:
          name: mcp-server-config
      - name: data
        persistentVolumeClaim:
          claimName: mcp-server-data

---
# k8s/service.yaml
# ClusterIP service fronting the deployment; the Ingress below routes to it.
apiVersion: v1
kind: Service
metadata:
  name: mcp-server-service
  namespace: mcp-server
  labels:
    app: mcp-server
spec:
  selector:
    app: mcp-server
  ports:
  - port: 80
    targetPort: 3000
    protocol: TCP
    name: http
  type: ClusterIP

---
# k8s/ingress.yaml
# TLS termination via cert-manager (letsencrypt-prod ClusterIssuer).
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: mcp-server-ingress
  namespace: mcp-server
  annotations:
    nginx.ingress.kubernetes.io/rewrite-target: /
    cert-manager.io/cluster-issuer: letsencrypt-prod
spec:
  tls:
  - hosts:
    - mcp-server.example.com
    secretName: mcp-server-tls
  rules:
  - host: mcp-server.example.com
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: mcp-server-service
            port:
              number: 80

---
# k8s/pvc.yaml
# NOTE(review): ReadWriteOnce mounts on a single node, but the Deployment
# runs 3+ replicas — confirm the storage class supports multi-attach or
# switch to ReadWriteMany / per-pod storage.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: mcp-server-data
  namespace: mcp-server
spec:
  accessModes:
  - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi
  storageClassName: standard

---
# k8s/hpa.yaml
# Scale 3..10 replicas on CPU (70%) or memory (80%) utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: mcp-server-hpa
  namespace: mcp-server
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: mcp-server
  minReplicas: 3
  maxReplicas: 10
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 70
  - type: Resource
    resource:
      name: memory
      target:
        type: Utilization
        averageUtilization: 80

12.3 Monitoring and Observability

12.3.1 Metrics Collection

// src/monitoring/MetricsExporter.ts
import { Registry, Counter, Histogram, Gauge, collectDefaultMetrics } from 'prom-client';

export class PrometheusMetrics {
  private registry: Registry;
  private httpRequests: Counter;
  private httpDuration: Histogram;
  private activeConnections: Gauge;
  private toolExecutions: Counter;
  private errorRate: Counter;

  constructor() {
    this.registry = new Registry();

    // HTTP request counter
    this.httpRequests = new Counter({
      name: 'mcp_http_requests_total',
      help: 'Total number of HTTP requests',
      labelNames: ['method', 'route', 'status_code'],
      registers: [this.registry]
    });

    // HTTP request duration
    this.httpDuration = new Histogram({
      name: 'mcp_http_request_duration_seconds',
      help: 'HTTP request duration in seconds',
      labelNames: ['method', 'route'],
      buckets: [0.1, 0.5, 1, 2, 5, 10],
      registers: [this.registry]
    });

    // Active connections
    this.activeConnections = new Gauge({
      name: 'mcp_active_connections',
      help: 'Number of active MCP connections',
      registers: [this.registry]
    });

    // Tool execution counter
    this.toolExecutions = new Counter({
      name: 'mcp_tool_executions_total',
      help: 'Total number of tool executions',
      labelNames: ['tool_name', 'status'],
      registers: [this.registry]
    });

    // Error rate
    this.errorRate = new Counter({
      name: 'mcp_errors_total',
      help: 'Total number of errors',
      labelNames: ['type', 'severity'],
      registers: [this.registry]
    });

    // System metrics
    this.collectDefaultMetrics();
  }

  private collectDefaultMetrics(): void {
    const collectDefaultMetrics = require('prom-client').collectDefaultMetrics;
    collectDefaultMetrics({ register: this.registry });
  }

  recordHttpRequest(method: string, route: string, statusCode: number, duration: number): void {
    this.httpRequests.inc({
      method,
      route,
      status_code: statusCode.toString()
    });

    this.httpDuration.observe({ method, route }, duration / 1000);
  }

  recordToolExecution(toolName: string, success: boolean): void {
    this.toolExecutions.inc({
      tool_name: toolName,
      status: success ? 'success' : 'error'
    });
  }

  recordError(type: string, severity: string): void {
    this.errorRate.inc({ type, severity });
  }

  setActiveConnections(count: number): void {
    this.activeConnections.set(count);
  }

  getMetrics(): Promise<string> {
    return this.registry.metrics();
  }

  getRegistry(): Registry {
    return this.registry;
  }
}

// src/monitoring/HealthCheck.ts
export class HealthCheckEndpoint {
  private dependencies: Map<string, HealthCheckFunction> = new Map();

  interface HealthCheckFunction {
    (): Promise<{ status: 'healthy' | 'unhealthy'; message?: string; details?: any }>;
  }

  addDependency(name: string, checkFn: HealthCheckFunction): void {
    this.dependencies.set(name, checkFn);
  }

  async getHealth(): Promise<{
    status: 'healthy' | 'degraded' | 'unhealthy';
    timestamp: string;
    uptime: number;
    dependencies: Record<string, any>;
    system: {
      memory: any;
      cpu: any;
      eventLoop: any;
    };
  }> {
    const startTime = process.hrtime();
    const results: Record<string, any> = {};
    let overallStatus: 'healthy' | 'degraded' | 'unhealthy' = 'healthy';

    // Check all dependencies
    for (const [name, checkFn] of this.dependencies) {
      try {
        const result = await Promise.race([
          checkFn(),
          new Promise((_, reject) =>
            setTimeout(() => reject(new Error('Health check timeout')), 5000)
          )
        ]) as any;

        results[name] = result;

        if (result.status === 'unhealthy') {
          overallStatus = 'unhealthy';
        } else if (result.status === 'degraded' && overallStatus === 'healthy') {
          overallStatus = 'degraded';
        }
      } catch (error) {
        results[name] = {
          status: 'unhealthy',
          message: error instanceof Error ? error.message : 'Unknown error'
        };
        overallStatus = 'unhealthy';
      }
    }

    // System information
    const memUsage = process.memoryUsage();
    const cpuUsage = process.cpuUsage();

    const [seconds, nanoseconds] = process.hrtime(startTime);
    const eventLoopDelay = (seconds * 1000) + (nanoseconds / 1000000);

    return {
      status: overallStatus,
      timestamp: new Date().toISOString(),
      uptime: process.uptime(),
      dependencies: results,
      system: {
        memory: {
          heapUsed: Math.round(memUsage.heapUsed / 1024 / 1024),
          heapTotal: Math.round(memUsage.heapTotal / 1024 / 1024),
          rss: Math.round(memUsage.rss / 1024 / 1024)
        },
        cpu: {
          user: Math.round(cpuUsage.user / 1000),
          system: Math.round(cpuUsage.system / 1000)
        },
        eventLoop: {
          delay: Math.round(eventLoopDelay * 100) / 100
        }
      }
    };
  }
}

12.3.2 Monitoring Configuration

# monitoring/prometheus.yml
# Scrape configuration: MCP server plus host and Postgres exporters,
# with alerts evaluated every 15s and routed to Alertmanager.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

rule_files:
  - "alert_rules.yml"

alerting:
  alertmanagers:
    - static_configs:
        - targets:
          - alertmanager:9093

scrape_configs:
  # The MCP server is scraped through its ClusterIP service on /metrics.
  - job_name: 'mcp-server'
    static_configs:
      - targets: ['mcp-server-service:80']
    metrics_path: /metrics
    scrape_interval: 10s
    scrape_timeout: 5s

  - job_name: 'node-exporter'
    static_configs:
      - targets: ['node-exporter:9100']

  - job_name: 'postgres-exporter'
    static_configs:
      - targets: ['postgres-exporter:9187']

# monitoring/alert_rules.yml
groups:
  - name: mcp-server-alerts
    rules:
      # Sustained error rate above 0.1/s for 2 minutes.
      - alert: HighErrorRate
        expr: rate(mcp_errors_total[5m]) > 0.1
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: High error rate detected
          description: "Error rate is {{ $value }} errors per second"

      # p95 latency above 2s for 5 minutes.
      - alert: HighResponseTime
        expr: histogram_quantile(0.95, rate(mcp_http_request_duration_seconds_bucket[5m])) > 2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: High response time
          description: "95th percentile response time is {{ $value }}s"

      # RSS above 500 MiB — matches the pod's 512Mi limit headroom.
      - alert: HighMemoryUsage
        expr: process_resident_memory_bytes / 1024 / 1024 > 500
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: High memory usage
          description: "Memory usage is {{ $value }}MB"

      # Scrape target unreachable for a full minute — page on this one.
      - alert: ServiceDown
        expr: up{job="mcp-server"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: MCP Server is down
          description: "MCP Server has been down for more than 1 minute"

# monitoring/grafana-dashboard.json
{
  "dashboard": {
    "title": "MCP Server Dashboard",
    "panels": [
      {
        "title": "Request Rate",
        "type": "graph",
        "targets": [
          {
            "expr": "rate(mcp_http_requests_total[5m])",
            "legendFormat": "{{method}} {{route}}"
          }
        ]
      },
      {
        "title": "Response Time",
        "type": "graph",
        "targets": [
          {
            "expr": "histogram_quantile(0.95, rate(mcp_http_request_duration_seconds_bucket[5m]))",
            "legendFormat": "95th percentile"
          },
          {
            "expr": "histogram_quantile(0.50, rate(mcp_http_request_duration_seconds_bucket[5m]))",
            "legendFormat": "50th percentile"
          }
        ]
      },
      {
        "title": "Active Connections",
        "type": "singlestat",
        "targets": [
          {
            "expr": "mcp_active_connections",
            "legendFormat": "Connections"
          }
        ]
      },
      {
        "title": "Memory Usage",
        "type": "graph",
        "targets": [
          {
            "expr": "process_resident_memory_bytes / 1024 / 1024",
            "legendFormat": "RSS Memory (MB)"
          },
          {
            "expr": "nodejs_heap_size_used_bytes / 1024 / 1024",
            "legendFormat": "Heap Used (MB)"
          }
        ]
      },
      {
        "title": "Tool Executions",
        "type": "graph",
        "targets": [
          {
            "expr": "rate(mcp_tool_executions_total[5m])",
            "legendFormat": "{{tool_name}} ({{status}})"
          }
        ]
      },
      {
        "title": "Error Rate",
        "type": "graph",
        "targets": [
          {
            "expr": "rate(mcp_errors_total[5m])",
            "legendFormat": "{{type}} ({{severity}})"
          }
        ]
      }
    ]
  }
}

12.4 CI/CD Pipeline

12.4.1 GitHub Actions Configuration

# .github/workflows/ci.yml
# Pipeline: test (lint/types/unit/integration/audit) -> build+push image
# -> deploy to EKS on main -> Slack notification.
name: CI/CD Pipeline

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main ]
  release:
    types: [ published ]

env:
  NODE_VERSION: '18'
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  # Runs on every push/PR, with real Postgres and Redis side containers
  # for the integration suite.
  test:
    runs-on: ubuntu-latest

    services:
      postgres:
        image: postgres:15
        env:
          POSTGRES_PASSWORD: postgres
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - 5432:5432

      redis:
        image: redis:7
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 3s
          --health-retries 5
        ports:
          - 6379:6379

    steps:
    - name: Checkout code
      uses: actions/checkout@v3

    - name: Setup Node.js
      uses: actions/setup-node@v3
      with:
        node-version: ${{ env.NODE_VERSION }}
        cache: 'npm'

    - name: Install dependencies
      run: npm ci

    - name: Run linting
      run: npm run lint

    - name: Run type checking
      run: npm run type-check

    - name: Run unit tests
      run: npm run test:unit
      env:
        NODE_ENV: test

    - name: Run integration tests
      run: npm run test:integration
      env:
        NODE_ENV: test
        DB_HOST: localhost
        DB_PORT: 5432
        REDIS_HOST: localhost
        REDIS_PORT: 6379

    - name: Generate coverage report
      run: npm run coverage

    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v3
      with:
        token: ${{ secrets.CODECOV_TOKEN }}

    - name: Build application
      run: npm run build

    - name: Run security audit
      run: npm audit --audit-level moderate

  # Builds and pushes a multi-arch image; skipped for PRs.
  build:
    needs: test
    runs-on: ubuntu-latest
    if: github.event_name != 'pull_request'

    steps:
    - name: Checkout code
      uses: actions/checkout@v3

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v2

    - name: Log in to Container Registry
      uses: docker/login-action@v2
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    - name: Extract metadata
      id: meta
      uses: docker/metadata-action@v4
      with:
        images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
        # BUG FIX: the deploy job pins the manifest image to github.sha, but
        # no SHA-tagged image was being pushed. "type=sha,format=long,prefix="
        # publishes a tag equal to the full commit SHA so the pin resolves.
        tags: |
          type=ref,event=branch
          type=ref,event=pr
          type=semver,pattern={{version}}
          type=semver,pattern={{major}}.{{minor}}
          type=sha,format=long,prefix=

    - name: Build and push Docker image
      uses: docker/build-push-action@v4
      with:
        context: .
        platforms: linux/amd64,linux/arm64
        push: true
        tags: ${{ steps.meta.outputs.tags }}
        labels: ${{ steps.meta.outputs.labels }}
        cache-from: type=gha
        cache-to: type=gha,mode=max

  # Deploys only from main, gated by the 'production' environment.
  deploy:
    needs: build
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/main'
    environment: production

    steps:
    - name: Checkout code
      uses: actions/checkout@v3

    - name: Setup kubectl
      uses: azure/setup-kubectl@v3
      with:
        version: 'v1.28.0'

    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v2
      with:
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        aws-region: us-west-2

    - name: Update kubeconfig
      run: |
        aws eks update-kubeconfig --region us-west-2 --name production-cluster

    - name: Deploy to Kubernetes
      run: |
        # NOTE(review): this sed only takes effect if k8s/deployment.yaml
        # contains the literal token IMAGE_TAG — the manifest shown in this
        # repo pins mcp-server:latest instead; confirm the placeholder.
        sed -i "s|IMAGE_TAG|${{ github.sha }}|g" k8s/deployment.yaml
        kubectl apply -f k8s/
        kubectl rollout status deployment/mcp-server -n mcp-server

    - name: Run smoke tests
      run: |
        npm run test:smoke
      env:
        TEST_URL: https://mcp-server.example.com

  # Always runs so failures are reported too.
  notify:
    needs: [test, build, deploy]
    runs-on: ubuntu-latest
    if: always()

    steps:
    - name: Notify Slack
      uses: 8398a7/action-slack@v3
      with:
        status: ${{ job.status }}
        channel: '#deployments'
        webhook_url: ${{ secrets.SLACK_WEBHOOK }}
        fields: repo,message,commit,author,action,eventName,ref,workflow

12.4.2 Deployment Scripts

#!/bin/bash
# scripts/deploy.sh
#
# Deploy the MCP Server image to a Kubernetes cluster.
#
# Usage: deploy.sh [environment] [version]
#   environment  'staging' (default) or 'production'
#   version      image tag to deploy (default: 'latest')
#
# Requires: kubectl configured for the target cluster, envsubst, curl.
# Optional: SLACK_WEBHOOK env var for production deploy notifications.

set -euo pipefail

ENVIRONMENT=${1:-staging}
VERSION=${2:-latest}

echo "🚀 Deploying MCP Server to $ENVIRONMENT"
echo "📦 Version: $VERSION"

# Validate environment
if [[ ! "$ENVIRONMENT" =~ ^(staging|production)$ ]]; then
  echo "❌ Invalid environment. Use 'staging' or 'production'"
  exit 1
fi

# Set variables
NAMESPACE="mcp-server-$ENVIRONMENT"
IMAGE="ghcr.io/company/mcp-server:$VERSION"

# Check kubectl connection
echo "🔍 Checking Kubernetes connection..."
kubectl cluster-info || {
  echo "❌ Cannot connect to Kubernetes cluster"
  exit 1
}

# Create namespace (idempotent: dry-run renders the manifest, apply upserts it)
echo "📂 Creating namespace if not exists..."
kubectl create namespace "$NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -

# Apply configuration (envsubst expands $VARS from the current environment)
echo "⚙️ Applying configuration..."
envsubst < k8s/configmap.yaml | kubectl apply -n "$NAMESPACE" -f -
envsubst < k8s/secret.yaml | kubectl apply -n "$NAMESPACE" -f -

# Update deployment, substituting the concrete image reference
echo "🔄 Updating deployment..."
sed "s|IMAGE_PLACEHOLDER|$IMAGE|g" k8s/deployment.yaml | \
kubectl apply -n "$NAMESPACE" -f -

# Wait for the rollout to finish (fails the script after 5 minutes)
echo "⏳ Waiting for deployment to complete..."
kubectl rollout status deployment/mcp-server -n "$NAMESPACE" --timeout=300s

# Verify deployment: compare Running pods against the desired replica count
echo "✅ Verifying deployment..."
READY_PODS=$(kubectl get pods -n "$NAMESPACE" -l app=mcp-server --field-selector=status.phase=Running --no-headers | wc -l)
DESIRED_REPLICAS=$(kubectl get deployment mcp-server -n "$NAMESPACE" -o jsonpath='{.spec.replicas}')

if [[ "$READY_PODS" -eq "$DESIRED_REPLICAS" ]]; then
  echo "✅ Deployment successful! $READY_PODS/$DESIRED_REPLICAS pods ready"
else
  echo "❌ Deployment failed! Only $READY_PODS/$DESIRED_REPLICAS pods ready"
  exit 1
fi

# Run health check against the service's load-balancer endpoint.
# Guard against an empty hostname: the LB address may not be provisioned yet,
# which would otherwise turn the curl into a request against a bogus URL.
echo "🏥 Running health check..."
SERVICE_URL=$(kubectl get service mcp-server-service -n "$NAMESPACE" -o jsonpath='{.status.loadBalancer.ingress[0].hostname}')
if [[ -z "$SERVICE_URL" ]]; then
  echo "❌ Load balancer hostname not available yet"
  exit 1
fi
if curl -f -s "http://$SERVICE_URL/health" > /dev/null; then
  echo "✅ Health check passed"
else
  echo "❌ Health check failed"
  exit 1
fi

echo "🎉 Deployment to $ENVIRONMENT completed successfully!"

# Send notification. ${SLACK_WEBHOOK:-} avoids an unbound-variable abort
# under 'set -u' when the webhook is not configured — without it, an
# otherwise successful deploy would exit non-zero on this final step.
if [[ "$ENVIRONMENT" == "production" && -n "${SLACK_WEBHOOK:-}" ]]; then
  curl -X POST "$SLACK_WEBHOOK" \
    -H 'Content-type: application/json' \
    --data "{\"text\":\"🚀 MCP Server v$VERSION deployed to production successfully!\"}"
fi
#!/bin/bash
# scripts/rollback.sh
#
# Roll back the MCP Server deployment to a previous revision.
#
# Usage: rollback.sh [environment] [revision]
#   environment  'staging' (default) or 'production'
#   revision     target revision number; defaults to the revision just
#                before the current one
#
# Requires: kubectl configured for the target cluster.
# Optional: SLACK_WEBHOOK env var for rollback notifications.

set -euo pipefail

ENVIRONMENT=${1:-staging}
REVISION=${2:-}

echo "🔄 Rolling back MCP Server in $ENVIRONMENT"

NAMESPACE="mcp-server-$ENVIRONMENT"

# Get current revision (the last line of the rollout history is the newest)
CURRENT_REVISION=$(kubectl rollout history deployment/mcp-server -n "$NAMESPACE" | tail -n1 | awk '{print $1}')
echo "📍 Current revision: $CURRENT_REVISION"

# Determine the rollback target; refuse to roll back past revision 1
# (CURRENT_REVISION - 1 would be 0, which kubectl interprets as
# "previous revision" instead of failing, masking the real problem).
if [[ -z "$REVISION" ]]; then
  REVISION=$((CURRENT_REVISION - 1))
  if [[ "$REVISION" -lt 1 ]]; then
    echo "❌ No previous revision to roll back to"
    exit 1
  fi
  echo "🔙 Rolling back to previous revision: $REVISION"
else
  echo "🔙 Rolling back to specified revision: $REVISION"
fi

# Execute rollback
echo "⏪ Executing rollback..."
kubectl rollout undo deployment/mcp-server -n "$NAMESPACE" --to-revision="$REVISION"

# Wait for rollback to complete (fails the script after 5 minutes)
echo "⏳ Waiting for rollback to complete..."
kubectl rollout status deployment/mcp-server -n "$NAMESPACE" --timeout=300s

# Verify rollback by re-reading the newest revision number
echo "✅ Verifying rollback..."
NEW_REVISION=$(kubectl rollout history deployment/mcp-server -n "$NAMESPACE" | tail -n1 | awk '{print $1}')
echo "📍 New revision: $NEW_REVISION"

echo "🎉 Rollback completed successfully!"

# Send notification; skip cleanly if SLACK_WEBHOOK is not set, instead of
# aborting with an unbound-variable error under 'set -u'.
if [[ -n "${SLACK_WEBHOOK:-}" ]]; then
  curl -X POST "$SLACK_WEBHOOK" \
    -H 'Content-type: application/json' \
    --data "{\"text\":\"⚠️ MCP Server rolled back to revision $REVISION in $ENVIRONMENT\"}"
fi

Chapter Summary

Chapter 12 comprehensively covers testing, deployment, and operations for MCP Server:

Key Knowledge Points

  1. Testing System: Established a complete framework for unit tests, integration tests, and end-to-end tests
  2. Containerized Deployment: Implemented Docker containerization and Kubernetes orchestration deployment
  3. Monitoring and Observability: Built Prometheus metrics collection and Grafana visualization monitoring
  4. CI/CD Pipeline: Established automated continuous integration and deployment processes
  5. Operations Automation: Implemented automated scripts for deployment, rollback, and fault recovery

Practice Points

  • Establish multi-layered testing strategies to ensure code quality
  • Use containerization technology to achieve environment consistency
  • Build comprehensive monitoring and alerting systems
  • Implement automated deployment and rollback mechanisms
  • Establish complete logging and debugging systems

Through this chapter, you’ve mastered the complete deployment and operations system for MCP Server in production environments, ensuring high availability and stability of services.