第8章：错误处理和日志系统

Haiyue · 2025/9/1 · 大约 14 分钟

第8章：错误处理和日志系统

学习目标

建立完善的错误处理机制
实现结构化日志记录系统
掌握异常捕获和恢复策略
学习错误码和错误消息规范
实现调试和诊断功能

8.1 MCP协议错误处理规范

8.1.1 MCP标准错误码

根据MCP协议规范，错误处理遵循JSON-RPC 2.0标准。下面的枚举先列出JSON-RPC 2.0的标准错误码，再在其为服务端实现保留的 -32000 ~ -32099 区间内定义本章使用的特定错误码：

// src/errors/ErrorCodes.ts
/**
 * Numeric error codes used as the `code` field of an {@link MCPError}.
 *
 * The first group mirrors the JSON-RPC standard codes; the second group
 * holds the additional codes used by this server, all within -32000..-32012.
 */
export enum MCPErrorCode {
  // Standard JSON-RPC error codes
  PARSE_ERROR = -32700,
  INVALID_REQUEST = -32600,
  METHOD_NOT_FOUND = -32601,
  INVALID_PARAMS = -32602,
  INTERNAL_ERROR = -32603,
  
  // MCP-specific error codes
  INITIALIZATION_FAILED = -32000,
  CAPABILITY_NOT_SUPPORTED = -32001,
  RESOURCE_NOT_FOUND = -32002,
  RESOURCE_ACCESS_DENIED = -32003,
  TOOL_EXECUTION_FAILED = -32004,
  PROMPT_GENERATION_FAILED = -32005,
  RATE_LIMIT_EXCEEDED = -32006,
  AUTHENTICATION_FAILED = -32007,
  AUTHORIZATION_FAILED = -32008,
  VALIDATION_ERROR = -32009,
  TIMEOUT_ERROR = -32010,
  CONNECTION_LOST = -32011,
  PROTOCOL_VERSION_MISMATCH = -32012,
}

/**
 * Shape of the error object placed in the `error` member of a JSON-RPC
 * response by the handlers in this file.
 */
export interface MCPError {
  /** Machine-readable error code. */
  code: MCPErrorCode;
  /** Short human-readable summary. */
  message: string;
  /** Optional diagnostic payload; every field inside is optional. */
  data?: {
    /** Longer explanation of the failure. */
    details?: string;
    /** Underlying cause, as text. */
    cause?: string;
    /** Creation time; producers in this file use `Date#toISOString()`. */
    timestamp?: string;
    /** Identifier of the request that failed. */
    requestId?: string;
    /** Stack trace; producers in this file only fill it in when NODE_ENV is 'development'. */
    stack?: string;
    /** Free-form structured values related to the failure. */
    context?: Record<string, any>;
  };
}

/**
 * Static factories that build {@link MCPError} payloads for the common
 * failure cases. Every payload carries an ISO-8601 `timestamp`.
 */
export class MCPErrorBuilder {
  /** Current time as an ISO-8601 string. */
  private static now(): string {
    return new Date().toISOString();
  }

  /**
   * Converts an arbitrary `details` value to text.
   *
   * Strings are passed through, `null`/`undefined` become `undefined`,
   * errors use their `toString()` form and other objects are serialised as
   * JSON so they do not collapse to "[object Object]".
   */
  private static describe(details: unknown): string | undefined {
    if (details === null || details === undefined) {
      return undefined;
    }
    if (typeof details === 'string') {
      return details;
    }
    if (details instanceof Error) {
      return details.toString();
    }
    if (typeof details === 'object') {
      try {
        const json = JSON.stringify(details);
        if (json !== undefined) {
          return json;
        }
      } catch {
        // Circular structures or BigInt members: fall back to String() below.
      }
    }
    return String(details);
  }

  /**
   * Builds a PARSE_ERROR payload.
   *
   * @param message Summary; an empty string falls back to 'Parse error'.
   * @param details Optional extra information of any type, stored as text.
   */
  static parseError(message: string, details?: unknown): MCPError {
    return {
      code: MCPErrorCode.PARSE_ERROR,
      message: message || 'Parse error',
      data: {
        details: MCPErrorBuilder.describe(details),
        timestamp: MCPErrorBuilder.now(),
      },
    };
  }

  /**
   * Builds an INVALID_REQUEST payload.
   *
   * @param message Summary; an empty string falls back to 'Invalid Request'.
   * @param requestId Identifier of the offending request, if known.
   */
  static invalidRequest(message: string, requestId?: string): MCPError {
    return {
      code: MCPErrorCode.INVALID_REQUEST,
      message: message || 'Invalid Request',
      data: {
        requestId,
        timestamp: MCPErrorBuilder.now(),
      },
    };
  }

  /**
   * Builds a METHOD_NOT_FOUND payload naming the unsupported method.
   *
   * @param method Name of the method that was requested.
   * @param requestId Identifier of the offending request, if known.
   */
  static methodNotFound(method: string, requestId?: string): MCPError {
    return {
      code: MCPErrorCode.METHOD_NOT_FOUND,
      message: `Method not found: ${method}`,
      data: {
        details: `The method '${method}' is not supported by this server`,
        requestId,
        timestamp: MCPErrorBuilder.now(),
      },
    };
  }

  /**
   * Builds an INVALID_PARAMS payload.
   *
   * @param message Summary; an empty string falls back to 'Invalid params'.
   * @param params The rejected parameters, stored under `context.params`.
   * @param requestId Identifier of the offending request, if known.
   */
  static invalidParams(message: string, params?: unknown, requestId?: string): MCPError {
    // Resolve the fallback once so `details` never ends with a dangling colon.
    const summary = message || 'Invalid params';
    return {
      code: MCPErrorCode.INVALID_PARAMS,
      message: summary,
      data: {
        details: `Parameter validation failed: ${summary}`,
        context: { params },
        requestId,
        timestamp: MCPErrorBuilder.now(),
      },
    };
  }

  /**
   * Builds a RESOURCE_NOT_FOUND payload for the given URI.
   *
   * @param uri URI of the missing resource, also stored under `context.uri`.
   * @param requestId Identifier of the offending request, if known.
   */
  static resourceNotFound(uri: string, requestId?: string): MCPError {
    return {
      code: MCPErrorCode.RESOURCE_NOT_FOUND,
      message: `Resource not found: ${uri}`,
      data: {
        details: `The requested resource '${uri}' could not be found`,
        context: { uri },
        requestId,
        timestamp: MCPErrorBuilder.now(),
      },
    };
  }

  /**
   * Builds a TOOL_EXECUTION_FAILED payload.
   *
   * @param toolName Name of the failing tool, also stored under `context.toolName`.
   * @param cause Text describing why the tool failed.
   * @param requestId Identifier of the offending request, if known.
   */
  static toolExecutionFailed(toolName: string, cause: string, requestId?: string): MCPError {
    return {
      code: MCPErrorCode.TOOL_EXECUTION_FAILED,
      message: `Tool execution failed: ${toolName}`,
      data: {
        details: `The tool '${toolName}' failed to execute`,
        cause,
        context: { toolName },
        requestId,
        timestamp: MCPErrorBuilder.now(),
      },
    };
  }

  /**
   * Builds a RATE_LIMIT_EXCEEDED payload.
   *
   * @param limit Number of requests allowed per window.
   * @param windowMs Window length in milliseconds.
   * @param requestId Identifier of the offending request, if known.
   */
  static rateLimitExceeded(limit: number, windowMs: number, requestId?: string): MCPError {
    return {
      code: MCPErrorCode.RATE_LIMIT_EXCEEDED,
      message: 'Rate limit exceeded',
      data: {
        details: `Request rate limit of ${limit} requests per ${windowMs}ms exceeded`,
        context: { limit, windowMs },
        requestId,
        timestamp: MCPErrorBuilder.now(),
      },
    };
  }

  /**
   * Builds a TIMEOUT_ERROR payload.
   *
   * @param operation Name of the operation that timed out.
   * @param timeoutMs Timeout that elapsed, in milliseconds.
   * @param requestId Identifier of the offending request, if known.
   */
  static timeoutError(operation: string, timeoutMs: number, requestId?: string): MCPError {
    return {
      code: MCPErrorCode.TIMEOUT_ERROR,
      message: `Operation timeout: ${operation}`,
      data: {
        details: `The operation '${operation}' timed out after ${timeoutMs}ms`,
        context: { operation, timeoutMs },
        requestId,
        timestamp: MCPErrorBuilder.now(),
      },
    };
  }
}

8.1.2 异常类层次结构

// src/errors/MCPExceptions.ts
/**
 * Base class for all exceptions that can be converted to an {@link MCPError}.
 *
 * Subclasses pass a fixed error code plus optional details, context and
 * request id; `toMCPError()` / `toJSON()` produce the wire representation.
 */
export abstract class MCPException extends Error {
  public readonly code: MCPErrorCode;
  public readonly details?: string;
  public readonly context?: Record<string, any>;
  /** Moment the exception object was constructed. */
  public readonly timestamp: Date;
  public readonly requestId?: string;

  /**
   * @param code Error code reported to the client.
   * @param message Short summary, also used as `Error#message`.
   * @param details Optional longer explanation.
   * @param context Optional structured values related to the failure.
   * @param requestId Optional identifier of the failing request.
   */
  constructor(
    code: MCPErrorCode,
    message: string,
    details?: string,
    context?: Record<string, any>,
    requestId?: string
  ) {
    super(message);
    // When compiled to ES5, `extends Error` loses the subclass prototype;
    // restoring it keeps `instanceof` and `constructor.name` correct.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = this.constructor.name;
    this.code = code;
    this.details = details;
    this.context = context;
    this.timestamp = new Date();
    this.requestId = requestId;

    // captureStackTrace is a V8 extension; skip it on engines that lack it
    // instead of throwing from the constructor.
    if (typeof Error.captureStackTrace === 'function') {
      Error.captureStackTrace(this, this.constructor);
    }
  }

  /**
   * Converts the exception to its wire format.
   *
   * @returns An MCPError; `data.stack` is only populated when
   *          `process.env.NODE_ENV` is 'development'.
   */
  toMCPError(): MCPError {
    // `process` does not exist outside Node-like runtimes.
    const exposeStack =
      typeof process !== 'undefined' && process.env.NODE_ENV === 'development';

    return {
      code: this.code,
      message: this.message,
      data: {
        details: this.details,
        context: this.context,
        timestamp: this.timestamp.toISOString(),
        requestId: this.requestId,
        stack: exposeStack ? this.stack : undefined,
      },
    };
  }

  /** Makes `JSON.stringify(exception)` emit the MCPError representation. */
  toJSON() {
    return this.toMCPError();
  }
}

/** Exception reported with MCPErrorCode.PARSE_ERROR. */
export class ParseException extends MCPException {
  /**
   * @param message Short summary of the failure.
   * @param details Optional longer explanation.
   * @param requestId Optional identifier of the failing request.
   */
  constructor(message: string, details?: string, requestId?: string) {
    // No structured context is attached to this exception type.
    const context = undefined;
    super(MCPErrorCode.PARSE_ERROR, message, details, context, requestId);
  }
}

/** Exception reported with MCPErrorCode.INVALID_REQUEST. */
export class InvalidRequestException extends MCPException {
  /**
   * @param message Short summary of the failure.
   * @param context Optional structured values describing the request.
   * @param requestId Optional identifier of the failing request.
   */
  constructor(message: string, context?: Record<string, any>, requestId?: string) {
    // This exception type carries context but no free-text details.
    const details = undefined;
    super(MCPErrorCode.INVALID_REQUEST, message, details, context, requestId);
  }
}

/** Exception reported with MCPErrorCode.METHOD_NOT_FOUND. */
export class MethodNotFoundException extends MCPException {
  /**
   * @param method Name of the requested method; also stored as `context.method`.
   * @param requestId Optional identifier of the failing request.
   */
  constructor(method: string, requestId?: string) {
    const summary = `Method not found: ${method}`;
    const explanation = `The method '${method}' is not supported by this server`;
    super(MCPErrorCode.METHOD_NOT_FOUND, summary, explanation, { method }, requestId);
  }
}

/** Exception reported with MCPErrorCode.INVALID_PARAMS. */
export class InvalidParamsException extends MCPException {
  /**
   * @param message Short summary; repeated inside the details text.
   * @param params The rejected parameters; stored as `context.params`.
   * @param requestId Optional identifier of the failing request.
   */
  constructor(message: string, params?: any, requestId?: string) {
    const explanation = `Parameter validation failed: ${message}`;
    const context = { params };
    super(MCPErrorCode.INVALID_PARAMS, message, explanation, context, requestId);
  }
}

/** Exception reported with MCPErrorCode.RESOURCE_NOT_FOUND. */
export class ResourceNotFoundException extends MCPException {
  /**
   * @param uri URI of the missing resource; also stored as `context.uri`.
   * @param requestId Optional identifier of the failing request.
   */
  constructor(uri: string, requestId?: string) {
    const summary = `Resource not found: ${uri}`;
    const explanation = `The requested resource '${uri}' could not be found`;
    super(MCPErrorCode.RESOURCE_NOT_FOUND, summary, explanation, { uri }, requestId);
  }
}

/** Exception reported with MCPErrorCode.TOOL_EXECUTION_FAILED. */
export class ToolExecutionException extends MCPException {
  /**
   * @param toolName Name of the failing tool; used in the message and as `context.toolName`.
   * @param cause Text describing why the tool failed; stored as the exception's `details`.
   * @param context Optional extra structured values merged into the context.
   * @param requestId Optional identifier of the failing request.
   */
  constructor(toolName: string, cause: string, context?: Record<string, any>, requestId?: string) {
    super(
      MCPErrorCode.TOOL_EXECUTION_FAILED,
      `Tool execution failed: ${toolName}`,
      cause,
      // Spread the caller's context first so a stray `toolName` key inside it
      // cannot overwrite the explicit argument and contradict the message.
      { ...context, toolName },
      requestId
    );
  }
}

/** Exception reported with MCPErrorCode.RATE_LIMIT_EXCEEDED. */
export class RateLimitException extends MCPException {
  /**
   * @param limit Number of requests allowed per window.
   * @param windowMs Window length in milliseconds.
   * @param requestId Optional identifier of the failing request.
   */
  constructor(limit: number, windowMs: number, requestId?: string) {
    const explanation = `Request rate limit of ${limit} requests per ${windowMs}ms exceeded`;
    const context = { limit, windowMs };
    super(MCPErrorCode.RATE_LIMIT_EXCEEDED, 'Rate limit exceeded', explanation, context, requestId);
  }
}

/** Exception reported with MCPErrorCode.TIMEOUT_ERROR. */
export class TimeoutException extends MCPException {
  /**
   * @param operation Name of the operation that timed out.
   * @param timeoutMs Timeout that elapsed, in milliseconds.
   * @param requestId Optional identifier of the failing request.
   */
  constructor(operation: string, timeoutMs: number, requestId?: string) {
    const summary = `Operation timeout: ${operation}`;
    const explanation = `The operation '${operation}' timed out after ${timeoutMs}ms`;
    const context = { operation, timeoutMs };
    super(MCPErrorCode.TIMEOUT_ERROR, summary, explanation, context, requestId);
  }
}

8.2 结构化日志系统

8.2.1 日志级别和格式

// src/logging/Logger.ts
/**
 * Log severities. Lower numbers are more severe; Logger drops any entry
 * whose level is numerically greater than its configured level.
 */
export enum LogLevel {
  ERROR = 0,
  WARN = 1,
  INFO = 2,
  DEBUG = 3,
  TRACE = 4,
}

/** One structured log record as handed to formatters and transports. */
export interface LogEntry {
  /** Creation time; Logger fills it with `Date#toISOString()`. */
  timestamp: string;
  /** Severity of the record. */
  level: LogLevel;
  /** Human-readable message. */
  message: string;
  /** Category of the logger that produced the record. */
  category?: string;
  /** Identifier of the request the record belongs to. */
  requestId?: string;
  userId?: string;
  sessionId?: string;
  /** Structured values; Logger merges its default context with the per-call context. */
  context?: Record<string, any>;
  /** Serialised error; Logger copies name, message, stack and `code` from the Error. */
  error?: {
    name: string;
    message: string;
    stack?: string;
    code?: number;
  };
  duration?: number;
  tags?: string[];
}

/** Strategy that turns a {@link LogEntry} into the text a transport writes. */
export interface LogFormatter {
  /** Returns the textual representation of `entry`. */
  format(entry: LogEntry): string;
}

/** Formatter that serialises an entry as compact, single-line JSON. */
export class JSONLogFormatter implements LogFormatter {
  /** Returns `entry` as JSON without indentation. */
  format(entry: LogEntry): string {
    const serialized = JSON.stringify(entry);
    return serialized;
  }
}

/**
 * Formatter for human-readable console output: an ANSI-coloured
 * "timestamp LEVEL" prefix, optional [category] and [requestId] tags, the
 * message, and context / error sections on their own indented lines.
 */
export class ConsoleLogFormatter implements LogFormatter {
  /** ANSI colour escape used for each level's prefix. */
  private colorize = {
    [LogLevel.ERROR]: '\x1b[31m',   // Red
    [LogLevel.WARN]: '\x1b[33m',    // Yellow
    [LogLevel.INFO]: '\x1b[36m',    // Cyan
    [LogLevel.DEBUG]: '\x1b[35m',   // Magenta
    [LogLevel.TRACE]: '\x1b[90m',   // Bright Black
  };

  /** ANSI escape that resets colours. */
  private reset = '\x1b[0m';

  /**
   * Renders one entry.
   *
   * @param entry Record to render.
   * @returns The text to print, possibly spanning several lines.
   */
  format(entry: LogEntry): string {
    const color = this.colorize[entry.level] || '';
    // Pad to 5 characters so the 4- and 5-letter level names line up.
    const levelName = LogLevel[entry.level].padEnd(5);
    const timestamp = entry.timestamp;
    const category = entry.category ? `[${entry.category}]` : '';
    // Only the first 8 characters of the request id are shown.
    const requestId = entry.requestId ? `[${entry.requestId.slice(0, 8)}]` : '';

    let message = `${color}${timestamp} ${levelName}${this.reset} ${category}${requestId} ${entry.message}`;

    // Use real line breaks ("\n"); an escaped backslash would print the two
    // characters "\" and "n" into the output instead of starting a new line.
    if (entry.context && Object.keys(entry.context).length > 0) {
      message += `\n  Context: ${JSON.stringify(entry.context, null, 2)}`;
    }

    if (entry.error) {
      message += `\n  Error: ${entry.error.name}: ${entry.error.message}`;
      // The stack is appended for every level up to and including DEBUG.
      if (entry.error.stack && entry.level <= LogLevel.DEBUG) {
        message += `\n  Stack: ${entry.error.stack}`;
      }
    }

    return message;
  }
}

8.2.2 日志记录器实现

// src/logging/Logger.ts (续)
/** Destination for log entries; Logger fans every accepted entry out to all of its transports. */
export interface LogTransport {
  /** Persists or prints one entry; the promise settles when the write is done. */
  write(entry: LogEntry): Promise<void>;
  /** Releases any resources held by the transport; called from `Logger.close()`. */
  close(): Promise<void>;
}

/**
 * Transport that prints entries through the global `console`, choosing
 * `console.error`, `console.warn` or `console.log` by severity.
 */
export class ConsoleTransport implements LogTransport {
  private formatter: LogFormatter;

  /** @param formatter Formatter used to render entries; defaults to ConsoleLogFormatter. */
  constructor(formatter: LogFormatter = new ConsoleLogFormatter()) {
    this.formatter = formatter;
  }

  /** Formats `entry` and prints it on the console method matching its level. */
  async write(entry: LogEntry): Promise<void> {
    const line = this.formatter.format(entry);
    const { level } = entry;

    // ERROR goes to console.error, WARN to console.warn, the rest to console.log.
    const sink =
      level <= LogLevel.ERROR
        ? console.error
        : level <= LogLevel.WARN
          ? console.warn
          : console.log;

    sink.call(console, line);
  }

  /** Nothing to release for the console. */
  async close(): Promise<void> {
    return;
  }
}

/**
 * Transport that appends one formatted entry per line to a file and
 * rotates the file by size.
 *
 * Rotated files are named `<name>.<n><ext>` with `.1` being the most recent.
 * Uses Node's `fs` and `path` modules, which the enclosing module must import.
 */
export class FileTransport implements LogTransport {
  private formatter: LogFormatter;
  private filePath: string;
  /** Lazily opened append stream; null while closed. */
  private writeStream: fs.WriteStream | null = null;
  private rotateSize: number;
  private maxFiles: number;

  /**
   * @param filePath Path of the active log file.
   * @param formatter Formatter used for each entry; defaults to JSONLogFormatter.
   * @param rotateSize File size in bytes above which the file is rotated (default 10 MB).
   * @param maxFiles Bound on rotated files: archives are numbered 1..maxFiles-1 (default 5).
   */
  constructor(
    filePath: string,
    formatter: LogFormatter = new JSONLogFormatter(),
    rotateSize: number = 10 * 1024 * 1024, // 10MB
    maxFiles: number = 5
  ) {
    this.filePath = filePath;
    this.formatter = formatter;
    this.rotateSize = rotateSize;
    this.maxFiles = maxFiles;
  }

  /** Opens the append stream on first use, creating the parent directory. */
  private async ensureWriteStream(): Promise<fs.WriteStream> {
    if (!this.writeStream) {
      await fs.promises.mkdir(path.dirname(this.filePath), { recursive: true });

      // Re-check after the await: a concurrent write() may have opened the
      // stream while mkdir was pending, and a second stream would leak.
      if (!this.writeStream) {
        const stream = fs.createWriteStream(this.filePath, { flags: 'a' });
        stream.on('error', (error) => {
          console.error('Log file write error:', error);
        });
        this.writeStream = stream;
      }
    }
    return this.writeStream;
  }

  /**
   * Appends one entry followed by a line break, rotating first if the file
   * already exceeds `rotateSize`.
   */
  async write(entry: LogEntry): Promise<void> {
    const formatted = this.formatter.format(entry) + '\n';

    // Decide on rotation BEFORE obtaining the stream; rotating afterwards
    // would end the stream this call is about to write to.
    const stats = await fs.promises.stat(this.filePath).catch(() => ({ size: 0 }));
    if (stats.size > this.rotateSize) {
      await this.rotate();
    }

    const stream = await this.ensureWriteStream();

    return new Promise<void>((resolve, reject) => {
      stream.write(formatted, (error) => {
        if (error) reject(error);
        else resolve();
      });
    });
  }

  /** True when `error` is a "file does not exist" filesystem error. */
  private static isMissingFile(error: unknown): boolean {
    return (
      typeof error === 'object' &&
      error !== null &&
      (error as { code?: unknown }).code === 'ENOENT'
    );
  }

  /**
   * Shifts rotated files up by one index, drops the oldest and moves the
   * active file to index 1. Missing files are skipped; other filesystem
   * errors are reported on the console but do not abort logging.
   */
  private async rotate(): Promise<void> {
    // Flush and release the active file before renaming it.
    await this.close();

    const ext = path.extname(this.filePath);
    // slice(0, -0) would return '', so handle extension-less paths explicitly.
    const stem = ext ? this.filePath.slice(0, -ext.length) : this.filePath;
    const archive = (index: number): string => `${stem}.${index}${ext}`;

    for (let i = this.maxFiles - 1; i >= 1; i--) {
      try {
        if (i === this.maxFiles - 1) {
          // Oldest slot: delete instead of shifting further.
          await fs.promises.unlink(archive(i));
        } else {
          await fs.promises.rename(archive(i), archive(i + 1));
        }
      } catch (error) {
        if (!FileTransport.isMissingFile(error)) {
          console.error('Log file rotation error:', error);
        }
      }
    }

    try {
      await fs.promises.rename(this.filePath, archive(1));
    } catch (error) {
      if (!FileTransport.isMissingFile(error)) {
        console.error('Log file rotation error:', error);
      }
    }
  }

  /** Ends the stream, if open, and resolves once pending data has been flushed. */
  async close(): Promise<void> {
    const stream = this.writeStream;
    if (!stream) {
      return;
    }
    this.writeStream = null;
    await new Promise<void>((resolve) => {
      stream.end(() => resolve());
    });
  }
}

/**
 * Level-filtered logger that fans each accepted entry out to every
 * registered transport. Child loggers share the parent's transports and
 * extend its default context.
 */
export class Logger {
  private transports: LogTransport[] = [];
  private level: LogLevel = LogLevel.INFO;
  private category?: string;
  private defaultContext: Record<string, any> = {};

  /**
   * @param category Optional category stamped on every entry.
   * @param level Least severe level that is still emitted (default INFO).
   * @param defaultContext Values merged into the context of every entry.
   */
  constructor(
    category?: string,
    level: LogLevel = LogLevel.INFO,
    defaultContext: Record<string, any> = {}
  ) {
    this.defaultContext = defaultContext;
    this.level = level;
    this.category = category;
  }

  /** Registers an additional transport. */
  addTransport(transport: LogTransport): void {
    this.transports.push(transport);
  }

  /** Changes the level threshold of this logger only. */
  setLevel(level: LogLevel): void {
    this.level = level;
  }

  /**
   * Builds the entry and hands it to all transports in parallel. A failing
   * transport is reported on the console and never rejects the returned promise.
   */
  private async log(
    level: LogLevel,
    message: string,
    context?: Record<string, any>,
    error?: Error,
    requestId?: string
  ): Promise<void> {
    // Numerically larger levels are less severe than the threshold: drop them.
    if (level > this.level) {
      return;
    }

    const entry: LogEntry = {
      timestamp: new Date().toISOString(),
      level,
      message,
      category: this.category,
      requestId,
      context: { ...this.defaultContext, ...context },
      tags: [],
      // The `error` key is only present when an error was supplied.
      ...(error
        ? {
            error: {
              name: error.name,
              message: error.message,
              stack: error.stack,
              code: (error as any).code,
            },
          }
        : {}),
    };

    const pending: Promise<void>[] = [];
    for (const transport of this.transports) {
      pending.push(
        transport.write(entry).catch((transportError) => {
          console.error('Log transport error:', transportError);
        })
      );
    }

    await Promise.allSettled(pending);
  }

  /** Logs at ERROR level, optionally attaching an Error. */
  async error(message: string, error?: Error, context?: Record<string, any>, requestId?: string): Promise<void> {
    return this.log(LogLevel.ERROR, message, context, error, requestId);
  }

  /** Logs at WARN level. */
  async warn(message: string, context?: Record<string, any>, requestId?: string): Promise<void> {
    return this.log(LogLevel.WARN, message, context, undefined, requestId);
  }

  /** Logs at INFO level. */
  async info(message: string, context?: Record<string, any>, requestId?: string): Promise<void> {
    return this.log(LogLevel.INFO, message, context, undefined, requestId);
  }

  /** Logs at DEBUG level. */
  async debug(message: string, context?: Record<string, any>, requestId?: string): Promise<void> {
    return this.log(LogLevel.DEBUG, message, context, undefined, requestId);
  }

  /** Logs at TRACE level. */
  async trace(message: string, context?: Record<string, any>, requestId?: string): Promise<void> {
    return this.log(LogLevel.TRACE, message, context, undefined, requestId);
  }

  /**
   * Creates a logger whose default context is this logger's context plus
   * `context`. The child copies the current level and the current transport
   * list; later changes to the parent do not propagate.
   *
   * @param context Extra default context for the child.
   * @param category Category for the child; falls back to this logger's category.
   */
  child(context: Record<string, any>, category?: string): Logger {
    const derived = new Logger(category || this.category, this.level, {
      ...this.defaultContext,
      ...context,
    });

    derived.transports = this.transports.slice();

    return derived;
  }

  /** Closes every transport; rejects if any transport fails to close. */
  async close(): Promise<void> {
    await Promise.all(this.transports.map((transport) => transport.close()));
  }
}

8.3 错误处理中间件

8.3.1 请求处理错误中间件

// src/middleware/ErrorMiddleware.ts
import { Request, Response } from '@modelcontextprotocol/sdk/types.js';

/** Everything an {@link ErrorHandler} receives about a failed request. */
export interface ErrorHandlerContext {
  /** The request whose processing failed. */
  request: Request;
  /** Optional response; ErrorMiddleware.handleError does not set it. */
  response?: Response;
  /** The error that was thrown. */
  error: Error | MCPException;
  /** Identifier of the failed request. */
  requestId: string;
  /** When processing of the request started; used to compute durations. */
  startTime: Date;
  /** Logger to report through; ErrorMiddleware passes a child logger carrying the requestId. */
  logger: Logger;
}

/**
 * Strategy that converts a failure into a response.
 *
 * ErrorMiddleware tries handlers in order and uses the first truthy result,
 * so a falsy result acts as "not handled".
 */
export interface ErrorHandler {
  /** Produces the response for the failure described by `context`. */
  handle(context: ErrorHandlerContext): Promise<Response>;
}

/**
 * Fallback handler: logs the failure and turns any error into a JSON-RPC
 * error response.
 */
export class DefaultErrorHandler implements ErrorHandler {
  /**
   * Logs the error with request metadata, then builds the response.
   * MCPException instances supply their own payload; any other error becomes
   * an INTERNAL_ERROR.
   */
  async handle(context: ErrorHandlerContext): Promise<Response> {
    const { error, requestId, logger } = context;
    const { request } = context;

    const headline = `Request failed: ${error.message}`;
    const logContext = {
      requestId,
      method: request.method,
      params: request.params,
      duration: Date.now() - context.startTime.getTime(),
    };
    await logger.error(headline, error, logContext, requestId);

    const payload: MCPError =
      error instanceof MCPException
        ? error.toMCPError()
        : this.describeUnexpected(error, requestId);

    return {
      jsonrpc: '2.0',
      id: request.id,
      error: payload,
    };
  }

  /**
   * Builds the INTERNAL_ERROR payload for a plain Error. The original message
   * and stack are only exposed when NODE_ENV is 'development'.
   */
  private describeUnexpected(error: Error, requestId: string): MCPError {
    const verbose = process.env.NODE_ENV === 'development';

    return {
      code: MCPErrorCode.INTERNAL_ERROR,
      message: 'Internal server error',
      data: {
        details: verbose ? error.message : 'An internal error occurred',
        timestamp: new Date().toISOString(),
        requestId,
        stack: verbose ? error.stack : undefined,
      },
    };
  }
}

/**
 * Runs a chain of error handlers for a failed request. A DefaultErrorHandler
 * is always last in the chain; handlers added later run earlier.
 */
export class ErrorMiddleware {
  private handlers: ErrorHandler[] = [];
  private logger: Logger;

  /** @param logger Logger used for handler failures and as parent of per-request loggers. */
  constructor(logger: Logger) {
    this.logger = logger;
    this.handlers.push(new DefaultErrorHandler());
  }

  /** Registers a handler ahead of all existing ones. */
  addHandler(handler: ErrorHandler): void {
    this.handlers.splice(0, 0, handler);
  }

  /**
   * Produces the response for a failed request.
   *
   * Handlers are tried in order; the first truthy result wins. A handler that
   * throws is logged and skipped. If no handler yields a response, a minimal
   * INTERNAL_ERROR response is returned.
   *
   * @param error The error that was thrown.
   * @param request The request being processed.
   * @param requestId Identifier of the request.
   * @param startTime When processing started.
   */
  async handleError(
    error: Error | MCPException,
    request: Request,
    requestId: string,
    startTime: Date
  ): Promise<Response> {
    const scopedLogger = this.logger.child({ requestId });
    const context: ErrorHandlerContext = {
      error,
      request,
      requestId,
      startTime,
      logger: scopedLogger,
    };

    for (let index = 0; index < this.handlers.length; index += 1) {
      const outcome = await this.attempt(this.handlers[index], context);
      if (outcome) {
        return outcome;
      }
    }

    // Every handler declined or failed: answer with a bare internal error.
    return {
      jsonrpc: '2.0',
      id: request.id,
      error: {
        code: MCPErrorCode.INTERNAL_ERROR,
        message: 'Internal server error',
        data: {
          timestamp: new Date().toISOString(),
          requestId,
        },
      },
    };
  }

  /** Runs one handler; a thrown error is logged and reported as "no response". */
  private async attempt(
    handler: ErrorHandler,
    context: ErrorHandlerContext
  ): Promise<Response | undefined> {
    try {
      return await handler.handle(context);
    } catch (handlerError) {
      const failure =
        handlerError instanceof Error ? handlerError : new Error(String(handlerError));
      await this.logger.error(
        'Error handler failed',
        failure,
        { requestId: context.requestId, originalError: context.error.message },
        context.requestId
      );
      return undefined;
    }
  }
}

8.3.2 请求追踪中间件

// src/middleware/TracingMiddleware.ts
/** Record kept by TracingMiddleware for one request. */
export interface RequestTrace {
  /** Trace identifier generated by TracingMiddleware. */
  id: string;
  /** Method of the traced request. */
  method: string;
  /** Parameters of the traced request. */
  params?: any;
  /** When the trace was started. */
  startTime: Date;
  /** When the trace was ended; unset while pending. */
  endTime?: Date;
  /** endTime - startTime in milliseconds; unset while pending. */
  duration?: number;
  /** 'pending' from start, then 'success' or 'error' once ended. */
  status: 'pending' | 'success' | 'error';
  /** The error passed to endTrace, if any. */
  error?: Error;
  /** The response passed to endTrace, if any. */
  response?: any;
  /** Caller-supplied information about the request's origin. */
  metadata: {
    clientId?: string;
    sessionId?: string;
    userId?: string;
    userAgent?: string;
    remoteAddress?: string;
  };
}

/**
 * Tracks requests from start to completion: pending traces live in a map,
 * completed ones in a bounded history, and both transitions are logged.
 */
export class TracingMiddleware {
  private activeTraces = new Map<string, RequestTrace>();
  private logger: Logger;
  /** Maximum number of completed traces kept in the history. */
  private maxTraceRetention = 1000;
  private traceHistory: RequestTrace[] = [];

  /** @param logger Parent logger; a child with category 'tracing' is used. */
  constructor(logger: Logger) {
    this.logger = logger.child({}, 'tracing');
  }

  /**
   * Logging is fire-and-forget here; attach a rejection handler so a failing
   * logger can never surface as an unhandled promise rejection.
   */
  private detach(pending: Promise<void>): void {
    Promise.resolve(pending).catch((logError) => {
      console.error('Trace logging failed:', logError);
    });
  }

  /**
   * Starts a trace for `request`.
   *
   * @param request Request being processed.
   * @param metadata Optional information about the request's origin.
   * @returns The generated trace id, to be passed to endTrace().
   */
  startTrace(request: Request, metadata: RequestTrace['metadata'] = {}): string {
    const traceId = this.generateTraceId();

    const trace: RequestTrace = {
      id: traceId,
      method: request.method,
      params: request.params,
      startTime: new Date(),
      status: 'pending',
      metadata,
    };

    this.activeTraces.set(traceId, trace);

    this.detach(
      this.logger.debug(
        'Request started',
        {
          traceId,
          method: request.method,
          params: request.params,
          metadata,
        },
        traceId
      )
    );

    return traceId;
  }

  /**
   * Completes a trace, moves it to the history and logs the outcome.
   *
   * @param traceId Id returned by startTrace(); unknown ids only log a warning.
   * @param response Response produced for the request, if any.
   * @param error Error that ended the request; its presence marks the trace as 'error'.
   */
  endTrace(traceId: string, response?: any, error?: Error): void {
    const trace = this.activeTraces.get(traceId);
    if (!trace) {
      this.detach(this.logger.warn('Trace not found for completion', { traceId }));
      return;
    }

    trace.endTime = new Date();
    trace.duration = trace.endTime.getTime() - trace.startTime.getTime();
    trace.status = error ? 'error' : 'success';
    trace.error = error;
    trace.response = response;

    // Move from the active map to the bounded history.
    this.activeTraces.delete(traceId);
    this.traceHistory.push(trace);

    if (this.traceHistory.length > this.maxTraceRetention) {
      this.traceHistory.shift();
    }

    const context = {
      traceId,
      method: trace.method,
      duration: trace.duration,
      status: trace.status,
    };

    // error() and info() take their arguments in different orders
    // (error has an extra Error parameter), so each is called explicitly.
    if (error) {
      this.detach(this.logger.error('Request failed', error, context, traceId));
    } else {
      this.detach(this.logger.info('Request completed', context, traceId));
    }
  }

  /** Looks a trace up among the pending ones first, then in the history. */
  getTrace(traceId: string): RequestTrace | undefined {
    return this.activeTraces.get(traceId) ||
           this.traceHistory.find(t => t.id === traceId);
  }

  /** Returns the traces that have been started but not ended. */
  getActiveTraces(): RequestTrace[] {
    return Array.from(this.activeTraces.values());
  }

  /**
   * Returns completed traces, most recent first.
   *
   * @param limit Maximum number of traces to return; falsy means all.
   */
  getTraceHistory(limit?: number): RequestTrace[] {
    const history = [...this.traceHistory].reverse();
    return limit ? history.slice(0, limit) : history;
  }

  /**
   * Summarises the traces currently held.
   *
   * @returns Number of pending and completed traces, the mean duration in
   *          whole milliseconds and the error ratio rounded to two decimals.
   */
  getTraceStats(): {
    active: number;
    completed: number;
    averageDuration: number;
    errorRate: number;
  } {
    const active = this.activeTraces.size;
    const completed = this.traceHistory.length;

    const completedWithDuration = this.traceHistory.filter(t => t.duration !== undefined);
    const averageDuration = completedWithDuration.length > 0
      ? completedWithDuration.reduce((sum, t) => sum + (t.duration || 0), 0) / completedWithDuration.length
      : 0;

    const errorCount = this.traceHistory.filter(t => t.status === 'error').length;
    const errorRate = completed > 0 ? errorCount / completed : 0;

    return {
      active,
      completed,
      averageDuration: Math.round(averageDuration),
      errorRate: Math.round(errorRate * 100) / 100,
    };
  }

  /** Builds an id of the form `trace_<epoch ms>_<up to 9 base-36 chars>`. */
  private generateTraceId(): string {
    // slice(2, 11) matches the deprecated substr(2, 9): skip "0." and keep 9 chars.
    return `trace_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }
}

8.4 监控和诊断工具

8.4.1 性能监控

// src/monitoring/PerformanceMonitor.ts
/** One measured value recorded by PerformanceMonitor. */
export interface PerformanceMetric {
  /** Metric name, e.g. a timer name or 'memory.rss'. */
  name: string;
  /** Measured value, expressed in `unit`. */
  value: number;
  /** Unit label; this file uses 'milliseconds', 'bytes' and 'microseconds'. */
  unit: string;
  /** When the value was measured. */
  timestamp: Date;
  /** Optional labels; timers copy the labels given to startTimer(). */
  tags?: Record<string, string>;
}

/**
 * Collects performance metrics in memory: named timers, explicitly recorded
 * values and process metrics sampled every 30 seconds.
 */
export class PerformanceMonitor {
  private metrics: PerformanceMetric[] = [];
  private timers = new Map<string, { start: number; labels?: Record<string, string> }>();
  private logger: Logger;
  /** Maximum number of metrics kept; the oldest is dropped beyond this. */
  private metricsRetention = 10000;
  /** Handle of the periodic system-metrics sampler; null once stopped. */
  private systemMetricsTimer: ReturnType<typeof setInterval> | null = null;

  /** @param logger Parent logger; a child with category 'performance' is used. */
  constructor(logger: Logger) {
    this.logger = logger.child({}, 'performance');
    this.startSystemMetricsCollection();
  }

  /**
   * Starts (or restarts) the timer called `name`.
   *
   * @param name Timer name; also the name of the metric recorded by endTimer().
   * @param labels Optional tags attached to the recorded metric.
   */
  startTimer(name: string, labels?: Record<string, string>): void {
    this.timers.set(name, {
      start: performance.now(),
      labels,
    });
  }

  /**
   * Stops the timer called `name` and records its duration as a metric.
   *
   * @returns Elapsed milliseconds, or undefined when no such timer is running.
   */
  endTimer(name: string): number | undefined {
    const timer = this.timers.get(name);
    if (!timer) {
      void this.logger.warn('Timer not found', { name });
      return undefined;
    }

    const duration = performance.now() - timer.start;
    this.timers.delete(name);

    this.recordMetric({
      name,
      value: duration,
      unit: 'milliseconds',
      timestamp: new Date(),
      tags: timer.labels,
    });

    return duration;
  }

  /** Stores a metric, evicting the oldest one when the retention limit is exceeded. */
  recordMetric(metric: PerformanceMetric): void {
    this.metrics.push(metric);

    if (this.metrics.length > this.metricsRetention) {
      this.metrics.shift();
    }

    void this.logger.debug('Metric recorded', {
      name: metric.name,
      value: metric.value,
      unit: metric.unit,
      tags: metric.tags,
    });
  }

  /**
   * Returns stored metrics in recording order.
   *
   * @param name Only metrics with this name, when given.
   * @param since Only metrics with `timestamp >= since`, when given.
   */
  getMetrics(name?: string, since?: Date): PerformanceMetric[] {
    let filtered = this.metrics;

    if (name) {
      filtered = filtered.filter(m => m.name === name);
    }

    if (since) {
      filtered = filtered.filter(m => m.timestamp >= since);
    }

    return filtered;
  }

  /**
   * Computes summary statistics for one metric.
   *
   * The median of an even number of samples is the mean of the two middle
   * values; p95/p99 use the nearest-rank method.
   *
   * @param name Metric name.
   * @param since Only consider metrics with `timestamp >= since`, when given.
   * @returns The statistics, or null when there are no matching samples.
   */
  getMetricsSummary(name: string, since?: Date): {
    count: number;
    min: number;
    max: number;
    average: number;
    median: number;
    p95: number;
    p99: number;
  } | null {
    const metrics = this.getMetrics(name, since);
    if (metrics.length === 0) {
      return null;
    }

    const values = metrics.map(m => m.value).sort((a, b) => a - b);
    const count = values.length;

    // Nearest rank: the smallest value with at least `percent`% of samples at
    // or below it. Integer arithmetic avoids floating-point rank errors.
    const percentile = (percent: number): number => {
      const rank = Math.ceil((percent * count) / 100);
      return values[Math.min(count - 1, Math.max(0, rank - 1))];
    };

    const middle = Math.floor(count / 2);
    const median = count % 2 === 0
      ? (values[middle - 1] + values[middle]) / 2
      : values[middle];

    return {
      count,
      min: values[0],
      max: values[count - 1],
      average: values.reduce((sum, v) => sum + v, 0) / count,
      median,
      p95: percentile(95),
      p99: percentile(99),
    };
  }

  /**
   * Stops the periodic system-metrics sampling. Safe to call more than once.
   * Stored metrics and running timers are left untouched.
   */
  stop(): void {
    if (this.systemMetricsTimer !== null) {
      clearInterval(this.systemMetricsTimer);
      this.systemMetricsTimer = null;
    }
  }

  /** Starts sampling process metrics every 30 seconds. */
  private startSystemMetricsCollection(): void {
    const handle = setInterval(() => {
      this.collectSystemMetrics().catch(error => {
        void this.logger.error('System metrics collection failed', error);
      });
    }, 30000);

    // Background sampling must not be the only thing keeping the process alive.
    if (typeof handle.unref === 'function') {
      handle.unref();
    }

    this.systemMetricsTimer = handle;
  }

  /**
   * Records heap, RSS and CPU figures reported by `process`, plus the time
   * it takes a `setImmediate` callback to run as 'eventloop.delay'.
   */
  private async collectSystemMetrics(): Promise<void> {
    const timestamp = new Date();

    // Memory usage
    const memUsage = process.memoryUsage();
    this.recordMetric({
      name: 'memory.heap.used',
      value: memUsage.heapUsed,
      unit: 'bytes',
      timestamp,
    });

    this.recordMetric({
      name: 'memory.heap.total',
      value: memUsage.heapTotal,
      unit: 'bytes',
      timestamp,
    });

    this.recordMetric({
      name: 'memory.rss',
      value: memUsage.rss,
      unit: 'bytes',
      timestamp,
    });

    // CPU usage as returned by process.cpuUsage()
    const cpuUsage = process.cpuUsage();
    this.recordMetric({
      name: 'cpu.user',
      value: cpuUsage.user,
      unit: 'microseconds',
      timestamp,
    });

    this.recordMetric({
      name: 'cpu.system',
      value: cpuUsage.system,
      unit: 'microseconds',
      timestamp,
    });

    // Event loop delay
    const start = process.hrtime.bigint();
    setImmediate(() => {
      const end = process.hrtime.bigint();
      const delay = Number(end - start) / 1000000; // nanoseconds -> milliseconds

      this.recordMetric({
        name: 'eventloop.delay',
        value: delay,
        unit: 'milliseconds',
        timestamp: new Date(),
      });
    });
  }
}

8.4.2 健康检查增强

// src/diagnostics/DiagnosticsManager.ts
/** A named health check that DiagnosticsManager can run. */
export interface DiagnosticCheck {
  /** Unique name; DiagnosticsManager keys checks and results by it. */
  name: string;
  /** Human-readable description of what is checked. */
  description: string;
  /** Grouping used in the summary statistics. */
  category: 'system' | 'application' | 'external';
  /** Performs the check; a rejection is recorded by DiagnosticsManager as a 'fail' result. */
  execute(): Promise<DiagnosticResult>;
}

/** Outcome of one diagnostic check. */
export interface DiagnosticResult {
  /** Overall verdict. */
  status: 'pass' | 'fail' | 'warn';
  /** Human-readable summary of the verdict. */
  message: string;
  /** Optional measured values backing the verdict. */
  details?: Record<string, any>;
  /** Run time in milliseconds; DiagnosticsManager.runCheck overwrites it with its own measurement. */
  duration: number;
  /** Completion time; DiagnosticsManager.runCheck overwrites it as well. */
  timestamp: Date;
  /** Optional suggested follow-up actions. */
  recommendations?: string[];
}

/**
 * Registry and runner for diagnostic checks. Keeps the latest result per
 * check and ships with two built-in checks: 'memory-usage' and
 * 'event-loop-delay'.
 */
export class DiagnosticsManager {
  private checks = new Map<string, DiagnosticCheck>();
  private results = new Map<string, DiagnosticResult>();
  private logger: Logger;

  /** @param logger Parent logger; a child with category 'diagnostics' is used. */
  constructor(logger: Logger) {
    this.logger = logger.child({}, 'diagnostics');
    this.setupDefaultChecks();
  }

  /** Registers a check, replacing any existing check with the same name. */
  addCheck(check: DiagnosticCheck): void {
    const { name, category } = check;
    this.checks.set(name, check);
    this.logger.debug('Diagnostic check added', { name, category });
  }

  /**
   * Runs one check and stores its result.
   *
   * @param name Name of a registered check.
   * @returns The result; a check that throws yields a 'fail' result instead of rejecting.
   * @throws Error when no check with that name is registered.
   */
  async runCheck(name: string): Promise<DiagnosticResult> {
    const check = this.checks.get(name);
    if (!check) {
      throw new Error(`Diagnostic check not found: ${name}`);
    }

    const startedAt = performance.now();

    try {
      const outcome = await check.execute();
      // The manager's own measurements replace whatever the check reported.
      outcome.duration = performance.now() - startedAt;
      outcome.timestamp = new Date();

      this.results.set(name, outcome);

      const { status, duration, message } = outcome;
      this.logger.debug('Diagnostic check completed', { name, status, duration, message });

      return outcome;
    } catch (error) {
      const known = error instanceof Error ? error : undefined;

      const failure: DiagnosticResult = {
        status: 'fail',
        message: known ? known.message : 'Unknown error',
        duration: performance.now() - startedAt,
        timestamp: new Date(),
        details: { error: known ? known.stack : String(error) },
      };

      this.results.set(name, failure);
      this.logger.error('Diagnostic check failed', known ? known : new Error(String(error)), { name });

      return failure;
    }
  }

  /** Runs every registered check concurrently and returns the results by name. */
  async runAllChecks(): Promise<Map<string, DiagnosticResult>> {
    const collected = new Map<string, DiagnosticResult>();
    const names = Array.from(this.checks.keys());

    await Promise.allSettled(
      names.map((name) =>
        this.runCheck(name).then((result) => {
          collected.set(name, result);
        })
      )
    );

    return collected;
  }

  /** Returns the latest stored result of a check, if it has run. */
  getCheckResult(name: string): DiagnosticResult | undefined {
    return this.results.get(name);
  }

  /** Returns a copy of all stored results. */
  getAllResults(): Map<string, DiagnosticResult> {
    return new Map(this.results);
  }

  /**
   * Counts stored results overall and per check category. Results whose
   * check is no longer registered are counted under 'unknown'.
   */
  getSummary(): {
    total: number;
    passed: number;
    failed: number;
    warnings: number;
    categories: Record<string, { total: number; passed: number; failed: number; warnings: number }>;
  } {
    // Maps a result status to the counter it increments.
    const counterByStatus = new Map<string, 'passed' | 'failed' | 'warnings'>([
      ['pass', 'passed'],
      ['fail', 'failed'],
      ['warn', 'warnings'],
    ]);

    const summary = {
      total: 0,
      passed: 0,
      failed: 0,
      warnings: 0,
      categories: {} as Record<string, any>,
    };

    this.results.forEach((result, name) => {
      const category = this.checks.get(name)?.category || 'unknown';

      if (!summary.categories[category]) {
        summary.categories[category] = { total: 0, passed: 0, failed: 0, warnings: 0 };
      }
      const bucket = summary.categories[category];

      summary.total += 1;
      bucket.total += 1;

      const counter = counterByStatus.get(result.status);
      if (counter !== undefined) {
        summary[counter] += 1;
        bucket[counter] += 1;
      }
    });

    return summary;
  }

  /** Registers the built-in checks. */
  private setupDefaultChecks(): void {
    this.addCheck(this.buildMemoryUsageCheck());
    this.addCheck(this.buildEventLoopDelayCheck());
  }

  /**
   * Builds the 'memory-usage' check: heapUsed / heapTotal above 90% fails,
   * above 75% warns, otherwise passes.
   */
  private buildMemoryUsageCheck(): DiagnosticCheck {
    const toMB = (bytes: number): string => `${(bytes / (1024 * 1024)).toFixed(1)} MB`;

    return {
      name: 'memory-usage',
      description: 'Check memory usage levels',
      category: 'system',
      execute: async () => {
        const usage = process.memoryUsage();
        const usagePercent = (usage.heapUsed / usage.heapTotal) * 100;
        const percentText = usagePercent.toFixed(1);

        let verdict: {
          status: DiagnosticResult['status'];
          message: string;
          recommendations: string[];
        };

        if (usagePercent > 90) {
          verdict = {
            status: 'fail',
            message: `Critical memory usage: ${percentText}%`,
            recommendations: [
              'Consider restarting the server',
              'Check for memory leaks',
              'Increase available memory',
            ],
          };
        } else if (usagePercent > 75) {
          verdict = {
            status: 'warn',
            message: `High memory usage: ${percentText}%`,
            recommendations: [
              'Monitor memory usage trends',
              'Consider optimizing memory-intensive operations',
            ],
          };
        } else {
          verdict = {
            status: 'pass',
            message: `Memory usage normal: ${percentText}%`,
            recommendations: [],
          };
        }

        return {
          status: verdict.status,
          message: verdict.message,
          details: {
            heapUsed: toMB(usage.heapUsed),
            heapTotal: toMB(usage.heapTotal),
            usagePercent: `${percentText}%`,
            rss: toMB(usage.rss),
          },
          recommendations: verdict.recommendations.length > 0 ? verdict.recommendations : undefined,
          duration: 0,
          timestamp: new Date(),
        };
      },
    };
  }

  /**
   * Builds the 'event-loop-delay' check: measures how long a setImmediate
   * callback takes to run; above 100 ms fails, above 50 ms warns.
   */
  private buildEventLoopDelayCheck(): DiagnosticCheck {
    return {
      name: 'event-loop-delay',
      description: 'Check event loop delay',
      category: 'system',
      execute: async () => {
        const scheduledAt = Date.now();

        const delay = await new Promise<number>((resolve) => {
          setImmediate(() => resolve(Date.now() - scheduledAt));
        });

        let status: DiagnosticResult['status'] = 'pass';
        let message = `Event loop delay normal: ${delay}ms`;
        let recommendations: string[] = [];

        if (delay > 100) {
          status = 'fail';
          message = `High event loop delay: ${delay}ms`;
          recommendations = [
            'Check for CPU-intensive synchronous operations',
            'Consider using worker threads for heavy computations',
            'Profile the application for performance bottlenecks',
          ];
        } else if (delay > 50) {
          status = 'warn';
          message = `Moderate event loop delay: ${delay}ms`;
          recommendations = [
            'Monitor event loop delay trends',
            'Consider optimizing blocking operations',
          ];
        }

        return {
          status,
          message,
          details: { delayMs: delay },
          recommendations: recommendations.length > 0 ? recommendations : undefined,
          duration: 0,
          timestamp: new Date(),
        };
      },
    };
  }
}

8.5 集成示例：带完整错误处理的MCP Server

// src/EnhancedMCPServer.ts
/**
 * MCP server that layers structured logging, error middleware, request
 * tracing, performance timing and health diagnostics on top of `MCPServer`.
 *
 * All collaborators are created in the constructor through the `setup*`
 * helpers; their call order matters because the later helpers consume the
 * request logger created by `setupEnhancedLogging()`.
 */
export class EnhancedMCPServer extends MCPServer {
  // Definite-assignment assertions (`!`): these fields are assigned by the
  // setup helpers invoked from the constructor, which the compiler cannot see
  // through under `strictPropertyInitialization`.
  private errorMiddleware!: ErrorMiddleware;
  private tracingMiddleware!: TracingMiddleware;
  private performanceMonitor!: PerformanceMonitor;
  private diagnosticsManager!: DiagnosticsManager;
  private requestLogger!: Logger;

  /**
   * @param config Server configuration, forwarded unchanged to `MCPServer`.
   */
  constructor(config: ServerConfig) {
    super(config);
    // Order matters: logging first, because the other helpers use requestLogger.
    this.setupEnhancedLogging();
    this.setupErrorHandling();
    this.setupMonitoring();
  }

  /**
   * Builds the main logger with its transports, derives the request logger
   * from it, and registers the main logger with the resource manager so it
   * is closed together with the server's other resources.
   */
  private setupEnhancedLogging(): void {
    // Main logger
    const mainLogger = new Logger('mcp-server', LogLevel.INFO);

    // Console transport
    mainLogger.addTransport(new ConsoleTransport());

    // File transports (production only)
    if (process.env.NODE_ENV === 'production') {
      mainLogger.addTransport(new FileTransport('./logs/server.log'));
      // NOTE(review): no level filter is passed here, so unless FileTransport
      // filters internally, error.log receives every record in JSON form,
      // not only errors. Confirm this is intended.
      mainLogger.addTransport(new FileTransport('./logs/error.log', new JSONLogFormatter()));
    }

    // Child logger used for request-scoped records
    this.requestLogger = mainLogger.child({}, 'requests');

    // Register the main logger as a closable resource
    this.resourceManager.register({
      name: 'main-logger',
      close: async () => {
        await mainLogger.close();
      },
    });
  }

  /**
   * Creates the error and tracing middleware and installs a custom handler
   * that logs extra context for tool execution failures.
   */
  private setupErrorHandling(): void {
    this.errorMiddleware = new ErrorMiddleware(this.requestLogger);
    this.tracingMiddleware = new TracingMiddleware(this.requestLogger);

    // Custom error handler
    this.errorMiddleware.addHandler({
      handle: async (context) => {
        // Log detailed context for tool failures to ease debugging
        if (context.error instanceof ToolExecutionException) {
          await context.logger.error(
            'Tool execution failed with detailed context',
            context.error,
            {
              toolName: context.error.context?.toolName,
              params: context.request.params,
              duration: Date.now() - context.startTime.getTime(),
            },
            context.requestId
          );
        }

        // Return null so the default handler continues processing.
        // The cast is kept because the handler's declared return type is
        // defined outside this block.
        return null as any;
      },
    });
  }

  /**
   * Creates the performance monitor and diagnostics manager and registers a
   * `request-processing` health check based on the trace statistics.
   */
  private setupMonitoring(): void {
    this.performanceMonitor = new PerformanceMonitor(this.requestLogger);
    this.diagnosticsManager = new DiagnosticsManager(this.requestLogger);

    // Custom health check
    this.diagnosticsManager.addCheck({
      name: 'request-processing',
      description: 'Check request processing performance',
      category: 'application',
      execute: async () => {
        const stats = this.tracingMiddleware.getTraceStats();

        let status: 'pass' | 'fail' | 'warn';
        let message: string;

        if (stats.errorRate > 0.1) { // error rate above 10%
          status = 'fail';
          message = `High error rate: ${(stats.errorRate * 100).toFixed(1)}%`;
        } else if (stats.averageDuration > 5000) { // average response time above 5 seconds
          status = 'warn';
          message = `Slow average response time: ${stats.averageDuration}ms`;
        } else {
          status = 'pass';
          message = `Request processing healthy: ${(stats.errorRate * 100).toFixed(1)}% error rate, ${stats.averageDuration}ms avg`;
        }

        return {
          status,
          message,
          details: stats,
          duration: 0,
          timestamp: new Date(),
        };
      },
    });
  }

  /**
   * Handles one request with tracing, timing and logging around it.
   *
   * The timer and the trace are ended exactly once per request: on the
   * failure path inside the `catch`, on the success path right after
   * `processRequest` resolves. A failure while writing the completion log
   * is reported on stderr and does not replace the successful response.
   *
   * @param request Incoming request.
   * @returns The response from `processRequest`, or the response produced
   *          by the error middleware when the start log or the processing
   *          throws.
   */
  async handleRequest(request: Request): Promise<Response> {
    // slice(2, 11) yields the same nine characters as the deprecated substr(2, 9).
    const requestId = `req_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
    const startTime = new Date();
    // NOTE(review): the timer is keyed by method name only; confirm that
    // PerformanceMonitor copes with concurrent requests for the same method.
    const timerName = `request.${request.method}`;

    // Start tracing
    const traceId = this.tracingMiddleware.startTrace(request, { requestId });

    // Start performance timing
    this.performanceMonitor.startTimer(timerName, {
      method: request.method,
      requestId,
    });

    let response: Response;
    try {
      // Log request start
      await this.requestLogger.info(
        `Request started: ${request.method}`,
        {
          method: request.method,
          params: request.params,
        },
        requestId
      );

      // Process the request (the actual routing logic belongs here)
      response = await this.processRequest(request, requestId);
    } catch (error) {
      const failure = error instanceof Error ? error : new Error(String(error));

      // End monitoring (failure path)
      this.performanceMonitor.endTimer(timerName);
      this.tracingMiddleware.endTrace(traceId, undefined, failure);

      // Let the error middleware build the error response
      return await this.errorMiddleware.handleError(
        failure,
        request,
        requestId,
        startTime
      );
    }

    // End monitoring (success path)
    this.performanceMonitor.endTimer(timerName);
    this.tracingMiddleware.endTrace(traceId, response);

    // Log success. Best-effort: the request has already succeeded and its
    // monitoring is closed, so a logging failure must not turn it into an error.
    try {
      await this.requestLogger.info(
        `Request completed: ${request.method}`,
        {
          method: request.method,
          duration: Date.now() - startTime.getTime(),
        },
        requestId
      );
    } catch (logError) {
      console.error('Failed to log request completion:', logError);
    }

    return response;
  }

  /**
   * Placeholder for the real request routing.
   *
   * @throws MethodNotFoundException always, for demonstration purposes.
   */
  private async processRequest(request: Request, requestId: string): Promise<Response> {
    // The actual request routing and handling logic belongs here.
    // For this example a method-not-found error is thrown.
    throw new MethodNotFoundException(request.method, requestId);
  }

  /**
   * Diagnostics endpoint: runs all registered checks and returns them with
   * the diagnostics summary, the trace statistics and metric summaries
   * covering the last 60 seconds.
   */
  async getDiagnostics(): Promise<any> {
    const results = await this.diagnosticsManager.runAllChecks();
    const summary = this.diagnosticsManager.getSummary();
    const traceStats = this.tracingMiddleware.getTraceStats();

    // Both metric summaries cover the same one-minute window.
    const windowStart = new Date(Date.now() - 60000);

    return {
      summary,
      traceStats,
      checks: Object.fromEntries(results),
      performance: {
        memory: this.performanceMonitor.getMetricsSummary('memory.heap.used', windowStart),
        requests: this.performanceMonitor.getMetricsSummary('request.tools/list', windowStart),
      },
    };
  }
}

// 使用示例
/**
 * Loads the server configuration, starts an `EnhancedMCPServer`, and prints
 * its diagnostics once per minute.
 *
 * A start-up failure is logged and terminates the process with exit code 1.
 * A failure in a single diagnostics run is logged and does not stop the
 * server or the later runs.
 */
async function startEnhancedServer(): Promise<void> {
  try {
    const config = await ConfigLoader.loadFromFile('./config/server.json');
    const server = new EnhancedMCPServer(config);

    await server.start();

    // Each run handles its own errors: a rejection escaping a timer callback
    // would otherwise surface as an unhandled promise rejection.
    const reportDiagnostics = async (): Promise<void> => {
      try {
        const diagnostics = await server.getDiagnostics();
        console.log('Server diagnostics:', JSON.stringify(diagnostics, null, 2));
      } catch (error) {
        console.error('Failed to collect server diagnostics:', error);
      }
    };

    // Run diagnostics periodically
    setInterval(() => {
      void reportDiagnostics();
    }, 60000); // once per minute

  } catch (error) {
    console.error('Failed to start enhanced server:', error);
    process.exit(1);
  }
}

本章总结

第8章深入讲解了MCP Server的错误处理和日志系统：

核心知识点

MCP错误规范：建立了完整的错误码体系和异常类层次
结构化日志：实现了多级别、多传输器的日志系统
错误处理中间件：构建了请求级别的错误处理机制
请求追踪：实现了完整的请求生命周期追踪
性能监控：建立了系统和应用级别的性能监控
诊断工具：实现了自动化的健康检查和诊断系统

实践要点

遵循MCP协议的错误码规范
实现结构化和可搜索的日志格式
建立多层次的错误处理机制
实现请求级别的性能监控
提供丰富的诊断和调试信息
区分开发和生产环境的日志策略

通过本章的学习，我们掌握了构建生产级MCP Server所需的完整错误处理和监控体系，这套体系为服务的稳定运行和问题排查提供了强有力的支撑。