Chapter 10: Performance Optimization and Scalability


Learning Objectives

  1. Analyze and optimize MCP Server performance bottlenecks
  2. Implement caching and resource pool management
  3. Master concurrency handling and asynchronous programming
  4. Learn memory management and garbage collection optimization
  5. Design scalable architecture patterns

10.1 Performance Analysis and Monitoring

10.1.1 Performance Metrics System

// src/performance/MetricsCollector.ts
export interface PerformanceMetrics {
  // Request processing performance
  requestMetrics: {
    totalRequests: number;
    requestsPerSecond: number;
    averageResponseTime: number;
    p50ResponseTime: number;
    p95ResponseTime: number;
    p99ResponseTime: number;
    errorRate: number;
  };

  // System resource usage
  systemMetrics: {
    cpuUsage: number;
    memoryUsage: {
      heapUsed: number;
      heapTotal: number;
      rss: number;
      external: number;
    };
    eventLoopDelay: number;
    eventLoopUtilization: number;
  };

  // Business metrics
  businessMetrics: {
    toolExecutions: number;
    resourceAccesses: number;
    promptGenerations: number;
    cacheHitRate: number;
  };

  // Connection metrics
  connectionMetrics: {
    activeConnections: number;
    totalConnections: number;
    connectionRate: number;
    avgConnectionDuration: number;
  };
}

export class MetricsCollector {
  private requestTimes: number[] = [];
  private requestCount = 0;
  private errorCount = 0;
  private readonly startTime = Date.now();

  recordRequest(duration: number, success: boolean): void {
    this.requestCount++;
    if (!success) this.errorCount++;

    this.requestTimes.push(duration);

    // Limit sample size
    if (this.requestTimes.length > 10000) {
      this.requestTimes.shift();
    }
  }

  getMetrics(): PerformanceMetrics {
    const sortedTimes = [...this.requestTimes].sort((a, b) => a - b);
    const p50 = this.getPercentile(sortedTimes, 0.5);
    const p95 = this.getPercentile(sortedTimes, 0.95);
    const p99 = this.getPercentile(sortedTimes, 0.99);
    const avgResponseTime = sortedTimes.length > 0 ?
      sortedTimes.reduce((sum, time) => sum + time, 0) / sortedTimes.length : 0;

    const memUsage = process.memoryUsage();

    return {
      requestMetrics: {
        totalRequests: this.requestCount,
        requestsPerSecond: this.requestCount / Math.max((Date.now() - this.startTime) / 1000, 1),
        averageResponseTime: avgResponseTime,
        p50ResponseTime: p50,
        p95ResponseTime: p95,
        p99ResponseTime: p99,
        errorRate: this.requestCount > 0 ? this.errorCount / this.requestCount : 0,
      },
      systemMetrics: {
        // Zeroed placeholders; a sampler sketch for these fields follows this listing
        cpuUsage: 0,
        memoryUsage: {
          heapUsed: memUsage.heapUsed,
          heapTotal: memUsage.heapTotal,
          rss: memUsage.rss,
          external: memUsage.external,
        },
        eventLoopDelay: 0,
        eventLoopUtilization: 0,
      },
      // Business and connection metrics are placeholders here; a full server
      // would increment them from its tool, resource, and connection handlers
      businessMetrics: {
        toolExecutions: 0,
        resourceAccesses: 0,
        promptGenerations: 0,
        cacheHitRate: 0,
      },
      connectionMetrics: {
        activeConnections: 0,
        totalConnections: 0,
        connectionRate: 0,
        avgConnectionDuration: 0,
      },
    };
  }

  private getPercentile(sortedArray: number[], percentile: number): number {
    if (sortedArray.length === 0) return 0;

    const index = Math.ceil(sortedArray.length * percentile) - 1;
    return sortedArray[Math.max(0, index)];
  }
}
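
The systemMetrics fields are returned as zeroed placeholders above. Below is a minimal sketch of how they could be populated with Node's built-in APIs (monitorEventLoopDelay and performance.eventLoopUtilization from perf_hooks, plus process.cpuUsage()); the SystemMetricsSampler name and file path are assumptions, not part of the chapter's code:

// src/performance/SystemMetricsSampler.ts (illustrative)
import { monitorEventLoopDelay, performance } from 'node:perf_hooks';

export class SystemMetricsSampler {
  // Histogram of event loop delay, sampled every 20ms
  private histogram = monitorEventLoopDelay({ resolution: 20 });
  private lastCpu = process.cpuUsage();
  private lastElu = performance.eventLoopUtilization();
  private lastSampleAt = Date.now();

  start(): void {
    this.histogram.enable();
  }

  // Returns values suitable for PerformanceMetrics.systemMetrics
  sample(): { cpuUsage: number; eventLoopDelay: number; eventLoopUtilization: number } {
    const now = Date.now();
    const elapsedMs = Math.max(now - this.lastSampleAt, 1);

    // CPU time consumed since the previous sample, as a fraction of wall time
    const cpu = process.cpuUsage(this.lastCpu);
    const cpuUsage = (cpu.user + cpu.system) / (elapsedMs * 1000);

    // Event loop utilization delta since the previous sample
    const elu = performance.eventLoopUtilization(this.lastElu);

    // Mean event loop delay in milliseconds (the histogram reports nanoseconds)
    const eventLoopDelay = this.histogram.mean / 1e6;

    this.lastCpu = process.cpuUsage();
    this.lastElu = performance.eventLoopUtilization();
    this.lastSampleAt = now;
    this.histogram.reset();

    return { cpuUsage, eventLoopDelay, eventLoopUtilization: elu.utilization };
  }
}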

10.2 Cache System

10.2.1 Multi-tier Cache Architecture

// src/cache/CacheManager.ts
export interface CacheEntry<T> {
  value: T;
  timestamp: number;
  ttl: number;
  accessCount: number;
  lastAccess: number;
  size?: number;
}

export interface CacheStrategy {
  name: string;
  shouldEvict(entry: CacheEntry<any>, now: number): boolean;
  getPriority(entry: CacheEntry<any>, now: number): number;
}

export interface CacheConfig {
  maxSize: number; // Maximum entries
  maxMemory?: number; // Maximum memory usage (bytes)
  defaultTTL: number; // Default TTL (milliseconds)
  strategy: CacheStrategy;
  enableStatistics: boolean;
}

// LRU cache strategy
export class LRUCacheStrategy implements CacheStrategy {
  name = 'LRU';

  shouldEvict(entry: CacheEntry<any>, now: number): boolean {
    return (now - entry.timestamp) > entry.ttl;
  }

  getPriority(entry: CacheEntry<any>, now: number): number {
    return now - entry.lastAccess; // Longer since last access = higher priority for eviction
  }
}

// LFU cache strategy
export class LFUCacheStrategy implements CacheStrategy {
  name = 'LFU';

  shouldEvict(entry: CacheEntry<any>, now: number): boolean {
    return (now - entry.timestamp) > entry.ttl;
  }

  getPriority(entry: CacheEntry<any>, now: number): number {
    return -entry.accessCount; // Lower access count = higher priority for eviction
  }
}

export class CacheManager<T = any> {
  private cache = new Map<string, CacheEntry<T>>();
  private config: CacheConfig;

  constructor(config: CacheConfig) {
    this.config = config;
  }

  async get(key: string): Promise<T | null> {
    const entry = this.cache.get(key);

    if (!entry) {
      return null;
    }

    const now = Date.now();

    // Check if expired
    if (this.config.strategy.shouldEvict(entry, now)) {
      this.cache.delete(key);
      return null;
    }

    // Update access information
    entry.accessCount++;
    entry.lastAccess = now;

    return entry.value;
  }

  async set(key: string, value: T, ttl?: number): Promise<void> {
    const now = Date.now();
    const entryTTL = ttl || this.config.defaultTTL;
    const size = this.estimateSize(value);

    const entry: CacheEntry<T> = {
      value,
      timestamp: now,
      ttl: entryTTL,
      accessCount: 1,
      lastAccess: now,
      size,
    };

    // Check if need to free space
    await this.ensureCapacity(size);

    this.cache.set(key, entry);
  }

  private async ensureCapacity(newEntrySize: number): Promise<void> {
    // Check entry count limit
    if (this.cache.size >= this.config.maxSize) {
      await this.evictEntries(1);
    }

    // Check memory limit (recompute usage on each pass so eviction progress is observed)
    if (this.config.maxMemory) {
      while (this.calculateMemoryUsage() + newEntrySize > this.config.maxMemory) {
        const evicted = await this.evictEntries(1);
        if (evicted === 0) break; // Cannot free more space
      }
    }
  }

  private async evictEntries(count: number): Promise<number> {
    if (this.cache.size === 0) return 0;

    const now = Date.now();
    const entries = Array.from(this.cache.entries());

    // Sort by strategy priority
    entries.sort(([, a], [, b]) => {
      return this.config.strategy.getPriority(b, now) - this.config.strategy.getPriority(a, now);
    });

    let evicted = 0;
    for (let i = 0; i < Math.min(count, entries.length); i++) {
      const [key] = entries[i];
      this.cache.delete(key);
      evicted++;
    }

    return evicted;
  }

  private calculateMemoryUsage(): number {
    let total = 0;
    for (const entry of this.cache.values()) {
      total += entry.size || 0;
    }
    return total;
  }

  private estimateSize(value: T): number {
    try {
      return JSON.stringify(value).length * 2; // UTF-16 encoding estimate
    } catch {
      return 1024; // Default size
    }
  }
}
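
The CacheManager above implements a single tier. Below is a minimal sketch of the multi-tier composition this section's title refers to, stacking a small, fast L1 in front of a larger L2 (the TieredCache name and file path are illustrative; in production the L2 tier might be a shared store such as Redis behind the same interface):

// src/cache/TieredCache.ts (illustrative composition of two CacheManager tiers)
export class TieredCache<T> {
  constructor(
    private l1: CacheManager<T>, // small and fast, short TTL
    private l2: CacheManager<T>  // larger, longer TTL
  ) {}

  async get(key: string): Promise<T | null> {
    const hit = await this.l1.get(key);
    if (hit !== null) return hit;

    const fallback = await this.l2.get(key);
    if (fallback !== null) {
      // Promote L2 hits so repeat reads are served by the fast tier
      await this.l1.set(key, fallback);
    }
    return fallback;
  }

  async set(key: string, value: T, ttl?: number): Promise<void> {
    // Write-through: keep both tiers consistent
    await Promise.all([this.l1.set(key, value, ttl), this.l2.set(key, value, ttl)]);
  }
}

// Example wiring: LRU for the hot tier, LFU for the capacity tier
const cache = new TieredCache<string>(
  new CacheManager({ maxSize: 1000, defaultTTL: 30_000, strategy: new LRUCacheStrategy(), enableStatistics: true }),
  new CacheManager({ maxSize: 50_000, defaultTTL: 300_000, strategy: new LFUCacheStrategy(), enableStatistics: true })
);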

10.3 Concurrency Handling and Async Optimization

10.3.1 Request Queue and Rate Limiting

// src/concurrency/RequestQueue.ts
export interface QueuedRequest {
  id: string;
  request: any;
  priority: number;
  timestamp: number;
  processor: (request: any) => Promise<any>; // Stored per request (see note below)
  resolve: (result: any) => void;
  reject: (error: any) => void;
}

export interface QueueConfig {
  maxConcurrent: number;
  maxQueue: number;
  timeout: number; // Request timeout
  priorityLevels: number;
}

export class RequestQueue {
  private queue: QueuedRequest[] = [];
  private processing = new Map<string, QueuedRequest>();
  private config: QueueConfig;
  private processingCount = 0;

  constructor(config: QueueConfig) {
    this.config = config;
  }

  async enqueue<T>(
    request: any,
    processor: (request: any) => Promise<T>,
    priority: number = 0
  ): Promise<T> {
    // Check queue capacity
    if (this.queue.length >= this.config.maxQueue) {
      throw new Error('Request queue is full');
    }

    return new Promise<T>((resolve, reject) => {
      const queuedRequest: QueuedRequest = {
        id: this.generateRequestId(),
        request,
        priority,
        timestamp: Date.now(),
        // Keep the processor with the request so that mixed workloads sharing
        // one queue are each handled by the processor they were enqueued with
        processor,
        resolve,
        reject,
      };

      // Insert into queue and keep it ordered by priority (highest first)
      this.queue.push(queuedRequest);
      this.queue.sort((a, b) => b.priority - a.priority);

      // Try to process immediately if a concurrency slot is free
      this.processNext();
    });
  }

  private async processNext(): Promise<void> {
    // Check concurrency limit
    if (this.processingCount >= this.config.maxConcurrent) {
      return;
    }

    // Get next request from queue
    const queuedRequest = this.queue.shift();
    if (!queuedRequest) {
      return;
    }

    // Enforce the configured timeout for requests that waited too long
    if (Date.now() - queuedRequest.timestamp > this.config.timeout) {
      queuedRequest.reject(new Error('Request timed out while queued'));
      setImmediate(() => this.processNext());
      return;
    }

    this.processingCount++;
    this.processing.set(queuedRequest.id, queuedRequest);

    try {
      // Process the request with the processor it was enqueued with
      const result = await queuedRequest.processor(queuedRequest.request);
      queuedRequest.resolve(result);

    } catch (error) {
      // Processing failed
      queuedRequest.reject(error);

    } finally {
      // Cleanup and schedule the next request
      this.processing.delete(queuedRequest.id);
      this.processingCount--;
      setImmediate(() => this.processNext());
    }
  }

  private generateRequestId(): string {
    return `req_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }
}
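
The section title also promises rate limiting, which the queue above does not enforce by itself. Below is a token-bucket sketch that could sit in front of enqueue (the class name and file path are assumptions):

// src/concurrency/TokenBucketRateLimiter.ts (illustrative)
export class TokenBucketRateLimiter {
  private tokens: number;
  private lastRefill = Date.now();

  constructor(
    private capacity: number,        // maximum burst size
    private refillPerSecond: number  // sustained request rate
  ) {
    this.tokens = capacity;
  }

  // Consumes tokens and returns true if the request is within the rate limit
  tryAcquire(cost: number = 1): boolean {
    this.refill();
    if (this.tokens >= cost) {
      this.tokens -= cost;
      return true;
    }
    return false;
  }

  private refill(): void {
    const now = Date.now();
    const elapsedSeconds = (now - this.lastRefill) / 1000;
    this.tokens = Math.min(this.capacity, this.tokens + elapsedSeconds * this.refillPerSecond);
    this.lastRefill = now;
  }
}

// Usage: reject before a request ever enters the queue
// if (!limiter.tryAcquire()) throw new Error('Rate limit exceeded');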

10.4 Memory Management and Optimization

10.4.1 Object Pool and Reuse

// src/memory/ObjectPool.ts
export interface PoolConfig<T> {
  initialSize: number;
  maxSize: number;
  factory: () => T;
  reset?: (obj: T) => void;
  validate?: (obj: T) => boolean;
}

export class ObjectPool<T> {
  private available: T[] = [];
  private inUse = new Set<T>();
  private config: PoolConfig<T>;
  private created = 0;

  constructor(config: PoolConfig<T>) {
    this.config = config;

    // Pre-create objects
    for (let i = 0; i < config.initialSize; i++) {
      this.available.push(this.createObject());
    }
  }

  acquire(): T {
    let obj: T;

    if (this.available.length > 0) {
      obj = this.available.pop()!;
    } else if (this.created < this.config.maxSize) {
      obj = this.createObject();
    } else {
      throw new Error('Object pool exhausted');
    }

    // Replace objects that fail validation; the created count is unchanged
    // because one invalid object goes out and one fresh object comes in
    if (this.config.validate && !this.config.validate(obj)) {
      obj = this.config.factory();
    }

    this.inUse.add(obj);
    return obj;
  }

  release(obj: T): void {
    if (!this.inUse.has(obj)) {
      return;
    }

    this.inUse.delete(obj);

    // Reset object state
    if (this.config.reset) {
      try {
        this.config.reset(obj);
      } catch (error) {
        this.created--;
        return;
      }
    }

    // Validate reset object
    if (this.config.validate && !this.config.validate(obj)) {
      this.created--;
      return;
    }

    this.available.push(obj);
  }

  clear(): void {
    this.available.length = 0;
    this.inUse.clear();
    this.created = 0;
  }

  private createObject(): T {
    // Callers decide where the object goes (available or inUse); pushing to
    // available here would let a freshly acquired object live in both lists
    const obj = this.config.factory();
    this.created++;
    return obj;
  }
}
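
A brief usage sketch (the ParseScratch shape is purely illustrative) showing the acquire/release discipline; releasing in finally ensures the object returns to the pool even when processing throws:

interface ParseScratch {
  tokens: string[];
  depth: number;
}

const scratchPool = new ObjectPool<ParseScratch>({
  initialSize: 4,
  maxSize: 32,
  factory: () => ({ tokens: [], depth: 0 }),
  reset: (s) => { s.tokens.length = 0; s.depth = 0; },
  validate: (s) => Array.isArray(s.tokens),
});

const scratch = scratchPool.acquire();
try {
  // ... use scratch while handling a request ...
} finally {
  scratchPool.release(scratch); // Always return the object, even on errors
}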

// Buffer pool for reusing Buffer objects
export class BufferPool {
  private pools = new Map<number, ObjectPool<Buffer>>();
  private commonSizes = [1024, 4096, 16384, 65536, 262144];

  constructor() {
    // Create object pools for common sizes
    for (const size of this.commonSizes) {
      this.createPoolForSize(size);
    }
  }

  acquire(size: number): Buffer {
    const poolSize = this.findBestPoolSize(size);
    const pool = this.pools.get(poolSize);

    if (pool) {
      try {
        const buffer = pool.acquire();
        return size < poolSize ? buffer.subarray(0, size) : buffer;
      } catch (error) {
        // Pool exhausted, create new buffer
      }
    }

    return Buffer.alloc(size);
  }

  release(buffer: Buffer): void {
    const size = buffer.length;
    const poolSize = this.commonSizes.find(s => s >= size);

    if (poolSize) {
      const pool = this.pools.get(poolSize);
      // Only exact-size buffers are pooled again; trimmed views are dropped
      if (pool && size === poolSize) {
        buffer.fill(0); // Clear data before reuse
        pool.release(buffer);
      }
    }
  }

  private findBestPoolSize(size: number): number {
    return this.commonSizes.find(s => s >= size) || size;
  }

  private createPoolForSize(size: number): void {
    const pool = new ObjectPool<Buffer>({
      initialSize: 10,
      maxSize: 50,
      factory: () => Buffer.alloc(size),
      reset: (buffer) => buffer.fill(0),
      validate: (buffer) => buffer.length === size,
    });

    this.pools.set(size, pool);
  }
}
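
Usage note (sizes illustrative): acquire rounds up to the nearest pooled size and trims the returned view to the requested length; because release only accepts buffers whose length matches a pool size exactly, trimmed views are simply dropped for the garbage collector:

const buffers = new BufferPool();

const exact = buffers.acquire(4096);   // served whole from the 4096-byte pool
// ... fill and use exact ...
buffers.release(exact);                // returned to the pool for reuse

const trimmed = buffers.acquire(3000); // a 3000-byte view of a 4096-byte buffer
buffers.release(trimmed);              // length mismatch: dropped, not pooled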

10.4.2 Memory Monitoring and Leak Detection

// src/memory/MemoryMonitor.ts
export interface MemorySnapshot {
  timestamp: Date;
  heapUsed: number;
  heapTotal: number;
  external: number;
  rss: number;
}

export interface MemoryLeak {
  id: string;
  detectedAt: Date;
  type: 'heap-growth' | 'external-growth';
  severity: 'low' | 'medium' | 'high' | 'critical';
  details: Record<string, any>;
  snapshots: MemorySnapshot[];
}

export class MemoryMonitor {
  private snapshots: MemorySnapshot[] = [];
  private detectedLeaks: MemoryLeak[] = [];
  private timer?: NodeJS.Timeout;

  // Thresholds (growth is measured between consecutive 10-sample windows)
  private thresholds = {
    heapGrowthRate: 10 * 1024 * 1024, // 10MB
    externalGrowthRate: 5 * 1024 * 1024, // 5MB
    maxHeapSize: 500 * 1024 * 1024, // 500MB
  };

  start(intervalMs: number = 60000): void {
    this.timer = setInterval(() => {
      this.collectMemoryData();
    }, intervalMs);
    this.timer.unref(); // Don't keep the process alive just for monitoring
  }

  stop(): void {
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = undefined;
    }
  }

  private collectMemoryData(): void {
    const snapshot = this.takeSnapshot();
    this.snapshots.push(snapshot);

    // Limit snapshot count
    if (this.snapshots.length > 1000) {
      this.snapshots.shift();
    }

    // Check for memory leaks
    this.detectMemoryLeaks();
  }

  private takeSnapshot(): MemorySnapshot {
    const memUsage = process.memoryUsage();

    return {
      timestamp: new Date(),
      heapUsed: memUsage.heapUsed,
      heapTotal: memUsage.heapTotal,
      external: memUsage.external,
      rss: memUsage.rss,
    };
  }

  private detectMemoryLeaks(): void {
    if (this.snapshots.length < 10) {
      return; // Need enough data points
    }

    const recent = this.snapshots.slice(-10);
    const old = this.snapshots.slice(-20, -10);

    if (old.length < 10) return;

    // Check heap growth
    this.checkHeapGrowth(old, recent);
  }

  private checkHeapGrowth(oldSnapshots: MemorySnapshot[], recentSnapshots: MemorySnapshot[]): void {
    const oldAvg = this.calculateAverage(oldSnapshots, 'heapUsed');
    const recentAvg = this.calculateAverage(recentSnapshots, 'heapUsed');
    const growth = recentAvg - oldAvg;

    if (growth > this.thresholds.heapGrowthRate) {
      const leak: MemoryLeak = {
        id: this.generateLeakId(),
        detectedAt: new Date(),
        type: 'heap-growth',
        severity: this.calculateSeverity(growth, this.thresholds.heapGrowthRate),
        details: {
          growthBytes: growth,
          growthMB: Math.round(growth / 1024 / 1024),
        },
        snapshots: [...oldSnapshots, ...recentSnapshots],
      };

      this.detectedLeaks.push(leak);
      console.warn('Heap memory leak detected', {
        leakId: leak.id,
        growth: Math.round(growth / 1024 / 1024) + 'MB',
        severity: leak.severity,
      });
    }
  }

  private calculateAverage(snapshots: MemorySnapshot[], field: keyof MemorySnapshot): number {
    const values = snapshots.map(s => s[field] as number);
    return values.reduce((sum, val) => sum + val, 0) / values.length;
  }

  private calculateSeverity(value: number, threshold: number): 'low' | 'medium' | 'high' | 'critical' {
    const ratio = value / threshold;

    if (ratio >= 5) return 'critical';
    if (ratio >= 3) return 'high';
    if (ratio >= 2) return 'medium';
    return 'low';
  }

  private generateLeakId(): string {
    return `leak_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }
}
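
When a leak is flagged as critical, a V8 heap snapshot can be captured for offline analysis in Chrome DevTools. A hedged sketch using Node's built-in v8 module (the onLeakDetected hook is an assumption, not part of MemoryMonitor above):

import { writeHeapSnapshot } from 'node:v8';

// Hypothetical hook: call this from detectMemoryLeaks when a leak is recorded
function onLeakDetected(leak: MemoryLeak): void {
  if (leak.severity === 'critical') {
    // writeHeapSnapshot is synchronous and pauses the process; use sparingly
    const file = writeHeapSnapshot();
    console.warn('Heap snapshot written for offline analysis', {
      leakId: leak.id,
      file,
    });
  }
}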

10.5 Scalable Architecture Design

10.5.1 Microservice Architecture Pattern

// src/architecture/MicroserviceServer.ts
export interface ServiceConfig {
  name: string;
  version: string;
  port?: number;
  dependencies?: string[];
}

export interface ServiceDiscovery {
  register(service: ServiceConfig, endpoint: string): Promise<void>;
  discover(serviceName: string): Promise<string[]>;
  unregister(serviceName: string, endpoint: string): Promise<void>;
}

export class InMemoryServiceDiscovery implements ServiceDiscovery {
  private services = new Map<string, Set<string>>();

  async register(service: ServiceConfig, endpoint: string): Promise<void> {
    if (!this.services.has(service.name)) {
      this.services.set(service.name, new Set());
    }

    this.services.get(service.name)!.add(endpoint);
    console.log('Service registered', { serviceName: service.name, endpoint });
  }

  async discover(serviceName: string): Promise<string[]> {
    const endpoints = this.services.get(serviceName);
    return endpoints ? Array.from(endpoints) : [];
  }

  async unregister(serviceName: string, endpoint: string): Promise<void> {
    const endpoints = this.services.get(serviceName);
    if (endpoints) {
      endpoints.delete(endpoint);
      if (endpoints.size === 0) {
        this.services.delete(serviceName);
      }
    }
  }
}
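
A short usage sketch (service names and endpoints are illustrative; assumes an async context). The in-memory implementation suits tests and single-process setups; the same interface could be backed by Consul, etcd, or DNS in production:

const discovery = new InMemoryServiceDiscovery();

await discovery.register({ name: 'mcp-tools', version: '1.0.0' }, 'http://10.0.0.5:8080');
await discovery.register({ name: 'mcp-tools', version: '1.0.0' }, 'http://10.0.0.6:8080');

const endpoints = await discovery.discover('mcp-tools');
// ['http://10.0.0.5:8080', 'http://10.0.0.6:8080']

await discovery.unregister('mcp-tools', 'http://10.0.0.5:8080');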

10.5.2 Load Balancing and High Availability

// src/architecture/LoadBalancer.ts
export interface LoadBalancingStrategy {
  name: string;
  selectEndpoint(endpoints: ServiceEndpoint[]): ServiceEndpoint | null;
}

export interface ServiceEndpoint {
  id: string;
  url: string;
  weight: number;
  healthy: boolean;
  responseTime: number;
}

// Round-robin strategy
export class RoundRobinStrategy implements LoadBalancingStrategy {
  name = 'round-robin';
  private currentIndex = 0;

  selectEndpoint(endpoints: ServiceEndpoint[]): ServiceEndpoint | null {
    const healthyEndpoints = endpoints.filter(ep => ep.healthy);
    if (healthyEndpoints.length === 0) return null;

    const selected = healthyEndpoints[this.currentIndex % healthyEndpoints.length];
    this.currentIndex = (this.currentIndex + 1) % healthyEndpoints.length;

    return selected;
  }
}

// Weighted strategy (implemented here as weighted random selection, a common
// stateless approximation of weighted round-robin)
export class WeightedRoundRobinStrategy implements LoadBalancingStrategy {
  name = 'weighted-round-robin';

  selectEndpoint(endpoints: ServiceEndpoint[]): ServiceEndpoint | null {
    const healthyEndpoints = endpoints.filter(ep => ep.healthy);
    if (healthyEndpoints.length === 0) return null;

    // Calculate total weight
    const totalWeight = healthyEndpoints.reduce((sum, ep) => sum + ep.weight, 0);
    if (totalWeight === 0) return healthyEndpoints[0];

    // Pick a random point in [0, totalWeight) and walk the cumulative weights;
    // the strict comparison skips zero-weight endpoints
    const random = Math.random() * totalWeight;
    let currentWeight = 0;

    for (const endpoint of healthyEndpoints) {
      currentWeight += endpoint.weight;
      if (random < currentWeight) {
        return endpoint;
      }
    }

    return healthyEndpoints[0];
  }
}
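
The ServiceEndpoint interface carries a responseTime field that the two strategies above ignore. Below is a further sketch (an illustrative addition, not from the original design) that uses it to prefer the fastest healthy endpoint:

// Least-response-time strategy (illustrative)
export class LeastResponseTimeStrategy implements LoadBalancingStrategy {
  name = 'least-response-time';

  selectEndpoint(endpoints: ServiceEndpoint[]): ServiceEndpoint | null {
    const healthyEndpoints = endpoints.filter(ep => ep.healthy);
    if (healthyEndpoints.length === 0) return null;

    // Pick the endpoint with the lowest observed response time
    return healthyEndpoints.reduce((best, ep) =>
      ep.responseTime < best.responseTime ? ep : best
    );
  }
}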

export class LoadBalancer {
  private endpoints = new Map<string, ServiceEndpoint[]>();
  private strategies = new Map<string, LoadBalancingStrategy>();
  private defaultStrategy: string = 'round-robin';

  constructor() {
    // Register default strategies
    this.registerStrategy(new RoundRobinStrategy());
    this.registerStrategy(new WeightedRoundRobinStrategy());
  }

  registerStrategy(strategy: LoadBalancingStrategy): void {
    this.strategies.set(strategy.name, strategy);
  }

  addEndpoints(serviceName: string, endpoints: ServiceEndpoint[]): void {
    this.endpoints.set(serviceName, endpoints);
  }

  async callService(
    serviceName: string,
    requestFn: (endpoint: ServiceEndpoint) => Promise<any>,
    strategyName?: string
  ): Promise<any> {
    const endpoints = this.endpoints.get(serviceName);
    if (!endpoints || endpoints.length === 0) {
      throw new Error(`No endpoints available for service: ${serviceName}`);
    }

    const strategy = this.strategies.get(strategyName || this.defaultStrategy);
    if (!strategy) {
      throw new Error(`Unknown load balancing strategy: ${strategyName}`);
    }

    const endpoint = strategy.selectEndpoint(endpoints);
    if (!endpoint) {
      throw new Error(`No healthy endpoints available for service: ${serviceName}`);
    }

    return await requestFn(endpoint);
  }
}
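
Putting the pieces together (endpoint data and URLs are illustrative; assumes an async context and the global fetch available in Node 18+):

const lb = new LoadBalancer();
lb.registerStrategy(new LeastResponseTimeStrategy());

lb.addEndpoints('mcp-tools', [
  { id: 'a', url: 'http://10.0.0.5:8080', weight: 2, healthy: true, responseTime: 12 },
  { id: 'b', url: 'http://10.0.0.6:8080', weight: 1, healthy: true, responseTime: 30 },
]);

const result = await lb.callService(
  'mcp-tools',
  async (endpoint) => {
    const res = await fetch(`${endpoint.url}/rpc`, { method: 'POST' });
    return res.json();
  },
  'weighted-round-robin'
);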

Chapter Summary

Chapter 10 delves into MCP Server performance optimization and scalability design:

Core Knowledge Points

  1. Performance Monitoring: Established a complete performance metrics collection and analysis system
  2. Cache System: Implemented a multi-tier cache architecture and several caching strategies
  3. Concurrency Handling: Built request queues, rate limiting, and other concurrency mechanisms
  4. Memory Management: Implemented object pools, memory monitoring, and leak detection
  5. Microservice Architecture: Designed service discovery, load balancing, and distributed system components

Practical Points

  • Establish a comprehensive performance monitoring and analysis system
  • Implement an intelligent multi-tier caching system
  • Design sound concurrency handling and task scheduling mechanisms
  • Manage and optimize memory proactively
  • Adopt a microservice architecture to support horizontal scaling
  • Implement load balancing and high-availability guarantees

This chapter covered the complete technology stack for building high-performance, scalable MCP Servers, providing a solid foundation for handling large-scale, high-concurrency scenarios.