Chapter 10: Performance Optimization and Scalability
Learning Objectives
- Analyze and optimize MCP Server performance bottlenecks
- Implement caching and resource pool management
- Master concurrency handling and asynchronous programming
- Learn memory management and garbage collection optimization
- Design scalable architecture patterns
10.1 Performance Analysis and Monitoring
10.1.1 Performance Metrics System
// src/performance/MetricsCollector.ts
export interface PerformanceMetrics {
  // Request processing performance
  requestMetrics: {
    totalRequests: number;
    requestsPerSecond: number;
    averageResponseTime: number;
    p50ResponseTime: number;
    p95ResponseTime: number;
    p99ResponseTime: number;
    errorRate: number;
  };
  // System resource usage
  systemMetrics: {
    cpuUsage: number;
    memoryUsage: {
      heapUsed: number;
      heapTotal: number;
      rss: number;
      external: number;
    };
    eventLoopDelay: number;
    eventLoopUtilization: number;
  };
  // Business metrics
  businessMetrics: {
    toolExecutions: number;
    resourceAccesses: number;
    promptGenerations: number;
    cacheHitRate: number;
  };
  // Connection metrics
  connectionMetrics: {
    activeConnections: number;
    totalConnections: number;
    connectionRate: number;
    avgConnectionDuration: number;
  };
}
export class MetricsCollector {
  private requestTimes: number[] = [];
  private requestCount = 0;
  private errorCount = 0;
  private readonly startTime = Date.now(); // Needed for a correct requests-per-second rate

  recordRequest(duration: number, success: boolean): void {
    this.requestCount++;
    if (!success) this.errorCount++;
    this.requestTimes.push(duration);
    // Limit sample size
    if (this.requestTimes.length > 10000) {
      this.requestTimes.shift();
    }
  }

  getMetrics(): PerformanceMetrics {
    const sortedTimes = [...this.requestTimes].sort((a, b) => a - b);
    const p50 = this.getPercentile(sortedTimes, 0.5);
    const p95 = this.getPercentile(sortedTimes, 0.95);
    const p99 = this.getPercentile(sortedTimes, 0.99);
    const avgResponseTime = sortedTimes.length > 0
      ? sortedTimes.reduce((sum, time) => sum + time, 0) / sortedTimes.length
      : 0;
    const memUsage = process.memoryUsage();
    const elapsedSeconds = (Date.now() - this.startTime) / 1000;
    return {
      requestMetrics: {
        totalRequests: this.requestCount,
        // Rate over the collector's lifetime, not since the Unix epoch
        requestsPerSecond: elapsedSeconds > 0 ? this.requestCount / elapsedSeconds : 0,
        averageResponseTime: avgResponseTime,
        p50ResponseTime: p50,
        p95ResponseTime: p95,
        p99ResponseTime: p99,
        errorRate: this.requestCount > 0 ? this.errorCount / this.requestCount : 0,
      },
      systemMetrics: {
        // Placeholders: wire these up to real probes (see the event-loop sketch below)
        cpuUsage: 0,
        memoryUsage: {
          heapUsed: memUsage.heapUsed,
          heapTotal: memUsage.heapTotal,
          rss: memUsage.rss,
          external: memUsage.external,
        },
        eventLoopDelay: 0,
        eventLoopUtilization: 0,
      },
      businessMetrics: {
        // Placeholders: increment these from tool/resource/prompt handlers
        toolExecutions: 0,
        resourceAccesses: 0,
        promptGenerations: 0,
        cacheHitRate: 0,
      },
      connectionMetrics: {
        activeConnections: 0,
        totalConnections: 0,
        connectionRate: 0,
        avgConnectionDuration: 0,
      },
    };
  }

  private getPercentile(sortedArray: number[], percentile: number): number {
    if (sortedArray.length === 0) return 0;
    const index = Math.ceil(sortedArray.length * percentile) - 1;
    return sortedArray[Math.max(0, index)];
  }
}
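The systemMetrics fields above are returned as placeholders. One way to populate the event-loop figures is Node's built-in perf_hooks module; the following is a minimal sketch of that wiring, assuming Node 16+ (the sampleEventLoop helper is illustrative, not part of the collector):
// Sketch: event-loop delay and utilization via perf_hooks (assumes Node 16+)
import { monitorEventLoopDelay, performance } from 'node:perf_hooks';

const loopDelayHistogram = monitorEventLoopDelay({ resolution: 20 });
loopDelayHistogram.enable();

let lastELU = performance.eventLoopUtilization();

// Illustrative helper producing values for the systemMetrics placeholders
function sampleEventLoop(): { eventLoopDelay: number; eventLoopUtilization: number } {
  const elu = performance.eventLoopUtilization(lastELU); // delta since last sample
  lastELU = performance.eventLoopUtilization();
  const delayMs = loopDelayHistogram.mean / 1e6; // histogram reports nanoseconds
  loopDelayHistogram.reset();
  return { eventLoopDelay: delayMs, eventLoopUtilization: elu.utilization };
}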
10.2 Cache System
10.2.1 Multi-tier Cache Architecture
// src/cache/CacheManager.ts
export interface CacheEntry<T> {
  value: T;
  timestamp: number;
  ttl: number;
  accessCount: number;
  lastAccess: number;
  size?: number;
}

export interface CacheStrategy {
  name: string;
  shouldEvict(entry: CacheEntry<any>, now: number): boolean;
  getPriority(entry: CacheEntry<any>, now: number): number;
}

export interface CacheConfig {
  maxSize: number;     // Maximum entries
  maxMemory?: number;  // Maximum memory usage (bytes)
  defaultTTL: number;  // Default TTL (milliseconds)
  strategy: CacheStrategy;
  enableStatistics: boolean;
}
// LRU cache strategy
export class LRUCacheStrategy implements CacheStrategy {
  name = 'LRU';

  shouldEvict(entry: CacheEntry<any>, now: number): boolean {
    return (now - entry.timestamp) > entry.ttl;
  }

  getPriority(entry: CacheEntry<any>, now: number): number {
    return now - entry.lastAccess; // Longer since last access = higher eviction priority
  }
}

// LFU cache strategy
export class LFUCacheStrategy implements CacheStrategy {
  name = 'LFU';

  shouldEvict(entry: CacheEntry<any>, now: number): boolean {
    return (now - entry.timestamp) > entry.ttl;
  }

  getPriority(entry: CacheEntry<any>, now: number): number {
    return -entry.accessCount; // Lower access count = higher eviction priority
  }
}
export class CacheManager<T = any> {
  private cache = new Map<string, CacheEntry<T>>();
  private config: CacheConfig;

  constructor(config: CacheConfig) {
    this.config = config;
  }

  async get(key: string): Promise<T | null> {
    const entry = this.cache.get(key);
    if (!entry) {
      return null;
    }
    const now = Date.now();
    // Check if expired
    if (this.config.strategy.shouldEvict(entry, now)) {
      this.cache.delete(key);
      return null;
    }
    // Update access information
    entry.accessCount++;
    entry.lastAccess = now;
    return entry.value;
  }

  async set(key: string, value: T, ttl?: number): Promise<void> {
    const now = Date.now();
    const entryTTL = ttl ?? this.config.defaultTTL; // ?? keeps an explicit ttl of 0
    const size = this.estimateSize(value);
    const entry: CacheEntry<T> = {
      value,
      timestamp: now,
      ttl: entryTTL,
      accessCount: 1,
      lastAccess: now,
      size,
    };
    // Free space first if needed
    await this.ensureCapacity(size);
    this.cache.set(key, entry);
  }

  private async ensureCapacity(newEntrySize: number): Promise<void> {
    // Check entry count limit
    if (this.cache.size >= this.config.maxSize) {
      await this.evictEntries(1);
    }
    // Check memory limit; recompute usage after every eviction
    if (this.config.maxMemory) {
      while (this.calculateMemoryUsage() + newEntrySize > this.config.maxMemory) {
        const evicted = await this.evictEntries(1);
        if (evicted === 0) break; // Cannot free more space
      }
    }
  }

  private async evictEntries(count: number): Promise<number> {
    if (this.cache.size === 0) return 0;
    const now = Date.now();
    const entries = Array.from(this.cache.entries());
    // Sort by strategy priority, highest eviction priority first
    entries.sort(([, a], [, b]) => {
      return this.config.strategy.getPriority(b, now) - this.config.strategy.getPriority(a, now);
    });
    let evicted = 0;
    for (let i = 0; i < Math.min(count, entries.length); i++) {
      const [key] = entries[i];
      this.cache.delete(key);
      evicted++;
    }
    return evicted;
  }

  private calculateMemoryUsage(): number {
    let total = 0;
    for (const entry of this.cache.values()) {
      total += entry.size || 0;
    }
    return total;
  }

  private estimateSize(value: T): number {
    try {
      return JSON.stringify(value).length * 2; // Rough UTF-16 estimate
    } catch {
      return 1024; // Fallback for non-serializable values
    }
  }
}
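To make the cache concrete, here is a brief usage sketch in the cache-aside style; the toolResultCache name, the helper, and the configuration values are illustrative, not part of the chapter's API:
// Usage sketch: cache tool results with an LRU policy (names are illustrative)
const toolResultCache = new CacheManager<string>({
  maxSize: 1000,
  maxMemory: 50 * 1024 * 1024, // 50MB
  defaultTTL: 60_000,          // 1 minute
  strategy: new LRUCacheStrategy(),
  enableStatistics: true,
});

async function cachedToolCall(key: string, run: () => Promise<string>): Promise<string> {
  const hit = await toolResultCache.get(key);
  if (hit !== null) return hit; // Cache hit: skip the expensive call
  const result = await run();
  await toolResultCache.set(key, result);
  return result;
}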
10.3 Concurrency Handling and Async Optimization
10.3.1 Request Queue and Rate Limiting
// src/concurrency/RequestQueue.ts
export interface QueuedRequest {
  id: string;
  request: any;
  priority: number;
  timestamp: number;
  resolve: (result: any) => void;
  reject: (error: any) => void;
}

export interface QueueConfig {
  maxConcurrent: number;
  maxQueue: number;
  timeout: number; // Request timeout
  priorityLevels: number;
}
// Note: QueueConfig.timeout and priorityLevels are declared for completeness;
// their enforcement is omitted in this excerpt.
export class RequestQueue {
  private queue: QueuedRequest[] = [];
  private processing = new Map<string, QueuedRequest>();
  private config: QueueConfig;
  private processingCount = 0;

  constructor(config: QueueConfig) {
    this.config = config;
  }

  async enqueue<T>(
    request: any,
    processor: (request: any) => Promise<T>,
    priority: number = 0
  ): Promise<T> {
    // Reject early when the queue is at capacity (backpressure)
    if (this.queue.length >= this.config.maxQueue) {
      throw new Error('Request queue is full');
    }
    return new Promise<T>((resolve, reject) => {
      const queuedRequest: QueuedRequest = {
        id: this.generateRequestId(),
        request,
        priority,
        timestamp: Date.now(),
        resolve,
        reject,
      };
      // Insert into queue and keep it sorted by priority (highest first)
      this.queue.push(queuedRequest);
      this.queue.sort((a, b) => b.priority - a.priority);
      // Try to process immediately if a slot is free
      this.processNext(processor);
    });
  }

  private async processNext<T>(processor: (request: any) => Promise<T>): Promise<void> {
    // Respect the concurrency limit
    if (this.processingCount >= this.config.maxConcurrent) {
      return;
    }
    // Get next request from queue
    const queuedRequest = this.queue.shift();
    if (!queuedRequest) {
      return;
    }
    this.processingCount++;
    this.processing.set(queuedRequest.id, queuedRequest);
    try {
      const result = await processor(queuedRequest.request);
      queuedRequest.resolve(result);
    } catch (error) {
      queuedRequest.reject(error);
    } finally {
      // Clean up and schedule the next request without growing the stack
      this.processing.delete(queuedRequest.id);
      this.processingCount--;
      setImmediate(() => this.processNext(processor));
    }
  }

  private generateRequestId(): string {
    return `req_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }
}
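A brief usage sketch follows; handleToolCall and the configuration values are illustrative, and the top-level await assumes an ES module context:
// Usage sketch (handleToolCall is an assumed application-level handler)
const queue = new RequestQueue({
  maxConcurrent: 8,
  maxQueue: 1000,
  timeout: 30_000,
  priorityLevels: 3,
});

async function handleToolCall(request: unknown): Promise<unknown> {
  // ... actual tool execution would go here
  return { ok: true };
}

// A priority-2 request jumps ahead of default-priority (0) work
const result = await queue.enqueue({ tool: 'search', args: {} }, handleToolCall, 2);
console.log(result); // { ok: true }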
10.4 Memory Management and Optimization
10.4.1 Object Pool and Reuse
// src/memory/ObjectPool.ts
export interface PoolConfig<T> {
  initialSize: number;
  maxSize: number;
  factory: () => T;
  reset?: (obj: T) => void;
  validate?: (obj: T) => boolean;
}
export class ObjectPool<T> {
  private available: T[] = [];
  private inUse = new Set<T>();
  private config: PoolConfig<T>;
  private created = 0;

  constructor(config: PoolConfig<T>) {
    this.config = config;
    // Pre-create objects
    for (let i = 0; i < config.initialSize; i++) {
      this.available.push(this.createObject());
    }
  }

  acquire(): T {
    let obj: T;
    if (this.available.length > 0) {
      obj = this.available.pop()!;
    } else if (this.created < this.config.maxSize) {
      obj = this.createObject();
    } else {
      throw new Error('Object pool exhausted');
    }
    // Replace the object one-for-one if it fails validation (created count unchanged)
    if (this.config.validate && !this.config.validate(obj)) {
      obj = this.config.factory();
    }
    this.inUse.add(obj);
    return obj;
  }

  release(obj: T): void {
    if (!this.inUse.has(obj)) {
      return; // Ignore objects that did not come from this pool
    }
    this.inUse.delete(obj);
    // Reset object state; discard the object if reset fails
    if (this.config.reset) {
      try {
        this.config.reset(obj);
      } catch (error) {
        this.created--;
        return;
      }
    }
    // Discard objects that fail validation after reset
    if (this.config.validate && !this.config.validate(obj)) {
      this.created--;
      return;
    }
    this.available.push(obj);
  }

  clear(): void {
    // Note: objects still in use are forgotten, not destroyed
    this.available.length = 0;
    this.inUse.clear();
    this.created = 0;
  }

  private createObject(): T {
    // Creation only counts the object; callers decide whether it goes
    // to `available` (pre-warming) or straight to a caller (acquire)
    const obj = this.config.factory();
    this.created++;
    return obj;
  }
}
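As a usage sketch, the pool might manage per-request context objects; the RequestContext shape below is illustrative:
// Usage sketch: pooling reusable per-request context objects
interface RequestContext { headers: Record<string, string>; body: unknown }

const contextPool = new ObjectPool<RequestContext>({
  initialSize: 10,
  maxSize: 100,
  factory: () => ({ headers: {}, body: null }),
  reset: (ctx) => { ctx.headers = {}; ctx.body = null; },
});

const ctx = contextPool.acquire();
try {
  // ... use ctx for one request ...
} finally {
  contextPool.release(ctx); // Always return objects, even on error paths
}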
// Buffer pool for reusing Buffer objects
export class BufferPool {
  private pools = new Map<number, ObjectPool<Buffer>>();
  private commonSizes = [1024, 4096, 16384, 65536, 262144];

  constructor() {
    // Create object pools for common sizes
    for (const size of this.commonSizes) {
      this.createPoolForSize(size);
    }
  }

  acquire(size: number): Buffer {
    const poolSize = this.findBestPoolSize(size);
    const pool = this.pools.get(poolSize);
    if (pool) {
      try {
        // Hand out the full pooled buffer rather than a sliced view: a view
        // could never be released (its length differs from the pool size),
        // which would permanently leak the parent buffer from the pool.
        return pool.acquire();
      } catch {
        // Pool exhausted; fall through and allocate a fresh buffer
      }
    }
    return Buffer.alloc(size);
  }

  release(buffer: Buffer): void {
    const pool = this.pools.get(buffer.length);
    if (pool) {
      buffer.fill(0); // Clear data before returning to the pool
      pool.release(buffer); // Non-pooled buffers are silently ignored
    }
  }

  private findBestPoolSize(size: number): number {
    return this.commonSizes.find(s => s >= size) || size;
  }

  private createPoolForSize(size: number): void {
    const pool = new ObjectPool<Buffer>({
      initialSize: 10,
      maxSize: 50,
      factory: () => Buffer.alloc(size),
      reset: (buffer) => { buffer.fill(0); },
      validate: (buffer) => buffer.length === size,
    });
    this.pools.set(size, pool);
  }
}
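A short usage sketch for the buffer pool; note that a pooled buffer may be larger than requested, so callers track their logical length:
// Usage sketch: stage a serialized message in a pooled buffer
const bufferPool = new BufferPool();
const payload = Buffer.from(JSON.stringify({ jsonrpc: '2.0', method: 'ping' }));
const buf = bufferPool.acquire(payload.length); // may be larger than requested
payload.copy(buf);
// ... write buf.subarray(0, payload.length) to the transport ...
bufferPool.release(buf); // zeroed and returned to its pool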
10.4.2 Memory Monitoring and Leak Detection
// src/memory/MemoryMonitor.ts
export interface MemorySnapshot {
  timestamp: Date;
  heapUsed: number;
  heapTotal: number;
  external: number;
  rss: number;
}

export interface MemoryLeak {
  id: string;
  detectedAt: Date;
  type: 'heap-growth' | 'external-growth';
  severity: 'low' | 'medium' | 'high' | 'critical';
  details: Record<string, any>;
  snapshots: MemorySnapshot[];
}
export class MemoryMonitor {
  private snapshots: MemorySnapshot[] = [];
  private detectedLeaks: MemoryLeak[] = [];
  private timer?: NodeJS.Timeout;

  // Thresholds
  private thresholds = {
    heapGrowthRate: 10 * 1024 * 1024,    // 10MB/minute
    externalGrowthRate: 5 * 1024 * 1024, // 5MB/minute
    maxHeapSize: 500 * 1024 * 1024,      // 500MB
  };

  start(intervalMs: number = 60000): void {
    this.timer = setInterval(() => {
      this.collectMemoryData();
    }, intervalMs);
    this.timer.unref(); // Do not keep the process alive just for monitoring
  }

  stop(): void {
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = undefined;
    }
  }

  private collectMemoryData(): void {
    const snapshot = this.takeSnapshot();
    this.snapshots.push(snapshot);
    // Limit snapshot count
    if (this.snapshots.length > 1000) {
      this.snapshots.shift();
    }
    // Check for memory leaks
    this.detectMemoryLeaks();
  }

  private takeSnapshot(): MemorySnapshot {
    const memUsage = process.memoryUsage();
    return {
      timestamp: new Date(),
      heapUsed: memUsage.heapUsed,
      heapTotal: memUsage.heapTotal,
      external: memUsage.external,
      rss: memUsage.rss,
    };
  }

  private detectMemoryLeaks(): void {
    if (this.snapshots.length < 20) {
      return; // Need two full windows of data points
    }
    const recent = this.snapshots.slice(-10);
    const old = this.snapshots.slice(-20, -10);
    // Compare heap growth between the two windows
    this.checkHeapGrowth(old, recent);
  }

  private checkHeapGrowth(oldSnapshots: MemorySnapshot[], recentSnapshots: MemorySnapshot[]): void {
    const oldAvg = this.calculateAverage(oldSnapshots, 'heapUsed');
    const recentAvg = this.calculateAverage(recentSnapshots, 'heapUsed');
    const growth = recentAvg - oldAvg;
    if (growth > this.thresholds.heapGrowthRate) {
      const leak: MemoryLeak = {
        id: this.generateLeakId(),
        detectedAt: new Date(),
        type: 'heap-growth',
        severity: this.calculateSeverity(growth, this.thresholds.heapGrowthRate),
        details: {
          growthBytes: growth,
          growthMB: Math.round(growth / 1024 / 1024),
        },
        snapshots: [...oldSnapshots, ...recentSnapshots],
      };
      this.detectedLeaks.push(leak);
      console.warn('Heap memory leak detected', {
        leakId: leak.id,
        growth: Math.round(growth / 1024 / 1024) + 'MB',
        severity: leak.severity,
      });
    }
  }

  private calculateAverage(snapshots: MemorySnapshot[], field: keyof MemorySnapshot): number {
    const values = snapshots.map(s => s[field] as number);
    return values.reduce((sum, val) => sum + val, 0) / values.length;
  }

  private calculateSeverity(value: number, threshold: number): 'low' | 'medium' | 'high' | 'critical' {
    const ratio = value / threshold;
    if (ratio >= 5) return 'critical';
    if (ratio >= 3) return 'high';
    if (ratio >= 2) return 'medium';
    return 'low';
  }

  private generateLeakId(): string {
    return `leak_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }
}
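Starting the monitor takes one call; the 30-second interval below is illustrative:
// Usage sketch: sample memory every 30 seconds
const memoryMonitor = new MemoryMonitor();
memoryMonitor.start(30_000);
// ... later, e.g. during graceful shutdown:
memoryMonitor.stop();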
10.5 Scalable Architecture Design
10.5.1 Microservice Architecture Pattern
// src/architecture/MicroserviceServer.ts
export interface ServiceConfig {
  name: string;
  version: string;
  port?: number;
  dependencies?: string[];
}

export interface ServiceDiscovery {
  register(service: ServiceConfig, endpoint: string): Promise<void>;
  discover(serviceName: string): Promise<string[]>;
  unregister(serviceName: string, endpoint: string): Promise<void>;
}
export class InMemoryServiceDiscovery implements ServiceDiscovery {
  private services = new Map<string, Set<string>>();

  async register(service: ServiceConfig, endpoint: string): Promise<void> {
    if (!this.services.has(service.name)) {
      this.services.set(service.name, new Set());
    }
    this.services.get(service.name)!.add(endpoint);
    console.log('Service registered', { serviceName: service.name, endpoint });
  }

  async discover(serviceName: string): Promise<string[]> {
    const endpoints = this.services.get(serviceName);
    return endpoints ? Array.from(endpoints) : [];
  }

  async unregister(serviceName: string, endpoint: string): Promise<void> {
    const endpoints = this.services.get(serviceName);
    if (endpoints) {
      endpoints.delete(endpoint);
      if (endpoints.size === 0) {
        this.services.delete(serviceName);
      }
    }
  }
}
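A usage sketch follows; the endpoint URLs are illustrative, and since this registry is process-local it only suits a single process — a production deployment would back the ServiceDiscovery interface with an external registry:
// Usage sketch: register two instances of a service and discover them
const discovery = new InMemoryServiceDiscovery();
await discovery.register({ name: 'tool-service', version: '1.0.0' }, 'http://10.0.0.1:3000');
await discovery.register({ name: 'tool-service', version: '1.0.0' }, 'http://10.0.0.2:3000');

const serviceEndpoints = await discovery.discover('tool-service');
// ['http://10.0.0.1:3000', 'http://10.0.0.2:3000']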
10.5.2 Load Balancing and High Availability
// src/architecture/LoadBalancer.ts
export interface LoadBalancingStrategy {
  name: string;
  selectEndpoint(endpoints: ServiceEndpoint[]): ServiceEndpoint | null;
}

export interface ServiceEndpoint {
  id: string;
  url: string;
  weight: number;
  healthy: boolean;
  responseTime: number;
}
// Round-robin strategy
export class RoundRobinStrategy implements LoadBalancingStrategy {
  name = 'round-robin';
  private currentIndex = 0;

  selectEndpoint(endpoints: ServiceEndpoint[]): ServiceEndpoint | null {
    const healthyEndpoints = endpoints.filter(ep => ep.healthy);
    if (healthyEndpoints.length === 0) return null;
    const selected = healthyEndpoints[this.currentIndex % healthyEndpoints.length];
    this.currentIndex = (this.currentIndex + 1) % healthyEndpoints.length;
    return selected;
  }
}

// Weighted strategy: picks endpoints randomly in proportion to their weight
// (a weighted-random variant of classic weighted round-robin)
export class WeightedRoundRobinStrategy implements LoadBalancingStrategy {
  name = 'weighted-round-robin';

  selectEndpoint(endpoints: ServiceEndpoint[]): ServiceEndpoint | null {
    const healthyEndpoints = endpoints.filter(ep => ep.healthy);
    if (healthyEndpoints.length === 0) return null;
    // Calculate total weight
    const totalWeight = healthyEndpoints.reduce((sum, ep) => sum + ep.weight, 0);
    if (totalWeight === 0) return healthyEndpoints[0];
    // Pick a random point in the cumulative weight range
    const random = Math.random() * totalWeight;
    let currentWeight = 0;
    for (const endpoint of healthyEndpoints) {
      currentWeight += endpoint.weight;
      if (random <= currentWeight) {
        return endpoint;
      }
    }
    return healthyEndpoints[0];
  }
}
export class LoadBalancer {
  private endpoints = new Map<string, ServiceEndpoint[]>();
  private strategies = new Map<string, LoadBalancingStrategy>();
  private defaultStrategy: string = 'round-robin';

  constructor() {
    // Register default strategies
    this.registerStrategy(new RoundRobinStrategy());
    this.registerStrategy(new WeightedRoundRobinStrategy());
  }

  registerStrategy(strategy: LoadBalancingStrategy): void {
    this.strategies.set(strategy.name, strategy);
  }

  addEndpoints(serviceName: string, endpoints: ServiceEndpoint[]): void {
    this.endpoints.set(serviceName, endpoints);
  }

  async callService(
    serviceName: string,
    requestFn: (endpoint: ServiceEndpoint) => Promise<any>,
    strategyName?: string
  ): Promise<any> {
    const endpoints = this.endpoints.get(serviceName);
    if (!endpoints || endpoints.length === 0) {
      throw new Error(`No endpoints available for service: ${serviceName}`);
    }
    const strategy = this.strategies.get(strategyName || this.defaultStrategy);
    if (!strategy) {
      throw new Error(`Unknown load balancing strategy: ${strategyName}`);
    }
    const endpoint = strategy.selectEndpoint(endpoints);
    if (!endpoint) {
      throw new Error(`No healthy endpoints available for service: ${serviceName}`);
    }
    return await requestFn(endpoint);
  }
}
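Tying it together, a usage sketch with weighted routing; the URLs and the /mcp path are illustrative, fetch assumes Node 18+, and the top-level await assumes an ES module context:
// Usage sketch: weighted routing across two instances
const balancer = new LoadBalancer();
balancer.addEndpoints('tool-service', [
  { id: 'a', url: 'http://10.0.0.1:3000', weight: 3, healthy: true, responseTime: 12 },
  { id: 'b', url: 'http://10.0.0.2:3000', weight: 1, healthy: true, responseTime: 30 },
]);

// Endpoint 'a' receives roughly three quarters of the traffic
const response = await balancer.callService(
  'tool-service',
  (endpoint) => fetch(`${endpoint.url}/mcp`, { method: 'POST' }),
  'weighted-round-robin'
);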
Chapter Summary
Chapter 10 delves into MCP Server performance optimization and scalability design:
Core Knowledge Points
- Performance Monitoring: Established complete performance metrics collection and analysis system
- Cache System: Implemented multi-tier cache architecture and various caching strategies
- Concurrency Handling: Built request queues and related concurrency-control mechanisms
- Memory Management: Implemented object pools, memory monitoring, and leak detection
- Microservice Architecture: Designed service discovery, load balancing, and distributed system components
Practical Points
- Establish comprehensive performance monitoring and analysis system
- Implement intelligent multi-tier caching system
- Design reasonable concurrency handling and task scheduling mechanisms
- Proactively manage and optimize memory
- Adopt microservice architecture to support horizontal scaling
- Implement load balancing and high availability guarantees
Through this chapter, we covered the complete set of techniques for building high-performance, scalable MCP Servers, laying a solid technical foundation for large-scale, high-concurrency scenarios.