Chapter 12: Model Evaluation and Risk Management
Haiyue
Learning Objectives
- Master diagnostic methods for Markov models
- Perform model robustness testing
- Implement dynamic risk measurement
- Build model risk management framework
Knowledge Summary
1. Model Validation Framework
[Flowchart: model validation workflow]
2. Key Evaluation Metrics
Statistical Metrics:
- Log-likelihood: $\ln L = \sum_{t=1}^{T-1} \ln p_{s_t, s_{t+1}}$
- AIC: $\mathrm{AIC} = 2k - 2\ln L$, where $k$ is the number of free parameters
- BIC: $\mathrm{BIC} = k \ln T - 2\ln L$, where $T$ is the number of observations
Prediction Accuracy:
- RMSE: $\mathrm{RMSE} = \sqrt{\frac{1}{n}\sum_{i=1}^{n}(y_i - \hat{y}_i)^2}$
- MAE: $\mathrm{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$
Risk Metrics:
- VaR: $\mathrm{VaR}_{\alpha} = F_R^{-1}(1-\alpha)$, the $(1-\alpha)$-quantile of the return distribution
- ES: $\mathrm{ES}_{\alpha} = \mathbb{E}[R \mid R \le \mathrm{VaR}_{\alpha}]$, the mean return in the tail beyond VaR
A short computational sketch of these metrics follows below.
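A minimal Python sketch of how these summary metrics can be computed; the helper names below (information_criteria, prediction_errors, historical_var_es) are illustrative and are not used in the later examples:
import numpy as np
def information_criteria(log_likelihood, n_params, n_obs):
    """AIC and BIC from a maximized log-likelihood."""
    aic = 2 * n_params - 2 * log_likelihood
    bic = n_params * np.log(n_obs) - 2 * log_likelihood
    return aic, bic
def prediction_errors(y_true, y_pred):
    """RMSE and MAE between observed and predicted values."""
    err = np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float)
    return np.sqrt(np.mean(err**2)), np.mean(np.abs(err))
def historical_var_es(returns, confidence_level=0.95):
    """Historical VaR (a return quantile) and Expected Shortfall (the tail mean)."""
    returns = np.asarray(returns, dtype=float)
    var = np.percentile(returns, (1 - confidence_level) * 100)
    tail = returns[returns <= var]
    return var, (tail.mean() if len(tail) else var)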
Example Code
Example 1: Model Diagnostics and Validation
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pandas as pd
class ModelValidator:
"""Markov model validator"""
def __init__(self, model):
self.model = model
self.validation_results = {}
def goodness_of_fit_test(self, observed_transitions, expected_transitions):
"""Goodness of fit test"""
# Chi-square test
chi2_stat = np.sum((observed_transitions - expected_transitions)**2 / expected_transitions)
dof = len(observed_transitions.flatten()) - 1
p_value = 1 - stats.chi2.cdf(chi2_stat, dof)
return {
'chi2_statistic': chi2_stat,
'p_value': p_value,
'degrees_of_freedom': dof
}
def ljung_box_test(self, residuals, lags=10):
"""Ljung-Box autocorrelation test"""
from statsmodels.stats.diagnostic import acorr_ljungbox
result = acorr_ljungbox(residuals, lags=lags, return_df=True)
return result
def arch_test(self, residuals, lags=5):
"""ARCH effect test"""
squared_residuals = residuals**2
from statsmodels.tsa.stattools import acf
autocorrs = acf(squared_residuals, nlags=lags, fft=True)[1:]
# Simplified ARCH test statistic
n = len(residuals)
lm_stat = n * np.sum(autocorrs**2)
p_value = 1 - stats.chi2.cdf(lm_stat, lags)
return {
'lm_statistic': lm_stat,
'p_value': p_value,
'autocorrelations': autocorrs
}
# Example: Model validation
np.random.seed(42)
# Generate test data
def generate_test_data(n_samples=1000):
# True 2-state Markov chain
true_P = np.array([[0.8, 0.2], [0.3, 0.7]])
states = [0]
for _ in range(n_samples-1):
current_state = states[-1]
next_state = np.random.choice([0, 1], p=true_P[current_state])
states.append(next_state)
return np.array(states), true_P
states, true_P = generate_test_data(2000)
# Estimate transition matrix
def estimate_transition_matrix(states):
n_states = len(np.unique(states))
transition_counts = np.zeros((n_states, n_states))
for t in range(len(states)-1):
transition_counts[states[t], states[t+1]] += 1
row_sums = transition_counts.sum(axis=1, keepdims=True)
estimated_P = transition_counts / row_sums
return estimated_P, transition_counts
estimated_P, transition_counts = estimate_transition_matrix(states[:-500])
test_states = states[-500:]
print("Model Validation Analysis:")
print(f"True transition matrix:\n{true_P}")
print(f"Estimated transition matrix:\n{estimated_P}")
print(f"Estimation error:\n{np.abs(true_P - estimated_P)}")
# Create validator
validator = ModelValidator(None)
# Goodness-of-fit test: compare observed transition counts with the counts
# expected under the hypothesized (data-generating) matrix true_P.
# Using estimated_P here would be circular: the estimates are exactly the
# observed transition frequencies, so the statistic would be identically zero.
row_counts = transition_counts.sum(axis=1, keepdims=True)  # visits to each origin state
expected_counts = row_counts * true_P
gof_result = validator.goodness_of_fit_test(
transition_counts.flatten(),
expected_counts.flatten()
)
print(f"\nGoodness of Fit Test:")
print(f"Chi-square statistic: {gof_result['chi2_statistic']:.4f}")
print(f"p-value: {gof_result['p_value']:.4f}")
Example 2: Out-of-Sample Performance Evaluation
class OutOfSampleTester:
"""Out-of-sample testing"""
def rolling_window_validation(self, data, model_func, window_size=500, step_size=50):
"""Rolling window validation"""
results = []
for start in range(0, len(data) - window_size - step_size, step_size):
# Training window
train_data = data[start:start + window_size]
# Testing window
test_data = data[start + window_size:start + window_size + step_size]
# Train model
model = model_func(train_data)
# Predict
predictions = self.predict_sequence(model, test_data)
# Evaluate
accuracy = np.mean(predictions == test_data[1:])
log_likelihood = self.calculate_log_likelihood(model, test_data)
results.append({
'start_idx': start,
'accuracy': accuracy,
'log_likelihood': log_likelihood,
'n_test': len(test_data) - 1
})
return pd.DataFrame(results)
def predict_sequence(self, transition_matrix, test_sequence):
"""Predict state sequence"""
predictions = []
for t in range(len(test_sequence) - 1):
current_state = test_sequence[t]
# Predict next state (choose most probable state)
next_state_probs = transition_matrix[current_state]
predicted_state = np.argmax(next_state_probs)
predictions.append(predicted_state)
return np.array(predictions)
def calculate_log_likelihood(self, transition_matrix, sequence):
"""Calculate log-likelihood"""
log_likelihood = 0
for t in range(len(sequence) - 1):
current_state = sequence[t]
next_state = sequence[t + 1]
prob = transition_matrix[current_state, next_state]
if prob > 0:
log_likelihood += np.log(prob)
else:
log_likelihood += -np.inf
return log_likelihood
# Out-of-sample testing
def simple_model_func(data):
estimated_P, _ = estimate_transition_matrix(data)
return estimated_P
tester = OutOfSampleTester()
oos_results = tester.rolling_window_validation(
states, simple_model_func, window_size=300, step_size=50
)
print(f"\nOut-of-Sample Test Results:")
print(f"Average accuracy: {oos_results['accuracy'].mean():.3f}")
print(f"Accuracy standard deviation: {oos_results['accuracy'].std():.3f}")
print(f"Average log-likelihood: {oos_results['log_likelihood'].mean():.2f}")
# Visualize out-of-sample performance
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
# Accuracy over time
axes[0, 0].plot(oos_results['start_idx'], oos_results['accuracy'], 'b-o', markersize=4)
axes[0, 0].set_title('Out-of-Sample Prediction Accuracy')
axes[0, 0].set_xlabel('Starting Position')
axes[0, 0].set_ylabel('Accuracy')
axes[0, 0].grid(True, alpha=0.3)
# Log-likelihood over time
axes[0, 1].plot(oos_results['start_idx'], oos_results['log_likelihood'], 'r-o', markersize=4)
axes[0, 1].set_title('Out-of-Sample Log-Likelihood')
axes[0, 1].set_xlabel('Starting Position')
axes[0, 1].set_ylabel('Log-Likelihood')
axes[0, 1].grid(True, alpha=0.3)
# Accuracy distribution
axes[1, 0].hist(oos_results['accuracy'], bins=15, alpha=0.7, edgecolor='black')
axes[1, 0].axvline(oos_results['accuracy'].mean(), color='red', linestyle='--',
label=f'Mean: {oos_results["accuracy"].mean():.3f}')
axes[1, 0].set_title('Accuracy Distribution')
axes[1, 0].set_xlabel('Accuracy')
axes[1, 0].set_ylabel('Frequency')
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)
# Performance stability
rolling_mean = oos_results['accuracy'].rolling(window=5, center=True).mean()
rolling_std = oos_results['accuracy'].rolling(window=5, center=True).std()
axes[1, 1].plot(oos_results['start_idx'], oos_results['accuracy'], 'b-', alpha=0.3, label='Raw')
axes[1, 1].plot(oos_results['start_idx'], rolling_mean, 'r-', linewidth=2, label='5-period Moving Average')
axes[1, 1].fill_between(oos_results['start_idx'],
rolling_mean - rolling_std,
rolling_mean + rolling_std,
alpha=0.2, color='red', label='±1 Std Dev')
axes[1, 1].set_title('Prediction Performance Stability')
axes[1, 1].set_xlabel('Starting Position')
axes[1, 1].set_ylabel('Accuracy')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
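# A rough significance check (a sketch beyond the original analysis): pool the
# out-of-sample hits across windows and compare them against a naive baseline
# that always predicts the unconditionally more frequent state. The binomial
# test treats predictions as independent, which is only an approximation for a
# Markov chain.
from scipy.stats import binomtest  # available in SciPy >= 1.7
n_hits = int(round((oos_results['accuracy'] * oos_results['n_test']).sum()))
n_trials = int(oos_results['n_test'].sum())
baseline_rate = max(np.mean(states == 0), np.mean(states == 1))
binom_result = binomtest(n_hits, n_trials, baseline_rate, alternative='greater')
print(f"\nPooled out-of-sample accuracy: {n_hits / n_trials:.3f} "
      f"(baseline {baseline_rate:.3f}, one-sided p = {binom_result.pvalue:.4f})")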
Example 3: Risk Management Framework
class RiskManager:
"""Risk management framework"""
def __init__(self, confidence_level=0.95):
self.confidence_level = confidence_level
self.risk_metrics = {}
def calculate_var(self, returns, method='historical'):
"""Calculate VaR"""
if method == 'historical':
var = np.percentile(returns, (1 - self.confidence_level) * 100)
elif method == 'parametric':
mean_return = np.mean(returns)
std_return = np.std(returns)
var = mean_return - stats.norm.ppf(self.confidence_level) * std_return
else:
raise ValueError("Method must be 'historical' or 'parametric'")
return var
def calculate_expected_shortfall(self, returns):
"""Calculate expected shortfall"""
var = self.calculate_var(returns, method='historical')
tail_losses = returns[returns <= var]
if len(tail_losses) > 0:
es = np.mean(tail_losses)
else:
es = var
return es
def stress_testing(self, model, base_scenario, stress_scenarios):
"""Stress testing"""
results = {}
# Base scenario
base_returns = self.simulate_returns(model, base_scenario)
results['base'] = {
'var': self.calculate_var(base_returns),
'es': self.calculate_expected_shortfall(base_returns),
'returns': base_returns
}
# Stress scenarios
for scenario_name, scenario_params in stress_scenarios.items():
stress_returns = self.simulate_returns(model, scenario_params)
results[scenario_name] = {
'var': self.calculate_var(stress_returns),
'es': self.calculate_expected_shortfall(stress_returns),
'returns': stress_returns
}
return results
def simulate_returns(self, transition_matrix, scenario_params):
"""Simulate returns"""
n_simulations = scenario_params.get('n_simulations', 1000)
n_periods = scenario_params.get('n_periods', 252)
state_returns = scenario_params.get('state_returns', [-0.02, 0.015])
initial_state = scenario_params.get('initial_state', 0)
all_returns = []
for _ in range(n_simulations):
returns = []
current_state = initial_state
for _ in range(n_periods):
# Generate returns
state_return = state_returns[current_state]
noise = np.random.normal(0, 0.01) # Add noise
period_return = state_return + noise
returns.append(period_return)
# Transition to next state
next_state = np.random.choice(len(state_returns),
p=transition_matrix[current_state])
current_state = next_state
all_returns.extend(returns)
return np.array(all_returns)
def model_risk_assessment(self, models, test_data):
"""Model risk assessment"""
model_results = {}
for model_name, model in models.items():
# Predict
predictions = self.predict_with_model(model, test_data)
# Calculate accuracy
accuracy = np.mean(predictions == test_data[1:])
# Calculate log-likelihood
log_likelihood = 0
for t in range(len(test_data) - 1):
current_state = test_data[t]
next_state = test_data[t + 1]
prob = model[current_state, next_state]
if prob > 0:
log_likelihood += np.log(prob)
model_results[model_name] = {
'accuracy': accuracy,
'log_likelihood': log_likelihood,
'predictions': predictions
}
return model_results
def predict_with_model(self, transition_matrix, sequence):
"""Predict using model"""
predictions = []
for t in range(len(sequence) - 1):
current_state = sequence[t]
next_state_probs = transition_matrix[current_state]
predicted_state = np.argmax(next_state_probs)
predictions.append(predicted_state)
return np.array(predictions)
# Risk management analysis
risk_manager = RiskManager(confidence_level=0.95)
# Define scenarios
base_scenario = {
'n_simulations': 1000,
'n_periods': 252,
'state_returns': [-0.01, 0.008], # Normal market
'initial_state': 1
}
stress_scenarios = {
'market_crash': {
'n_simulations': 1000,
'n_periods': 252,
'state_returns': [-0.05, -0.02], # Market crash
'initial_state': 0
},
'high_volatility': {
'n_simulations': 1000,
'n_periods': 252,
'state_returns': [-0.03, 0.025], # High volatility
'initial_state': 0
}
}
# Execute stress testing
stress_results = risk_manager.stress_testing(estimated_P, base_scenario, stress_scenarios)
print(f"\nRisk Management Analysis:")
print("=" * 50)
for scenario, results in stress_results.items():
print(f"\n{scenario.upper()} Scenario:")
print(f" 95% VaR: {results['var']:.4f}")
print(f" Expected Shortfall: {results['es']:.4f}")
print(f" Return mean: {np.mean(results['returns']):.4f}")
print(f" Return std dev: {np.std(results['returns']):.4f}")
# Model risk assessment
models = {
'estimated_model': estimated_P,
'true_model': true_P,
'naive_model': np.array([[0.5, 0.5], [0.5, 0.5]]) # Naive model
}
model_risk_results = risk_manager.model_risk_assessment(models, test_states)
print(f"\nModel Risk Assessment:")
for model_name, results in model_risk_results.items():
print(f"{model_name}:")
print(f" Accuracy: {results['accuracy']:.3f}")
print(f" Log-likelihood: {results['log_likelihood']:.2f}")
# Visualize risk analysis
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
# VaR comparison
scenarios = list(stress_results.keys())
var_values = [stress_results[s]['var'] for s in scenarios]  # avoid shadowing the built-in vars()
colors = ['blue', 'red', 'orange']
axes[0, 0].bar(scenarios, var_values, color=colors, alpha=0.7)
axes[0, 0].set_title('VaR Under Different Scenarios')
axes[0, 0].set_ylabel('VaR')
axes[0, 0].tick_params(axis='x', rotation=45)
axes[0, 0].grid(True, alpha=0.3, axis='y')
# Return distribution comparison
for i, (scenario, color) in enumerate(zip(scenarios, colors)):
returns = stress_results[scenario]['returns']
axes[0, 1].hist(returns, bins=50, alpha=0.6, density=True,
color=color, label=scenario)
axes[0, 1].set_title('Return Distribution Under Different Scenarios')
axes[0, 1].set_xlabel('Return')
axes[0, 1].set_ylabel('Density')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)
# Model performance comparison
model_names = list(model_risk_results.keys())
accuracies = [model_risk_results[m]['accuracy'] for m in model_names]
axes[1, 0].bar(model_names, accuracies, color='green', alpha=0.7)
axes[1, 0].set_title('Model Prediction Accuracy Comparison')
axes[1, 0].set_ylabel('Accuracy')
axes[1, 0].tick_params(axis='x', rotation=45)
axes[1, 0].grid(True, alpha=0.3, axis='y')
# Risk metric trends
periods = range(1, 11)
base_returns = stress_results['base']['returns']
rolling_vars = []
for period in periods:
period_returns = base_returns[:period*100] # Take data of different lengths
var = risk_manager.calculate_var(period_returns)
rolling_vars.append(var)
axes[1, 1].plot(periods, rolling_vars, 'b-o', linewidth=2, markersize=6)
axes[1, 1].set_title('VaR vs Sample Size')
axes[1, 1].set_xlabel('Sample Period (×100)')
axes[1, 1].set_ylabel('VaR')
axes[1, 1].grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
print(f"\nRisk Management Recommendations:")
print(f"1. Establish multi-scenario stress testing framework")
print(f"2. Regularly assess model prediction accuracy")
print(f"3. Monitor time-varying characteristics of model risk")
print(f"4. Establish early warning mechanism for model failure")
Mathematical Formula Summary
- Model Selection Criteria: $\mathrm{AIC} = 2k - 2\ln L$ and $\mathrm{BIC} = k\ln T - 2\ln L$, where $k$ is the number of free parameters and $T$ the sample size
- Prediction Evaluation Metrics: $\mathrm{RMSE} = \sqrt{\frac{1}{n}\sum_{i=1}^{n}(y_i - \hat{y}_i)^2}$ and $\mathrm{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$
- Risk Measures: $\mathrm{VaR}_{\alpha} = F_R^{-1}(1-\alpha)$ and $\mathrm{ES}_{\alpha} = \mathbb{E}[R \mid R \le \mathrm{VaR}_{\alpha}]$
- Model Confidence Intervals: $\hat{p}_{ij} \pm z_{\alpha/2}\sqrt{\hat{p}_{ij}(1-\hat{p}_{ij})/n_i}$, where $n_i$ is the number of observed transitions out of state $i$ (a computational sketch follows below)
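The confidence-interval formula can be applied directly to the estimated transition matrix. A minimal sketch, assuming numpy, scipy.stats, and the transition_counts array from Example 1 are in scope:
def transition_confidence_intervals(transition_counts, alpha=0.05):
    """Wald confidence intervals for each estimated transition probability."""
    counts = np.asarray(transition_counts, dtype=float)
    n_i = counts.sum(axis=1, keepdims=True)  # observed transitions out of each state
    p_hat = counts / n_i
    z = stats.norm.ppf(1 - alpha / 2)
    half_width = z * np.sqrt(p_hat * (1 - p_hat) / n_i)
    return p_hat - half_width, p_hat + half_width
lower, upper = transition_confidence_intervals(transition_counts)
print(f"95% CI lower bounds:\n{lower}")
print(f"95% CI upper bounds:\n{upper}")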
Key Points in Risk Management
- Model validation is an ongoing process requiring regular updates
- Out-of-sample testing is more important than in-sample fitting
- Stress testing should include extreme but plausible scenarios
- Model risk needs to be quantified and managed
- Regulatory requirements are constantly evolving, so practitioners need to stay informed