Chapter 7: Stock Price Modeling Practice
Haiyue
10min
Chapter 7: Stock Price Modeling Practice
Learning Objectives
- Use Markov chains to model stock price movements
- Implement state-based stock return models
- Write Python code for parameter estimation
- Perform model validation and backtesting analysis
Knowledge Summary
1. Markov Modeling Framework for Stock Prices
Discretizing Return States: Discretize continuous return distributions into finite states:
State Definition Criteria:
- Based on quantiles: e.g., 20%, 40%, 60%, 80% quantiles
- Based on volatility: ±0.5σ, ±1σ, ±2σ
- Based on technical analysis: breakouts, support, resistance levels
2. Multi-State Markov Model
State Transition Matrix :
Stationary Distribution:
Expected Return:
3. State-Dependent Risk Model
Conditional Variance:
Conditional VaR:
Example Code
Example 1: Building Stock Price Markov Chain
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from scipy import stats
from sklearn.preprocessing import KBinsDiscretizer
import seaborn as sns
class StockMarkovModel:
"""
Stock Price Markov Chain Model
"""
def __init__(self, n_states=5):
self.n_states = n_states
self.state_labels = None
self.transition_matrix = None
self.state_boundaries = None
self.discretizer = None
def fit(self, returns, method='quantile'):
"""
Fit Markov chain model
Parameters:
returns: Return sequence
method: State partitioning method ('quantile', 'kmeans', 'fixed')
"""
returns = np.array(returns)
# State discretization
if method == 'quantile':
self.discretizer = KBinsDiscretizer(
n_bins=self.n_states,
encode='ordinal',
strategy='quantile',
subsample=None
)
elif method == 'kmeans':
self.discretizer = KBinsDiscretizer(
n_bins=self.n_states,
encode='ordinal',
strategy='kmeans'
)
elif method == 'fixed':
# Fixed partitioning based on standard deviation
std_returns = np.std(returns)
boundaries = [-2*std_returns, -0.5*std_returns, 0.5*std_returns, 2*std_returns]
self.state_boundaries = boundaries
if method != 'fixed':
states = self.discretizer.fit_transform(returns.reshape(-1, 1)).astype(int).flatten()
self.state_boundaries = self.discretizer.bin_edges_[0]
else:
states = self._discretize_fixed(returns)
# Create state labels
self.state_labels = [f'State_{i}' for i in range(self.n_states)]
# Estimate transition probability matrix
self.transition_matrix = self._estimate_transition_matrix(states)
return states
def _discretize_fixed(self, returns):
"""Fixed threshold discretization"""
states = np.zeros(len(returns), dtype=int)
boundaries = self.state_boundaries
for i, ret in enumerate(returns):
if ret <= boundaries[0]:
states[i] = 0 # Large loss
elif ret <= boundaries[1]:
states[i] = 1 # Small loss
elif ret <= boundaries[2]:
states[i] = 2 # Flat
elif ret <= boundaries[3]:
states[i] = 3 # Small gain
else:
states[i] = 4 # Large gain
return states
def _estimate_transition_matrix(self, states):
"""Estimate transition probability matrix"""
n_obs = len(states)
transition_counts = np.zeros((self.n_states, self.n_states))
# Count transitions
for t in range(n_obs - 1):
i, j = states[t], states[t + 1]
transition_counts[i, j] += 1
# Convert to probabilities (row normalization)
row_sums = transition_counts.sum(axis=1, keepdims=True)
transition_matrix = np.divide(
transition_counts,
row_sums,
out=np.zeros_like(transition_counts),
where=row_sums != 0
)
return transition_matrix
def predict_next_state_probs(self, current_state):
"""Predict next state probability distribution"""
return self.transition_matrix[current_state]
def simulate_path(self, initial_state, n_steps, seed=None):
"""Simulate state path"""
if seed is not None:
np.random.seed(seed)
path = np.zeros(n_steps, dtype=int)
path[0] = initial_state
for t in range(1, n_steps):
probs = self.transition_matrix[path[t-1]]
path[t] = np.random.choice(self.n_states, p=probs)
return path
def calculate_stationary_distribution(self):
"""Calculate stationary distribution"""
eigenvals, eigenvecs = np.linalg.eig(self.transition_matrix.T)
stationary_idx = np.argmin(np.abs(eigenvals - 1))
stationary = np.real(eigenvecs[:, stationary_idx])
return stationary / np.sum(stationary)
def download_stock_data(symbol='AAPL', period='2y'):
"""Download stock data"""
try:
stock = yf.Ticker(symbol)
data = stock.history(period=period)
returns = data['Close'].pct_change().dropna()
return data, returns
except:
# If download fails, generate simulated data
print(f"Unable to download {symbol} data, using simulated data")
np.random.seed(42)
n_days = 500
returns = np.random.normal(0.001, 0.02, n_days) # Simulated daily returns
dates = pd.date_range('2022-01-01', periods=n_days, freq='D')
prices = 100 * np.cumprod(1 + returns)
data = pd.DataFrame({
'Close': prices,
'Open': prices * (1 + np.random.normal(0, 0.001, n_days)),
'High': prices * (1 + np.abs(np.random.normal(0, 0.005, n_days))),
'Low': prices * (1 - np.abs(np.random.normal(0, 0.005, n_days))),
'Volume': np.random.randint(1000000, 10000000, n_days)
}, index=dates)
returns = pd.Series(returns, index=dates)
return data, returns
# Download stock data
print("Fetching stock data...")
stock_data, stock_returns = download_stock_data('AAPL', '2y')
print(f"Data period: {stock_data.index[0].date()} to {stock_data.index[-1].date()}")
print(f"Total trading days: {len(stock_returns)}")
print(f"Average daily return: {stock_returns.mean():.4f}")
print(f"Daily return standard deviation: {stock_returns.std():.4f}")
# Build Markov chain model
markov_model = StockMarkovModel(n_states=5)
states = markov_model.fit(stock_returns, method='quantile')
print(f"\nTransition probability matrix:")
print(markov_model.transition_matrix)
# Calculate stationary distribution
stationary_dist = markov_model.calculate_stationary_distribution()
print(f"\nStationary distribution: {stationary_dist}")
# State boundaries
print(f"\nState boundaries: {markov_model.state_boundaries}")
Theoretical Analysis
Theoretical Foundations of Stock Price Markov Modeling
State Space Design: Choosing the number of states requires balancing:
- Goodness of Fit: More states provide better fit
- Parameter Efficiency: Too many states lead to over-parameterization
- Sample Adequacy: Each state needs sufficient observations
Information Criteria:
where is the number of free parameters.
Predictive Performance Evaluation
State Prediction Accuracy:
Probability Calibration:
Risk Management Applications
Dynamic VaR:
Expected Shortfall:
Mathematical Formula Summary
-
Transition probability estimation:
-
Log-likelihood function:
-
Stationary distribution:
-
Expected duration:
-
Conditional expected return:
Practical Considerations
- State partitioning method significantly affects model performance
- Need sufficient historical data to support parameter estimation
- Model assumes stationarity of market regimes
- Transaction costs have important impact on strategy returns