Chapter 4: Asymptotic Behavior of Markov Chains
Learning Objectives:
- Understand periodicity and aperiodicity of Markov chains
- Master the concepts of irreducibility and recurrence
- Learn about existence and uniqueness of stationary distributions
- Understand ergodic theorems and convergence properties
Knowledge Summary
1. Periodicity of Markov Chains
Definition: The period of state $i$ is defined as $d(i) = \gcd\{n \ge 1 : p_{ii}^{(n)} > 0\}$.
- Aperiodic State: $d(i) = 1$
- Periodic State: $d(i) > 1$
- If a Markov chain is irreducible, all states have the same period
2. Irreducibility
Definition: If for any two states $i, j$, there exists some time $n$ such that $p_{ij}^{(n)} > 0$, the Markov chain is called irreducible.
3. Recurrence and Transience
Recurrent State: If starting from state $i$, the probability of eventually returning to state $i$ is 1: $f_{ii} = P(\exists\, n \ge 1 : X_n = i \mid X_0 = i) = 1$
Transient State: If $f_{ii} < 1$
For an irreducible Markov chain with finite state space, all states are recurrent.
4. Stationary Distribution
Definition: A probability distribution $\pi$ is called stationary if $\pi = \pi P$.
That is: $\pi_j = \sum_i \pi_i p_{ij}$ for every state $j$.
Existence Theorem:
- For a finite, irreducible, aperiodic Markov chain, there exists a unique stationary distribution
- The stationary distribution satisfies $\pi_i = 1/\mu_i$, where $\mu_i$ is the mean return time to state $i$
Example Code
Example 1: Testing Irreducibility of Markov Chain
import numpy as np
import matplotlib.pyplot as plt
from scipy.linalg import matrix_power
def is_irreducible(P, max_steps=100):
    """
    Test whether the Markov chain corresponding to transition matrix P is irreducible.

    Works on the boolean adjacency structure of P instead of repeated float
    matrix powers, so small transition probabilities cannot underflow to
    exactly zero and falsely report a state pair as unreachable.

    Parameters:
        P: Transition matrix (n x n, rows sum to 1)
        max_steps: Maximum number of steps to consider (paths of length <= n
                   always suffice for reachability, so at most min(max_steps, n)
                   steps are examined)

    Returns:
        bool: True if every state can reach every state in some positive
              number of steps
    """
    n = P.shape[0]
    adjacency = np.asarray(P) > 0  # one-step reachability
    reachable = adjacency.copy()
    current = adjacency
    # A path that revisits no state has length <= n, so longer products
    # cannot add new reachable pairs.
    for _ in range(min(max_steps, n) - 1):
        # Boolean "matrix product": is there a path through some intermediate state?
        current = (current.astype(int) @ adjacency.astype(int)) > 0
        reachable |= current
    # Check if all state pairs are reachable
    return bool(np.all(reachable))
def find_period(P, state_idx, max_steps=100):
    """
    Calculate the period of a specified state.

    The period is d(i) = gcd{ n >= 1 : P^n[i, i] > 0 }.

    Parameters:
        P: Transition matrix
        state_idx: State index
        max_steps: Maximum number of steps examined

    Returns:
        int: Period of the state, or 0 if the chain never returns to the
             state within max_steps (e.g. a transient state).
    """
    from math import gcd

    # gcd(0, n) == n, so 0 acts as the identity element for the running gcd.
    period = 0
    current_power = P.copy()
    for n in range(1, max_steps + 1):
        if current_power[state_idx, state_idx] > 0:
            period = gcd(period, n)
            if period == 1:
                # The running gcd can never increase: the state is aperiodic.
                return 1
        current_power = current_power @ P
    return period
# Example: a 3-state Markov chain used throughout the examples below.
P1 = np.array(
    [
        [0.2, 0.5, 0.3],
        [0.4, 0.3, 0.3],
        [0.6, 0.2, 0.2],
    ]
)
print("Transition matrix P1:")
print(P1)
# Every state reaches every other with positive probability, so the
# chain should be irreducible and aperiodic.
irreducible = is_irreducible(P1)
period_of_zero = find_period(P1, 0)
print(f"Is irreducible: {irreducible}")
print(f"Period of state 0: {period_of_zero}")
Example 2: Computing Stationary Distribution
def find_stationary_distribution(P, method='eigenvalue'):
    """
    Calculate the stationary distribution of a Markov chain.

    Parameters:
        P: Transition matrix (rows sum to 1)
        method: Calculation method:
            'eigenvalue' - left eigenvector of P for eigenvalue 1
                           (i.e. eigenvector of P.T closest to eigenvalue 1)
            'iterative'  - power iteration starting from the uniform distribution

    Returns:
        numpy.array: Stationary distribution pi satisfying pi = pi @ P

    Raises:
        ValueError: If `method` is not recognized (previously an unknown
                    method fell through to a NameError on `stationary`).
    """
    n = P.shape[0]
    if method == 'eigenvalue':
        # pi = pi P is equivalent to P^T pi^T = pi^T, so take the
        # eigenvector of P.T whose eigenvalue is closest to 1.
        eigenvals, eigenvecs = np.linalg.eig(P.T)
        stationary_idx = np.argmin(np.abs(eigenvals - 1))
        stationary = np.real(eigenvecs[:, stationary_idx])
        # Normalize so entries sum to 1 (fixes the arbitrary eigenvector scale/sign).
        stationary = stationary / np.sum(stationary)
    elif method == 'iterative':
        # Power iteration: repeatedly push the uniform distribution through P.
        pi = np.ones(n) / n
        for _ in range(1000):
            pi_new = pi @ P
            if np.allclose(pi, pi_new, atol=1e-8):
                break
            pi = pi_new
        stationary = pi
    else:
        raise ValueError(f"Unknown method: {method!r}; use 'eigenvalue' or 'iterative'")
    # Clip residual negative round-off to keep the result a valid distribution.
    return np.abs(stationary)
def calculate_mean_return_time(pi):
    """
    Calculate the mean return time for each state.

    By Kac's formula, for an irreducible positive-recurrent chain the mean
    return time to state i is mu_i = 1 / pi_i.

    Parameters:
        pi: Stationary distribution (array-like of positive probabilities)

    Returns:
        numpy.array: Mean return time for each state
    """
    # asarray generalizes the input: plain Python sequences work as well
    # as numpy arrays.
    return 1.0 / np.asarray(pi, dtype=float)
# Calculate the stationary distribution with both methods and compare them.
pi_eigenvalue = find_stationary_distribution(P1, 'eigenvalue')
pi_iterative = find_stationary_distribution(P1, 'iterative')
print(f"\nStationary distribution by eigenvalue method: {pi_eigenvalue}")
print(f"Stationary distribution by iterative method: {pi_iterative}")
print(f"Verify stationarity: π = πP")
print(f"πP = {pi_eigenvalue @ P1}")
# Calculate mean return times (Kac's formula: mu_i = 1 / pi_i)
mean_return_times = calculate_mean_return_time(pi_eigenvalue)
print(f"\nMean return time for each state: {mean_return_times}")
Example 3: Ergodic Theorem Verification
def simulate_markov_chain(P, initial_state, n_steps):
    """
    Simulate one trajectory of the Markov chain with transition matrix P.

    Parameters:
        P: Transition matrix (row i is the distribution of the next state
           given current state i)
        initial_state: Starting state index
        n_steps: Number of transitions to simulate

    Returns:
        list: Sequence of visited states, of length n_steps + 1
    """
    n_states = len(P)
    trajectory = [initial_state]
    for _ in range(n_steps):
        # Sample the successor from the current state's transition row.
        successor = np.random.choice(n_states, p=P[trajectory[-1]])
        trajectory.append(successor)
    return trajectory
def calculate_empirical_distribution(states, n_states):
    """
    Calculate the empirical distribution of a state sequence.

    Parameters:
        states: State sequence (iterable of non-negative int state indices)
        n_states: Total number of states

    Returns:
        numpy.array: Fraction of time spent in each state; all zeros for an
                     empty sequence (previously this raised a divide-by-zero
                     warning and produced NaNs)
    """
    states = np.asarray(states, dtype=int)
    if states.size == 0:
        # An empty trajectory carries no frequency information.
        return np.zeros(n_states)
    counts = np.bincount(states, minlength=n_states)
    return counts / states.size
# Ergodic theorem verification: long-run state frequencies should converge
# to the stationary distribution regardless of the initial state.
np.random.seed(42)
n_steps = 10000
n_states = len(P1)
# Simulate from different initial states
initial_states = [0, 1, 2]
empirical_distributions = []
plt.figure(figsize=(12, 8))
for i, initial_state in enumerate(initial_states):
    # Simulate Markov chain
    states = simulate_markov_chain(P1, initial_state, n_steps)
    # Calculate the empirical distribution on growing prefixes (every 100 steps)
    cumulative_empirical = []
    for t in range(100, n_steps + 1, 100):
        emp_dist = calculate_empirical_distribution(states[:t], n_states)
        cumulative_empirical.append(emp_dist)
    # Plot convergence process
    cumulative_empirical = np.array(cumulative_empirical)
    time_points = range(100, n_steps + 1, 100)
    for state in range(n_states):
        plt.subplot(2, 2, i + 1)
        plt.plot(time_points, cumulative_empirical[:, state],
                 label=f'State {state}', linewidth=2)
        # Dashed line marks the theoretical stationary probability of this state.
        plt.axhline(y=pi_eigenvalue[state], color=f'C{state}',
                    linestyle='--', alpha=0.7, label=f'Theoretical {state}')
    plt.title(f'Convergence Process Starting from State {initial_state}')
    plt.xlabel('Time Step')
    plt.ylabel('State Probability')
    plt.legend()
    plt.grid(True, alpha=0.3)
# Final empirical distribution comparison (uses the last simulated chain)
plt.subplot(2, 2, 4)
final_empirical = calculate_empirical_distribution(states, n_states)
x = np.arange(n_states)
width = 0.35
plt.bar(x - width/2, pi_eigenvalue, width, label='Theoretical Stationary Distribution', alpha=0.7)
plt.bar(x + width/2, final_empirical, width, label='Empirical Distribution', alpha=0.7)
plt.xlabel('State')
plt.ylabel('Probability')
plt.title('Stationary vs Empirical Distribution')
plt.legend()
plt.xticks(x)
plt.tight_layout()
plt.show()
print(f"\nTheoretical stationary distribution: {pi_eigenvalue}")
print(f"Empirical distribution: {final_empirical}")
print(f"Difference: {np.abs(pi_eigenvalue - final_empirical)}")
Example 4: Mixing Time Analysis
def total_variation_distance(p, q):
    """
    Total variation distance between two probability distributions.

    Defined as TV(p, q) = (1/2) * sum_i |p_i - q_i|, which lies in [0, 1].

    Parameters:
        p, q: Probability distributions over the same state space

    Returns:
        float: Total variation distance
    """
    absolute_gap = np.abs(p - q)
    return 0.5 * np.sum(absolute_gap)
def mixing_time_analysis(P, epsilon=0.01):
    """
    Analyze the mixing time of a Markov chain.

    For each deterministic initial state (delta distribution), propagate the
    distribution one step at a time and record the total variation distance
    to the stationary distribution, stopping once it drops to `epsilon`.

    Parameters:
        P: Transition matrix
        epsilon: Convergence tolerance in total variation distance

    Returns:
        tuple: (results, pi_stationary), where results maps 'state_<i>' to a
               dict with keys 'mixing_time' (0 means not reached within 500
               steps) and 'distances' (per-step TV distances), and
               pi_stationary is the stationary distribution.
               (The original docstring claimed a bare dict was returned.)
    """
    n_states = P.shape[0]
    pi_stationary = find_stationary_distribution(P)
    results = {}
    # Calculate mixing time for each deterministic initial distribution
    for initial_state in range(n_states):
        # Delta distribution concentrated at initial_state
        current_dist = np.zeros(n_states)
        current_dist[initial_state] = 1.0
        mixing_time = 0
        distances = []
        for t in range(500):  # cap the number of iterations
            # Advance the chain one step. (The original called
            # matrix_power(P, 1) here, rebuilding P every iteration.)
            current_dist = current_dist @ P
            tv_distance = total_variation_distance(current_dist, pi_stationary)
            distances.append(tv_distance)
            # Stop at the first step within epsilon of stationarity.
            if tv_distance <= epsilon:
                mixing_time = t + 1
                break
        results[f'state_{initial_state}'] = {
            'mixing_time': mixing_time,
            'distances': distances
        }
    return results, pi_stationary
# Mixing time analysis of the example chain P1
mixing_results, pi_stat = mixing_time_analysis(P1)
plt.figure(figsize=(12, 6))
# Plot convergence curves (log scale makes the geometric decay visible)
plt.subplot(1, 2, 1)
for state in range(len(P1)):
    distances = mixing_results[f'state_{state}']['distances']
    plt.semilogy(distances, label=f'Starting from state {state}', linewidth=2)
    mixing_time = mixing_results[f'state_{state}']['mixing_time']
    if mixing_time > 0:
        # Vertical line marks the step at which TV distance fell below epsilon.
        plt.axvline(x=mixing_time-1, color=f'C{state}',
                    linestyle='--', alpha=0.7)
plt.axhline(y=0.01, color='red', linestyle='-', alpha=0.5,
            label='ε = 0.01')
plt.xlabel('Time Step')
plt.ylabel('Total Variation Distance (log scale)')
plt.title('Mixing Time of Markov Chain')
plt.legend()
plt.grid(True, alpha=0.3)
# Mixing time bar chart
plt.subplot(1, 2, 2)
states = [f'State {i}' for i in range(len(P1))]
mixing_times = [mixing_results[f'state_{i}']['mixing_time']
                for i in range(len(P1))]
plt.bar(states, mixing_times, alpha=0.7, color=['C0', 'C1', 'C2'])
plt.ylabel('Mixing Time')
plt.title('Mixing Time from Each Initial State')
plt.grid(True, alpha=0.3, axis='y')
for i, time in enumerate(mixing_times):
    # Annotate each bar with its numeric value.
    plt.text(i, time + 0.5, str(time), ha='center', va='bottom')
plt.tight_layout()
plt.show()
print("\nMixing time analysis results:")
for state in range(len(P1)):
    mixing_time = mixing_results[f'state_{state}']['mixing_time']
    print(f"Mixing time starting from state {state}: {mixing_time} steps")
Theoretical Analysis
Ergodic Theorem
For an irreducible, finite-state Markov chain, with probability 1,
$$\lim_{n \to \infty} \frac{1}{n} \sum_{t=1}^{n} \mathbf{1}\{X_t = i\} = \pi_i,$$
where $\pi_i$ is the stationary probability of state $i$.
Convergence Rate
The convergence rate of a Markov chain to its stationary distribution is determined by the second largest eigenvalue (in modulus) of the transition matrix:
$$\|P^n(x, \cdot) - \pi\|_{TV} \le C\,|\lambda_2|^n$$
where:
- $\lambda_2$ is the second largest eigenvalue (in modulus)
- $C$ is a constant
- $\|\cdot\|_{TV}$ is the total variation norm
Reversibility
If $\pi_i p_{ij} = \pi_j p_{ji}$ for all states $i, j$ (detailed balance), then the Markov chain is reversible and $\pi$ is a stationary distribution.
Mathematical Formula Summary
- Periodicity: $d(i) = \gcd\{n \ge 1 : p_{ii}^{(n)} > 0\}$
- Recurrence Probability: $f_{ii} = \sum_{n=1}^{\infty} f_{ii}^{(n)}$
- Stationary Distribution: $\pi = \pi P$, and $\sum_i \pi_i = 1$
- Mean Return Time: $\mu_i = 1/\pi_i$
- Total Variation Distance: $\|p - q\|_{TV} = \frac{1}{2} \sum_i |p_i - q_i|$
- Mixing Time: $t_{\mathrm{mix}}(\epsilon) = \min\{t : \max_x \|P^t(x, \cdot) - \pi\|_{TV} \le \epsilon\}$
warning Important Notes
- Periodicity affects convergence properties but not the existence of stationary distributions
- An irreducible chain with finite state space necessarily has a unique stationary distribution
- Reversible chains have good numerical properties and are commonly used in MCMC algorithms