Chapter 5: Hidden Markov Model (HMM) Fundamentals
Haiyue
Learning Objectives
- Understand the structure of Hidden Markov Models
- Master the relationship between observation processes and hidden state processes
- Learn the Forward-Backward algorithm
- Understand the Viterbi algorithm and Baum-Welch algorithm
Knowledge Summary
1. Basic Structure of Hidden Markov Models
A Hidden Markov Model (HMM) is a doubly stochastic process:
- Hidden State Sequence $Q = (q_1, q_2, \ldots, q_T)$: an unobservable Markov chain over states $s_1, \ldots, s_N$
- Observation Sequence $O = (o_1, o_2, \ldots, o_T)$: an observable stochastic process, where each $o_t$ is emitted by the hidden state $q_t$
(Mermaid diagram of the HMM structure omitted: hidden state chain $q_1 \to q_2 \to \cdots \to q_T$, each state emitting an observation $o_t$.)
2. Three Basic Elements of HMM
Transition Probability Matrix: $A = [a_{ij}]_{N \times N}$, where $a_{ij} = P(q_{t+1} = s_j \mid q_t = s_i)$
Emission Probability Matrix: $B = [b_j(k)]_{N \times M}$, where $b_j(k) = P(o_t = v_k \mid q_t = s_j)$ and $M$ is the number of distinct observation symbols
Initial State Probability: $\pi = (\pi_1, \ldots, \pi_N)$, where $\pi_i = P(q_1 = s_i)$
Three Basic Assumptions of HMM
- Markov Assumption: the next state depends only on the current state, $P(q_t \mid q_{t-1}, q_{t-2}, \ldots, q_1) = P(q_t \mid q_{t-1})$
- Observation Independence Assumption: the current observation depends only on the current state, $P(o_t \mid q_1, \ldots, q_T, o_1, \ldots, o_{t-1}, o_{t+1}, \ldots, o_T) = P(o_t \mid q_t)$
- Homogeneity Assumption: Transition and emission probabilities don’t change over time
3. Three Basic Problems of HMM
- Evaluation Problem: given the model parameters, compute the probability of an observation sequence (solved by the forward algorithm)
- Decoding Problem: given an observation sequence, find the most likely hidden state sequence (solved by the Viterbi algorithm)
- Learning Problem: given an observation sequence, estimate the model parameters (solved by the Baum-Welch algorithm)
Example Code
Example 1: HMM Model Class Implementation
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import logsumexp
class HiddenMarkovModel:
"""
Hidden Markov Model Implementation
"""
def __init__(self, n_states, n_observations):
"""
Initialize HMM model
Parameters:
n_states: Number of hidden states
n_observations: Number of observation values
"""
self.n_states = n_states
self.n_observations = n_observations
# Initialize parameters
self.transition_matrix = None # Transition probability matrix A
self.emission_matrix = None # Emission probability matrix B
self.initial_probs = None # Initial probability distribution π
def initialize_parameters(self, random_state=42):
"""
Randomly initialize model parameters
"""
np.random.seed(random_state)
# Transition probability matrix (rows sum to 1)
self.transition_matrix = np.random.dirichlet(
np.ones(self.n_states), size=self.n_states
)
# Emission probability matrix (rows sum to 1)
self.emission_matrix = np.random.dirichlet(
np.ones(self.n_observations), size=self.n_states
)
# Initial state probability
self.initial_probs = np.random.dirichlet(np.ones(self.n_states))
def set_parameters(self, A, B, pi):
"""
Set model parameters
Parameters:
A: Transition probability matrix
B: Emission probability matrix
pi: Initial state probability
"""
self.transition_matrix = A
self.emission_matrix = B
self.initial_probs = pi
def generate_sequence(self, length):
"""
Generate observation sequence and corresponding hidden state sequence
Parameters:
length: Sequence length
Returns:
observations: Observation sequence
states: Hidden state sequence
"""
states = np.zeros(length, dtype=int)
observations = np.zeros(length, dtype=int)
# Initial state
states[0] = np.random.choice(self.n_states, p=self.initial_probs)
observations[0] = np.random.choice(
self.n_observations,
p=self.emission_matrix[states[0]]
)
# Subsequent states
for t in range(1, length):
states[t] = np.random.choice(
self.n_states,
p=self.transition_matrix[states[t-1]]
)
observations[t] = np.random.choice(
self.n_observations,
p=self.emission_matrix[states[t]]
)
return observations, states
# Create example HMM model
np.random.seed(42)
# Define model parameters (weather prediction example)
# States: 0=Sunny, 1=Rainy
# Observations: 0=Dry, 1=Damp, 2=Wet
A = np.array([
[0.7, 0.3], # Sunny->Sunny 0.7, Sunny->Rainy 0.3
[0.4, 0.6] # Rainy->Sunny 0.4, Rainy->Rainy 0.6
])
B = np.array([
[0.6, 0.3, 0.1], # Sunny: Dry 0.6, Damp 0.3, Wet 0.1
[0.1, 0.4, 0.5] # Rainy: Dry 0.1, Damp 0.4, Wet 0.5
])
pi = np.array([0.6, 0.4]) # Initial probability: Sunny 0.6, Rainy 0.4
# Create HMM instance
hmm = HiddenMarkovModel(n_states=2, n_observations=3)
hmm.set_parameters(A, B, pi)
# Generate example sequence
observations, true_states = hmm.generate_sequence(20)
print("HMM Model Parameters:")
print(f"Transition matrix A:\n{A}")
print(f"Emission matrix B:\n{B}")
print(f"Initial probability π: {pi}")
print(f"\nObservation sequence: {observations}")
print(f"True states: {true_states}")
Example 2: Forward and Backward Algorithm Implementation
def forward_algorithm(hmm, observations):
"""
Forward algorithm: Calculate probability of observation sequence
Parameters:
hmm: HMM model
observations: Observation sequence
Returns:
alpha: Forward probability matrix
log_probability: Log probability of observation sequence
"""
T = len(observations)
N = hmm.n_states
# Initialize forward probability matrix (use log probabilities to avoid underflow)
log_alpha = np.zeros((T, N))
# Initialization: t=0
log_alpha[0] = (np.log(hmm.initial_probs) +
np.log(hmm.emission_matrix[:, observations[0]]))
# Recursion: t=1,2,...,T-1
for t in range(1, T):
for j in range(N):
# log(sum(exp(log_values))) = logsumexp(log_values)
log_alpha[t, j] = logsumexp(
log_alpha[t-1] + np.log(hmm.transition_matrix[:, j])
) + np.log(hmm.emission_matrix[j, observations[t]])
# Calculate total probability of observation sequence
log_probability = logsumexp(log_alpha[T-1])
return np.exp(log_alpha), log_probability
def backward_algorithm(hmm, observations):
"""
Backward algorithm
Parameters:
hmm: HMM model
observations: Observation sequence
Returns:
beta: Backward probability matrix
"""
T = len(observations)
N = hmm.n_states
# Initialize backward probability matrix
log_beta = np.zeros((T, N))
# Initialization: t=T-1
log_beta[T-1] = 0 # log(1) = 0
# Recursion: t=T-2,T-3,...,0
for t in range(T-2, -1, -1):
for i in range(N):
log_beta[t, i] = logsumexp(
np.log(hmm.transition_matrix[i]) +
np.log(hmm.emission_matrix[:, observations[t+1]]) +
log_beta[t+1]
)
return np.exp(log_beta)
# Calculate forward and backward probabilities
alpha, log_prob = forward_algorithm(hmm, observations)
beta = backward_algorithm(hmm, observations)
print(f"\nLog probability of observation sequence: {log_prob:.4f}")
print(f"Probability of observation sequence: {np.exp(log_prob):.2e}")
# Verify forward-backward consistency: for every t, sum_i alpha_t(i) * beta_t(i) = P(O | lambda)
consistency_check = np.allclose(
    (alpha * beta).sum(axis=1),
    np.exp(log_prob)
)
print(f"Forward-backward algorithm consistency check: {consistency_check}")
Example 3: Viterbi Algorithm Implementation
def viterbi_algorithm(hmm, observations):
"""
Viterbi algorithm: Find the most likely hidden state sequence
Parameters:
hmm: HMM model
observations: Observation sequence
Returns:
best_path: Most likely state sequence
best_prob: Maximum probability
"""
T = len(observations)
N = hmm.n_states
# Initialize
log_delta = np.zeros((T, N)) # Viterbi probability
psi = np.zeros((T, N), dtype=int) # Backtrack path
# Initialization: t=0
log_delta[0] = (np.log(hmm.initial_probs) +
np.log(hmm.emission_matrix[:, observations[0]]))
# Recursion: t=1,2,...,T-1
for t in range(1, T):
for j in range(N):
# Find maximum probability path to state j
transition_scores = (log_delta[t-1] +
np.log(hmm.transition_matrix[:, j]))
# Record maximum probability and corresponding previous state
psi[t, j] = np.argmax(transition_scores)
log_delta[t, j] = (np.max(transition_scores) +
np.log(hmm.emission_matrix[j, observations[t]]))
# Termination: Find optimal state at final time
best_final_state = np.argmax(log_delta[T-1])
best_prob = np.max(log_delta[T-1])
# Backtrack: Reconstruct optimal path
best_path = np.zeros(T, dtype=int)
best_path[T-1] = best_final_state
for t in range(T-2, -1, -1):
best_path[t] = psi[t+1, best_path[t+1]]
return best_path, best_prob
# Use Viterbi algorithm for decoding
predicted_states, max_log_prob = viterbi_algorithm(hmm, observations)
print(f"\nViterbi decoding results:")
print(f"Predicted state sequence: {predicted_states}")
print(f"True state sequence: {true_states}")
print(f"Maximum path probability: {np.exp(max_log_prob):.2e}")
# Calculate decoding accuracy
accuracy = np.mean(predicted_states == true_states)
print(f"Decoding accuracy: {accuracy:.2%}")
# Visualization results
plt.figure(figsize=(12, 8))
# Subplot 1: Observation sequence
plt.subplot(3, 1, 1)
plt.plot(observations, 'bo-', linewidth=2, markersize=8)
plt.ylabel('Observation')
plt.title('Observation Sequence')
plt.grid(True, alpha=0.3)
plt.ylim(-0.5, 2.5)
plt.yticks([0, 1, 2], ['Dry', 'Damp', 'Wet'])
# Subplot 2: True states
plt.subplot(3, 1, 2)
plt.plot(true_states, 'ro-', linewidth=2, markersize=8, label='True States')
plt.ylabel('State')
plt.title('True Hidden State Sequence')
plt.grid(True, alpha=0.3)
plt.ylim(-0.5, 1.5)
plt.yticks([0, 1], ['Sunny', 'Rainy'])
# Subplot 3: Predicted states
plt.subplot(3, 1, 3)
plt.plot(predicted_states, 'go-', linewidth=2, markersize=8, label='Predicted States')
plt.xlabel('Time')
plt.ylabel('State')
plt.title('Viterbi Predicted State Sequence')
plt.grid(True, alpha=0.3)
plt.ylim(-0.5, 1.5)
plt.yticks([0, 1], ['Sunny', 'Rainy'])
plt.tight_layout()
plt.show()
Theoretical Analysis
Mathematical Representation of HMM
Let the HMM be $\lambda = (A, B, \pi)$, the observation sequence be $O = (o_1, o_2, \ldots, o_T)$, and the hidden state sequence be $Q = (q_1, q_2, \ldots, q_T)$.
Joint Probability:
$$P(O, Q \mid \lambda) = \pi_{q_1}\, b_{q_1}(o_1) \prod_{t=2}^{T} a_{q_{t-1} q_t}\, b_{q_t}(o_t)$$
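This factorization can be checked numerically against the forward algorithm: summing $P(O, Q \mid \lambda)$ over every possible state sequence $Q$ must reproduce the sequence likelihood. Below is a minimal brute-force sketch; it assumes the `hmm` instance, the `observations` array, and the `forward_algorithm` function from the examples above are still in scope, and only uses a short prefix of the sequence because the enumeration is exponential in $T$.
from itertools import product

def brute_force_likelihood(hmm, observations):
    """Sum P(O, Q | lambda) over all N^T possible hidden state sequences Q."""
    T = len(observations)
    total = 0.0
    for Q in product(range(hmm.n_states), repeat=T):
        # pi_{q_1} * b_{q_1}(o_1)
        p = hmm.initial_probs[Q[0]] * hmm.emission_matrix[Q[0], observations[0]]
        # prod_{t>=2} a_{q_{t-1} q_t} * b_{q_t}(o_t)
        for t in range(1, T):
            p *= (hmm.transition_matrix[Q[t-1], Q[t]] *
                  hmm.emission_matrix[Q[t], observations[t]])
        total += p
    return total

short_obs = observations[:6]  # 2^6 = 64 state sequences, cheap to enumerate
_, log_p = forward_algorithm(hmm, short_obs)
print(f"Forward algorithm likelihood:  {np.exp(log_p):.6e}")
print(f"Brute-force summed likelihood: {brute_force_likelihood(hmm, short_obs):.6e}")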
Mathematical Principles of Forward-Backward Algorithm
Forward Probability: $\alpha_t(i) = P(o_1, o_2, \ldots, o_t, q_t = s_i \mid \lambda)$
Recursive Relation:
$$\alpha_1(i) = \pi_i\, b_i(o_1), \qquad \alpha_{t+1}(j) = \left[\sum_{i=1}^{N} \alpha_t(i)\, a_{ij}\right] b_j(o_{t+1})$$
Backward Probability: $\beta_t(i) = P(o_{t+1}, o_{t+2}, \ldots, o_T \mid q_t = s_i, \lambda)$, with $\beta_T(i) = 1$ and
$$\beta_t(i) = \sum_{j=1}^{N} a_{ij}\, b_j(o_{t+1})\, \beta_{t+1}(j)$$
Dynamic Programming in Viterbi Algorithm
Optimal Path Probability: $\delta_t(i) = \max_{q_1, \ldots, q_{t-1}} P(q_1, \ldots, q_{t-1}, q_t = s_i, o_1, \ldots, o_t \mid \lambda)$
Recursive Relation:
$$\delta_{t+1}(j) = \left[\max_{1 \le i \le N} \delta_t(i)\, a_{ij}\right] b_j(o_{t+1}), \qquad \psi_{t+1}(j) = \arg\max_{1 \le i \le N} \delta_t(i)\, a_{ij}$$
EM Framework of Baum-Welch Algorithm
E Step: compute the expected sufficient statistics (state and transition posteriors) under the current parameters:
$$\gamma_t(i) = \frac{\alpha_t(i)\,\beta_t(i)}{\sum_{j=1}^{N} \alpha_t(j)\,\beta_t(j)}, \qquad \xi_t(i,j) = \frac{\alpha_t(i)\, a_{ij}\, b_j(o_{t+1})\, \beta_{t+1}(j)}{\sum_{i'=1}^{N}\sum_{j'=1}^{N} \alpha_t(i')\, a_{i'j'}\, b_{j'}(o_{t+1})\, \beta_{t+1}(j')}$$
M Step: update the parameters from these statistics:
$$\hat{\pi}_i = \gamma_1(i), \qquad \hat{a}_{ij} = \frac{\sum_{t=1}^{T-1} \xi_t(i,j)}{\sum_{t=1}^{T-1} \gamma_t(i)}, \qquad \hat{b}_j(k) = \frac{\sum_{t:\, o_t = v_k} \gamma_t(j)}{\sum_{t=1}^{T} \gamma_t(j)}$$
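The chapter gives the E and M steps only as formulas and does not include learning code, so the following is a minimal single-sequence Baum-Welch sketch built on the forward_algorithm and backward_algorithm functions from Example 2. It works in ordinary probability space for readability; a production implementation would stay in log space and guard against zero probabilities.
def baum_welch(hmm, observations, n_iterations=50, tol=1e-6):
    """
    Re-estimate (A, B, pi) from a single observation sequence via EM.
    Probability-space sketch; suitable for short sequences only.
    """
    observations = np.asarray(observations)
    T = len(observations)
    N, M = hmm.n_states, hmm.n_observations
    prev_log_prob = -np.inf
    for _ in range(n_iterations):
        # E step: state and transition posteriors from forward-backward
        alpha, log_prob = forward_algorithm(hmm, observations)
        beta = backward_algorithm(hmm, observations)
        gamma = alpha * beta
        gamma /= gamma.sum(axis=1, keepdims=True)             # gamma_t(i)
        xi = np.zeros((T - 1, N, N))                          # xi_t(i, j)
        for t in range(T - 1):
            xi[t] = (alpha[t][:, None] * hmm.transition_matrix *
                     hmm.emission_matrix[:, observations[t + 1]] * beta[t + 1])
            xi[t] /= xi[t].sum()
        # M step: update parameters from the expected counts
        hmm.initial_probs = gamma[0]
        hmm.transition_matrix = xi.sum(axis=0) / gamma[:-1].sum(axis=0)[:, None]
        new_B = np.zeros((N, M))
        for k in range(M):
            new_B[:, k] = gamma[observations == k].sum(axis=0)
        hmm.emission_matrix = new_B / gamma.sum(axis=0)[:, None]
        # Stop once the log-likelihood improvement falls below the tolerance
        if log_prob - prev_log_prob < tol:
            break
        prev_log_prob = log_prob
    return hmm, log_prob
A quick usage sketch: fit a randomly initialized two-state model to the sequence generated in Example 1 and report the final log-likelihood (the exact numbers depend on the random initialization).
hmm_est = HiddenMarkovModel(n_states=2, n_observations=3)
hmm_est.initialize_parameters(random_state=0)
hmm_est, final_log_prob = baum_welch(hmm_est, observations)
print(f"Log-likelihood after Baum-Welch: {final_log_prob:.4f}")
print(f"Estimated transition matrix:\n{hmm_est.transition_matrix}")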
Mathematical Formula Summary
- Forward probability recursion: $\alpha_{t+1}(j) = \left[\sum_{i=1}^{N} \alpha_t(i)\, a_{ij}\right] b_j(o_{t+1})$
- Backward probability recursion: $\beta_t(i) = \sum_{j=1}^{N} a_{ij}\, b_j(o_{t+1})\, \beta_{t+1}(j)$
- Viterbi recursion: $\delta_{t+1}(j) = \left[\max_{1 \le i \le N} \delta_t(i)\, a_{ij}\right] b_j(o_{t+1})$
- State posterior probability: $\gamma_t(i) = \dfrac{\alpha_t(i)\,\beta_t(i)}{P(O \mid \lambda)}$ (see the posterior-decoding sketch after this list)
- Transition posterior probability: $\xi_t(i,j) = \dfrac{\alpha_t(i)\, a_{ij}\, b_j(o_{t+1})\, \beta_{t+1}(j)}{P(O \mid \lambda)}$
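As a small illustration of the state posterior, $\gamma_t(i)$ can be computed directly from the alpha and beta matrices already produced in Example 2 and used for posterior (per-time-step) decoding, an alternative to the Viterbi path from Example 3. This sketch reuses those existing variables:
# Posterior state probabilities: gamma_t(i) = alpha_t(i) * beta_t(i) / P(O | lambda)
# (each row of alpha * beta sums to P(O | lambda), so row-normalizing is equivalent)
gamma = alpha * beta
gamma /= gamma.sum(axis=1, keepdims=True)

# Posterior decoding: most probable state at each individual time step
posterior_states = gamma.argmax(axis=1)
print(f"Posterior-decoded states: {posterior_states}")
print(f"Agreement with Viterbi path: {np.mean(posterior_states == predicted_states):.2%}")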
Implementation Notes
- Use log probabilities to avoid numerical underflow (see the sketch after this list)
- Baum-Welch algorithm may converge to local optima
- Model selection (number of states) significantly affects performance
- Initial parameter selection affects convergence speed and results
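To make the first note concrete, here is a tiny sketch (the 0.01 per-step probability and 200 steps are arbitrary illustrative values) showing how a direct product of small probabilities underflows to zero in double precision, while accumulating log-probabilities stays finite:
import numpy as np

p_step = 0.01
n_steps = 200
naive_product = np.prod(np.full(n_steps, p_step))          # underflows to 0.0
log_space_sum = np.sum(np.log(np.full(n_steps, p_step)))   # about -921.0, fully representable
print(f"Naive product: {naive_product}")
print(f"Log-space sum: {log_space_sum:.2f}")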