第14章：线性代数实际应用

Haiyue

October 7, 2025

30min

第14章：线性代数实际应用

学习目标

掌握线性代数在数据科学中的应用
理解主成分分析的原理
学习图论中的线性代数方法
掌握马尔可夫链的矩阵描述
理解线性代数在计算机图形学中的应用

主成分分析（PCA）

理论基础

主成分分析通过线性变换将数据投影到低维空间，同时尽可能保持数据的方差。核心思想是对协方差矩阵进行特征值分解。

数学原理

给定数据矩阵 $X \in \mathbb{R}^{n \times p}$ ，协方差矩阵为： $C = \frac{1}{n-1}X^TX$

主成分是 $C$ 的特征向量，按特征值大小排序。

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris, make_blobs
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import seaborn as sns

def pca_comprehensive_demo():
    """主成分分析的全面演示"""

    print("主成分分析（PCA）应用演示")
    print("=" * 50)

    # 生成示例数据
    np.random.seed(42)
    n_samples = 300

    # 创建三个相关的特征
    X1 = np.random.randn(n_samples)
    X2 = 0.8 * X1 + 0.6 * np.random.randn(n_samples)
    X3 = -0.5 * X1 + 0.3 * X2 + 0.7 * np.random.randn(n_samples)
    X4 = 0.1 * np.random.randn(n_samples)  # 噪声特征

    X = np.column_stack([X1, X2, X3, X4])
    feature_names = ['Feature 1', 'Feature 2', 'Feature 3', 'Feature 4']

    print(f"数据形状: {X.shape}")
    print(f"特征统计:")
    for i, name in enumerate(feature_names):
        print(f"  {name}: 均值={np.mean(X[:, i]):.3f}, 标准差={np.std(X[:, i]):.3f}")

    # 标准化数据
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    print(f"\n标准化后的数据统计:")
    for i, name in enumerate(feature_names):
        print(f"  {name}: 均值={np.mean(X_scaled[:, i]):.3f}, 标准差={np.std(X_scaled[:, i]):.3f}")

    # 手动实现PCA
    print(f"\n手动PCA实现:")

    # 计算协方差矩阵
    cov_matrix = np.cov(X_scaled.T)
    print(f"协方差矩阵:\n{cov_matrix}")

    # 特征值分解
    eigenvals, eigenvecs = np.linalg.eig(cov_matrix)

    # 排序
    idx = np.argsort(eigenvals)[::-1]
    eigenvals = eigenvals[idx]
    eigenvecs = eigenvecs[:, idx]

    print(f"\n特征值: {eigenvals}")
    print(f"方差解释比例: {eigenvals / np.sum(eigenvals)}")
    print(f"累积方差解释: {np.cumsum(eigenvals / np.sum(eigenvals))}")

    # 主成分变换
    X_pca_manual = X_scaled @ eigenvecs

    # 使用sklearn PCA验证
    pca_sklearn = PCA()
    X_pca_sklearn = pca_sklearn.fit_transform(X_scaled)

    print(f"\n验证sklearn结果:")
    print(f"特征值一致: {np.allclose(eigenvals, pca_sklearn.explained_variance_)}")
    print(f"主成分一致: {np.allclose(np.abs(X_pca_manual), np.abs(X_pca_sklearn))}")

    # 可视化
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))

    # 原始数据相关性
    ax1 = axes[0, 0]
    correlation_matrix = np.corrcoef(X_scaled.T)
    im = ax1.imshow(correlation_matrix, cmap='RdBu', vmin=-1, vmax=1)
    ax1.set_xticks(range(4))
    ax1.set_yticks(range(4))
    ax1.set_xticklabels(feature_names, rotation=45)
    ax1.set_yticklabels(feature_names)
    ax1.set_title('原始特征相关矩阵')

    # 添加数值标注
    for i in range(4):
        for j in range(4):
            ax1.text(j, i, f'{correlation_matrix[i,j]:.2f}',
                    ha='center', va='center', color='white' if abs(correlation_matrix[i,j]) > 0.5 else 'black')

    plt.colorbar(im, ax=ax1)

    # 特征值和方差解释
    ax2 = axes[0, 1]
    x_pos = range(1, 5)
    bars = ax2.bar(x_pos, eigenvals, alpha=0.7, color='skyblue')
    ax2.set_xlabel('主成分')
    ax2.set_ylabel('特征值（方差）')
    ax2.set_title('各主成分的方差贡献')
    ax2.set_xticks(x_pos)
    ax2.set_xticklabels([f'PC{i}' for i in x_pos])

    # 添加方差解释比例标签
    for bar, val, ratio in zip(bars, eigenvals, eigenvals/np.sum(eigenvals)):
        ax2.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.02,
                f'{ratio:.1%}', ha='center', va='bottom')

    ax2.grid(True, alpha=0.3)

    # 累积方差解释
    ax3 = axes[0, 2]
    cumsum_ratio = np.cumsum(eigenvals / np.sum(eigenvals))
    ax3.plot(x_pos, cumsum_ratio, 'o-', linewidth=2, markersize=8, color='red')
    ax3.axhline(y=0.95, color='green', linestyle='--', label='95%阈值')
    ax3.set_xlabel('主成分数量')
    ax3.set_ylabel('累积方差解释比例')
    ax3.set_title('累积方差解释比例')
    ax3.set_xticks(x_pos)
    ax3.set_xticklabels([f'PC{i}' for i in x_pos])
    ax3.grid(True, alpha=0.3)
    ax3.legend()

    # 前两个主成分的数据分布
    ax4 = axes[1, 0]
    scatter = ax4.scatter(X_pca_manual[:, 0], X_pca_manual[:, 1],
                         alpha=0.6, c=X_scaled[:, 0], cmap='viridis')
    ax4.set_xlabel(f'PC1 ({eigenvals[0]/np.sum(eigenvals)*100:.1f}%)')
    ax4.set_ylabel(f'PC2 ({eigenvals[1]/np.sum(eigenvals)*100:.1f}%)')
    ax4.set_title('前两个主成分的数据分布')
    ax4.grid(True, alpha=0.3)
    plt.colorbar(scatter, ax=ax4, label='Feature 1 value')

    # 主成分载荷图
    ax5 = axes[1, 1]
    for i, name in enumerate(feature_names):
        ax5.arrow(0, 0, eigenvecs[i, 0]*2, eigenvecs[i, 1]*2,
                 head_width=0.1, head_length=0.1, fc=f'C{i}', ec=f'C{i}',
                 linewidth=2, label=name)

    ax5.set_xlim(-1, 1)
    ax5.set_ylim(-1, 1)
    ax5.set_xlabel('PC1载荷')
    ax5.set_ylabel('PC2载荷')
    ax5.set_title('主成分载荷图')
    ax5.legend()
    ax5.grid(True, alpha=0.3)
    ax5.set_aspect('equal')

    # 降维重构误差
    ax6 = axes[1, 2]
    reconstruction_errors = []

    for n_components in range(1, 5):
        # 使用前n个主成分重构
        X_reconstructed = X_pca_manual[:, :n_components] @ eigenvecs[:, :n_components].T
        error = np.mean(np.sum((X_scaled - X_reconstructed)**2, axis=1))
        reconstruction_errors.append(error)

    ax6.plot(range(1, 5), reconstruction_errors, 'o-', linewidth=2, markersize=8)
    ax6.set_xlabel('使用的主成分数量')
    ax6.set_ylabel('平均重构误差')
    ax6.set_title('降维重构误差')
    ax6.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    return X_scaled, X_pca_manual, eigenvals, eigenvecs

def pca_real_world_example():
    """PCA在真实数据上的应用"""

    print(f"\n真实数据示例：鸢尾花数据集")
    print("=" * 40)

    # 加载鸢尾花数据集
    iris = load_iris()
    X = iris.data
    y = iris.target
    feature_names = iris.feature_names
    target_names = iris.target_names

    print(f"数据形状: {X.shape}")
    print(f"特征名称: {feature_names}")
    print(f"类别: {target_names}")

    # 标准化
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # PCA
    pca = PCA()
    X_pca = pca.fit_transform(X_scaled)

    print(f"\n各主成分的方差解释比例:")
    for i, ratio in enumerate(pca.explained_variance_ratio_):
        print(f"PC{i+1}: {ratio:.3f} ({ratio*100:.1f}%)")

    print(f"累积方差解释: {np.cumsum(pca.explained_variance_ratio_)}")

    # 可视化
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))

    # 原始数据的前两个特征
    ax1 = axes[0, 0]
    colors = ['red', 'green', 'blue']
    for i, (color, target) in enumerate(zip(colors, target_names)):
        mask = y == i
        ax1.scatter(X_scaled[mask, 0], X_scaled[mask, 1],
                   c=color, label=target, alpha=0.7)

    ax1.set_xlabel(f'{feature_names[0]} (标准化)')
    ax1.set_ylabel(f'{feature_names[1]} (标准化)')
    ax1.set_title('原始特征空间')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # PCA后的前两个主成分
    ax2 = axes[0, 1]
    for i, (color, target) in enumerate(zip(colors, target_names)):
        mask = y == i
        ax2.scatter(X_pca[mask, 0], X_pca[mask, 1],
                   c=color, label=target, alpha=0.7)

    ax2.set_xlabel(f'PC1 ({pca.explained_variance_ratio_[0]*100:.1f}%)')
    ax2.set_ylabel(f'PC2 ({pca.explained_variance_ratio_[1]*100:.1f}%)')
    ax2.set_title('主成分空间')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    # 主成分载荷
    ax3 = axes[1, 0]
    loadings = pca.components_[:2].T * np.sqrt(pca.explained_variance_[:2])

    for i, feature in enumerate(feature_names):
        ax3.arrow(0, 0, loadings[i, 0], loadings[i, 1],
                 head_width=0.1, head_length=0.1, fc=f'C{i}', ec=f'C{i}',
                 linewidth=2)
        ax3.text(loadings[i, 0]*1.1, loadings[i, 1]*1.1, feature,
                fontsize=10, ha='center', va='center')

    ax3.set_xlim(-3, 3)
    ax3.set_ylim(-3, 3)
    ax3.set_xlabel('PC1载荷')
    ax3.set_ylabel('PC2载荷')
    ax3.set_title('特征载荷图')
    ax3.grid(True, alpha=0.3)
    ax3.axhline(0, color='black', linewidth=0.5)
    ax3.axvline(0, color='black', linewidth=0.5)

    # 方差解释比例
    ax4 = axes[1, 1]
    x_pos = range(1, 5)
    bars = ax4.bar(x_pos, pca.explained_variance_ratio_, alpha=0.7)
    ax4.set_xlabel('主成分')
    ax4.set_ylabel('方差解释比例')
    ax4.set_title('各主成分方差贡献')
    ax4.set_xticks(x_pos)

    # 累积线
    ax4_twin = ax4.twinx()
    ax4_twin.plot(x_pos, np.cumsum(pca.explained_variance_ratio_),
                 'ro-', linewidth=2, label='累积')
    ax4_twin.set_ylabel('累积方差解释比例')
    ax4_twin.legend()

    plt.tight_layout()
    plt.show()

# 执行PCA演示
X_scaled, X_pca, eigenvals, eigenvecs = pca_comprehensive_demo()
pca_real_world_example()

图论中的线性代数

邻接矩阵和拉普拉斯矩阵

图的结构可以用矩阵表示，图的性质与矩阵的特征值密切相关。

PageRank算法

Google的PageRank算法本质上是求解特征向量问题。

import networkx as nx
from scipy.sparse import csc_matrix
from scipy.sparse.linalg import eigs

def graph_theory_applications():
    """图论中的线性代数应用"""

    print("图论中的线性代数应用")
    print("=" * 50)

    # 创建示例图
    G = nx.Graph()
    edges = [(1, 2), (1, 3), (2, 3), (2, 4), (3, 4), (3, 5), (4, 5), (4, 6), (5, 6)]
    G.add_edges_from(edges)

    print(f"图的基本信息:")
    print(f"节点数: {G.number_of_nodes()}")
    print(f"边数: {G.number_of_edges()}")

    # 邻接矩阵
    A = nx.adjacency_matrix(G).toarray()
    print(f"\n邻接矩阵:")
    print(A)

    # 度矩阵
    degrees = dict(G.degree())
    D = np.diag([degrees[i] for i in range(1, 7)])
    print(f"\n度矩阵:")
    print(D)

    # 拉普拉斯矩阵
    L = D - A
    print(f"\n拉普拉斯矩阵 (L = D - A):")
    print(L)

    # 计算拉普拉斯矩阵的特征值
    eigenvals_L, eigenvecs_L = np.linalg.eig(L)
    eigenvals_L = np.real(eigenvals_L)
    idx = np.argsort(eigenvals_L)
    eigenvals_L = eigenvals_L[idx]
    eigenvecs_L = eigenvecs_L[:, idx]

    print(f"\n拉普拉斯矩阵的特征值:")
    print(eigenvals_L)

    # 第二小特征值（代数连通度）
    algebraic_connectivity = eigenvals_L[1]
    print(f"代数连通度（第二小特征值）: {algebraic_connectivity:.6f}")

    # 图的连通性
    num_components = np.sum(eigenvals_L < 1e-10)
    print(f"连通分量数: {num_components}")

    # 可视化
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))

    # 图的可视化
    ax1 = axes[0, 0]
    pos = nx.spring_layout(G, seed=42)
    nx.draw(G, pos, ax=ax1, with_labels=True, node_color='lightblue',
            node_size=500, font_size=10, font_weight='bold')
    ax1.set_title('原始图')

    # 邻接矩阵热图
    ax2 = axes[0, 1]
    im2 = ax2.imshow(A, cmap='Blues')
    ax2.set_title('邻接矩阵')
    ax2.set_xticks(range(6))
    ax2.set_yticks(range(6))
    ax2.set_xticklabels(range(1, 7))
    ax2.set_yticklabels(range(1, 7))
    plt.colorbar(im2, ax=ax2)

    # 拉普拉斯矩阵热图
    ax3 = axes[0, 2]
    im3 = ax3.imshow(L, cmap='RdBu')
    ax3.set_title('拉普拉斯矩阵')
    ax3.set_xticks(range(6))
    ax3.set_yticks(range(6))
    ax3.set_xticklabels(range(1, 7))
    ax3.set_yticklabels(range(1, 7))
    plt.colorbar(im3, ax=ax3)

    # 特征值谱
    ax4 = axes[1, 0]
    ax4.bar(range(len(eigenvals_L)), eigenvals_L, alpha=0.7)
    ax4.set_xlabel('特征值索引')
    ax4.set_ylabel('特征值')
    ax4.set_title('拉普拉斯矩阵特征值谱')
    ax4.grid(True, alpha=0.3)

    # Fiedler向量（第二小特征向量）
    ax5 = axes[1, 1]
    fiedler_vector = eigenvecs_L[:, 1]
    colors = ['red' if x < 0 else 'blue' for x in fiedler_vector]

    pos_fixed = {i+1: pos[i+1] for i in range(6)}
    node_colors = ['red' if fiedler_vector[i] < 0 else 'blue' for i in range(6)]

    nx.draw(G, pos_fixed, ax=ax5, with_labels=True, node_color=node_colors,
            node_size=500, font_size=10, font_weight='bold')
    ax5.set_title('Fiedler向量分割')

    # 特征向量分析
    ax6 = axes[1, 2]
    for i in range(min(3, len(eigenvals_L))):
        ax6.plot(eigenvecs_L[:, i], 'o-', label=f'特征向量{i+1} (λ={eigenvals_L[i]:.3f})')

    ax6.set_xlabel('节点')
    ax6.set_ylabel('特征向量值')
    ax6.set_title('前三个特征向量')
    ax6.legend()
    ax6.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

def pagerank_demo():
    """PageRank算法演示"""

    print(f"\nPageRank算法演示")
    print("=" * 30)

    # 创建有向图
    G = nx.DiGraph()
    edges = [(1, 2), (1, 3), (2, 3), (3, 1), (3, 4), (4, 3), (4, 5), (5, 4), (5, 6), (6, 5)]
    G.add_edges_from(edges)

    print(f"有向图信息:")
    print(f"节点数: {G.number_of_nodes()}")
    print(f"边数: {G.number_of_edges()}")

    # 构造转移矩阵
    A = nx.adjacency_matrix(G).toarray().astype(float)

    # 列归一化（每列和为1）
    out_degrees = np.sum(A, axis=0)
    for i in range(len(out_degrees)):
        if out_degrees[i] > 0:
            A[:, i] = A[:, i] / out_degrees[i]

    # PageRank矩阵（加入随机跳转）
    damping_factor = 0.85
    n = G.number_of_nodes()
    M = damping_factor * A + (1 - damping_factor) / n * np.ones((n, n))

    print(f"\nPageRank转移矩阵:")
    print(M)

    # 计算主特征向量（特征值1对应的）
    eigenvals, eigenvecs = np.linalg.eig(M)

    # 找到特征值最接近1的
    idx = np.argmin(np.abs(eigenvals - 1))
    pagerank_vector = np.real(eigenvecs[:, idx])
    pagerank_vector = pagerank_vector / np.sum(pagerank_vector)  # 归一化

    print(f"\n手动计算的PageRank值:")
    for i in range(n):
        print(f"节点 {i+1}: {pagerank_vector[i]:.6f}")

    # 使用NetworkX验证
    pagerank_nx = nx.pagerank(G, alpha=damping_factor)

    print(f"\nNetworkX计算的PageRank值:")
    for node in sorted(pagerank_nx.keys()):
        print(f"节点 {node}: {pagerank_nx[node]:.6f}")

    # 幂法求解PageRank
    def power_method_pagerank(M, max_iterations=100, tolerance=1e-6):
        n = M.shape[0]
        x = np.ones(n) / n  # 初始化

        for i in range(max_iterations):
            x_new = M @ x

            if np.linalg.norm(x_new - x) < tolerance:
                break

            x = x_new

        return x, i+1

    pr_power, iterations = power_method_pagerank(M)
    print(f"\n幂法计算的PageRank值 ({iterations}次迭代):")
    for i in range(n):
        print(f"节点 {i+1}: {pr_power[i]:.6f}")

    # 可视化
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    # 有向图
    ax1 = axes[0]
    pos = nx.spring_layout(G, seed=42)
    nx.draw(G, pos, ax=ax1, with_labels=True, node_color='lightgreen',
            node_size=1000, font_size=12, font_weight='bold',
            arrows=True, arrowsize=20, arrowstyle='->')
    ax1.set_title('有向图结构')

    # PageRank值比较
    ax2 = axes[1]
    nodes = list(range(1, n+1))
    width = 0.25

    ax2.bar([x - width for x in nodes], pagerank_vector, width,
            label='手动计算', alpha=0.7)
    ax2.bar(nodes, [pagerank_nx[i] for i in nodes], width,
            label='NetworkX', alpha=0.7)
    ax2.bar([x + width for x in nodes], pr_power, width,
            label='幂法', alpha=0.7)

    ax2.set_xlabel('节点')
    ax2.set_ylabel('PageRank值')
    ax2.set_title('PageRank值比较')
    ax2.set_xticks(nodes)
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    # 根据PageRank值调整节点大小的图
    ax3 = axes[2]
    node_sizes = [pr_power[i-1] * 5000 for i in G.nodes()]  # 放大显示
    nx.draw(G, pos, ax=ax3, with_labels=True,
            node_size=node_sizes, font_size=12, font_weight='bold',
            arrows=True, arrowsize=20, arrowstyle='->',
            node_color='lightcoral')
    ax3.set_title('节点大小反映PageRank值')

    plt.tight_layout()
    plt.show()

# 执行图论演示
graph_theory_applications()
pagerank_demo()

马尔可夫链

转移矩阵

马尔可夫链的状态转移可以用矩阵表示，稳态分布是转移矩阵的主特征向量。

稳态分析

系统的长期行为由转移矩阵的特征值和特征向量决定。

def markov_chain_analysis():
    """马尔可夫链的矩阵分析"""

    print("马尔可夫链矩阵分析")
    print("=" * 50)

    # 天气预报模型
    # 状态: 0=晴天, 1=阴天, 2=雨天
    weather_states = ['晴天', '阴天', '雨天']

    # 转移概率矩阵 P[i,j] = 从状态i转移到状态j的概率
    P = np.array([[0.7, 0.2, 0.1],    # 晴天 -> 晴天/阴天/雨天
                  [0.3, 0.4, 0.3],    # 阴天 -> 晴天/阴天/雨天
                  [0.2, 0.3, 0.5]])   # 雨天 -> 晴天/阴天/雨天

    print("转移概率矩阵:")
    print("     ", "  ".join(f"{state:>6}" for state in weather_states))
    for i, from_state in enumerate(weather_states):
        print(f"{from_state:>6}: {P[i]}")

    # 验证概率矩阵（每行和为1）
    row_sums = np.sum(P, axis=1)
    print(f"\n每行概率和: {row_sums}")
    print(f"是否为有效概率矩阵: {np.allclose(row_sums, 1.0)}")

    # 计算n步转移概率
    print(f"\n多步转移概率:")
    for n in [2, 5, 10, 20]:
        P_n = np.linalg.matrix_power(P, n)
        print(f"\n{n}步转移概率矩阵:")
        print(P_n)

    # 稳态分布（主特征向量）
    eigenvals, eigenvecs = np.linalg.eig(P.T)  # 注意转置

    # 找到特征值为1的特征向量
    idx = np.argmin(np.abs(eigenvals - 1.0))
    steady_state = np.real(eigenvecs[:, idx])
    steady_state = steady_state / np.sum(steady_state)  # 归一化为概率

    print(f"\n稳态分布:")
    for i, (state, prob) in enumerate(zip(weather_states, steady_state)):
        print(f"{state}: {prob:.6f} ({prob*100:.2f}%)")

    # 验证稳态性质
    steady_check = P.T @ steady_state
    print(f"\n稳态验证 (Pᵀπ = π):")
    print(f"误差: {np.linalg.norm(steady_check - steady_state):.2e}")

    # 模拟马尔可夫链
    def simulate_markov_chain(P, initial_state, n_steps):
        """模拟马尔可夫链"""
        states = [initial_state]
        current_state = initial_state

        for _ in range(n_steps):
            # 根据转移概率选择下一状态
            current_state = np.random.choice(3, p=P[current_state])
            states.append(current_state)

        return np.array(states)

    # 运行多次模拟
    np.random.seed(42)
    n_simulations = 1000
    n_steps = 100

    all_final_states = []
    for _ in range(n_simulations):
        # 随机初始状态
        initial = np.random.choice(3)
        trajectory = simulate_markov_chain(P, initial, n_steps)
        all_final_states.append(trajectory[-1])

    # 计算经验稳态分布
    empirical_steady = np.bincount(all_final_states) / len(all_final_states)

    print(f"\n经验稳态分布 ({n_simulations}次模拟):")
    for i, (state, prob) in enumerate(zip(weather_states, empirical_steady)):
        print(f"{state}: {prob:.6f} ({prob*100:.2f}%)")

    print(f"\n理论vs经验稳态分布误差: {np.linalg.norm(steady_state - empirical_steady):.6f}")

    # 可视化
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))

    # 转移概率矩阵热图
    ax1 = axes[0, 0]
    im1 = ax1.imshow(P, cmap='Blues', vmin=0, vmax=1)
    ax1.set_xticks(range(3))
    ax1.set_yticks(range(3))
    ax1.set_xticklabels(weather_states, rotation=45)
    ax1.set_yticklabels(weather_states)
    ax1.set_title('转移概率矩阵')

    # 添加数值标注
    for i in range(3):
        for j in range(3):
            ax1.text(j, i, f'{P[i,j]:.2f}', ha='center', va='center',
                    color='white' if P[i,j] > 0.5 else 'black')

    plt.colorbar(im1, ax=ax1)

    # 特征值
    ax2 = axes[0, 1]
    eigenvals_magnitude = np.abs(eigenvals)
    ax2.bar(range(len(eigenvals)), eigenvals_magnitude, alpha=0.7)
    ax2.set_xlabel('特征值索引')
    ax2.set_ylabel('特征值模长')
    ax2.set_title('转移矩阵特征值')
    ax2.grid(True, alpha=0.3)

    # 稳态分布比较
    ax3 = axes[0, 2]
    x_pos = range(3)
    width = 0.35

    ax3.bar([x - width/2 for x in x_pos], steady_state, width,
            label='理论稳态', alpha=0.7)
    ax3.bar([x + width/2 for x in x_pos], empirical_steady, width,
            label='经验稳态', alpha=0.7)

    ax3.set_xlabel('天气状态')
    ax3.set_ylabel('概率')
    ax3.set_title('稳态分布比较')
    ax3.set_xticks(x_pos)
    ax3.set_xticklabels(weather_states)
    ax3.legend()
    ax3.grid(True, alpha=0.3)

    # 收敛过程
    ax4 = axes[1, 0]
    initial_dist = np.array([1, 0, 0])  # 从晴天开始
    distributions = [initial_dist]

    for n in range(1, 21):
        dist = initial_dist @ np.linalg.matrix_power(P, n)
        distributions.append(dist)

    distributions = np.array(distributions)

    for i, state in enumerate(weather_states):
        ax4.plot(range(21), distributions[:, i], 'o-', label=state)

    ax4.axhline(y=steady_state[0], color='C0', linestyle='--', alpha=0.7)
    ax4.axhline(y=steady_state[1], color='C1', linestyle='--', alpha=0.7)
    ax4.axhline(y=steady_state[2], color='C2', linestyle='--', alpha=0.7)

    ax4.set_xlabel('时间步数')
    ax4.set_ylabel('概率')
    ax4.set_title('分布收敛过程')
    ax4.legend()
    ax4.grid(True, alpha=0.3)

    # 单次模拟轨迹
    ax5 = axes[1, 1]
    trajectory = simulate_markov_chain(P, 0, 50)
    ax5.plot(trajectory, 'o-', markersize=4)
    ax5.set_xlabel('时间')
    ax5.set_ylabel('状态')
    ax5.set_title('马尔可夫链轨迹示例')
    ax5.set_yticks(range(3))
    ax5.set_yticklabels(weather_states)
    ax5.grid(True, alpha=0.3)

    # 状态占用时间统计
    ax6 = axes[1, 2]
    long_trajectory = simulate_markov_chain(P, 0, 10000)
    state_counts = np.bincount(long_trajectory[1000:])  # 去掉初始1000步
    state_frequencies = state_counts / len(long_trajectory[1000:])

    ax6.bar(range(3), state_frequencies, alpha=0.7, label='长期模拟')
    ax6.bar(range(3), steady_state, alpha=0.5, label='理论稳态', width=0.5)

    ax6.set_xlabel('天气状态')
    ax6.set_ylabel('频率/概率')
    ax6.set_title('长期状态分布')
    ax6.set_xticks(range(3))
    ax6.set_xticklabels(weather_states)
    ax6.legend()
    ax6.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    return P, steady_state

# 执行马尔可夫链演示
P, steady_state = markov_chain_analysis()

计算机图形学中的线性代数

三维变换

旋转、缩放、平移等变换都可以用矩阵表示。

投影变换

三维到二维的投影是线性变换的重要应用。

from mpl_toolkits.mplot3d.art3d import Poly3DCollection

def computer_graphics_demo():
    """计算机图形学中的线性代数应用"""

    print("计算机图形学中的线性代数")
    print("=" * 50)

    # 定义3D立方体的顶点
    cube_vertices = np.array([
        [0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0],  # 底面
        [0, 0, 1], [1, 0, 1], [1, 1, 1], [0, 1, 1]   # 顶面
    ])

    print("立方体顶点坐标:")
    for i, vertex in enumerate(cube_vertices):
        print(f"顶点{i}: {vertex}")

    # 定义立方体的面（每个面用4个顶点索引表示）
    cube_faces = [
        [0, 1, 2, 3],  # 底面
        [4, 5, 6, 7],  # 顶面
        [0, 1, 5, 4],  # 前面
        [2, 3, 7, 6],  # 后面
        [0, 3, 7, 4],  # 左面
        [1, 2, 6, 5]   # 右面
    ]

    # 变换矩阵定义
    def rotation_x(angle):
        """绕X轴旋转"""
        c, s = np.cos(angle), np.sin(angle)
        return np.array([[1, 0, 0], [0, c, -s], [0, s, c]])

    def rotation_y(angle):
        """绕Y轴旋转"""
        c, s = np.cos(angle), np.sin(angle)
        return np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]])

    def rotation_z(angle):
        """绕Z轴旋转"""
        c, s = np.cos(angle), np.sin(angle)
        return np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])

    def scaling(sx, sy, sz):
        """缩放变换"""
        return np.array([[sx, 0, 0], [0, sy, 0], [0, 0, sz]])

    def translation(tx, ty, tz):
        """平移变换（齐次坐标）"""
        return np.array([[1, 0, 0, tx],
                        [0, 1, 0, ty],
                        [0, 0, 1, tz],
                        [0, 0, 0, 1]])

    def perspective_projection(d):
        """透视投影"""
        return np.array([[1, 0, 0, 0],
                        [0, 1, 0, 0],
                        [0, 0, 1, 0],
                        [0, 0, 1/d, 0]])

    # 应用各种变换
    print(f"\n应用变换:")

    # 1. 旋转变换
    angle = np.pi / 6  # 30度
    R_x = rotation_x(angle)
    R_y = rotation_y(angle)
    R_z = rotation_z(angle)

    print(f"绕X轴旋转30度矩阵:")
    print(R_x)

    # 复合旋转
    R_combined = R_z @ R_y @ R_x
    rotated_cube = (R_combined @ cube_vertices.T).T

    # 2. 缩放变换
    S = scaling(1.5, 0.8, 1.2)
    scaled_cube = (S @ cube_vertices.T).T

    # 3. 齐次坐标变换（旋转+平移）
    # 将3D点转换为齐次坐标
    vertices_homogeneous = np.column_stack([cube_vertices, np.ones(8)])

    # 复合变换：先旋转再平移
    T = translation(2, 1, 0.5)
    R_homo = np.eye(4)
    R_homo[:3, :3] = R_combined

    transform_combined = T @ R_homo
    transformed_vertices = (transform_combined @ vertices_homogeneous.T).T
    transformed_cube = transformed_vertices[:, :3]  # 去掉齐次坐标

    print(f"\n复合变换矩阵 (先旋转后平移):")
    print(transform_combined)

    # 可视化
    fig = plt.figure(figsize=(20, 15))

    def draw_cube(ax, vertices, title, color='blue', alpha=0.7):
        """绘制立方体"""
        # 创建面的集合
        faces = []
        for face in cube_faces:
            face_vertices = vertices[face]
            faces.append(face_vertices)

        # 绘制立方体的面
        poly3d = [[vertices[j] for j in face] for face in cube_faces]
        ax.add_collection3d(Poly3DCollection(poly3d, alpha=alpha,
                                           facecolor=color, edgecolor='black'))

        # 设置坐标轴
        all_coords = vertices.flatten()
        ax.set_xlim([all_coords.min()-0.5, all_coords.max()+0.5])
        ax.set_ylim([all_coords.min()-0.5, all_coords.max()+0.5])
        ax.set_zlim([all_coords.min()-0.5, all_coords.max()+0.5])

        ax.set_xlabel('X')
        ax.set_ylabel('Y')
        ax.set_zlabel('Z')
        ax.set_title(title)

    # 原始立方体
    ax1 = fig.add_subplot(2, 4, 1, projection='3d')
    draw_cube(ax1, cube_vertices, '原始立方体', 'lightblue')

    # 旋转后的立方体
    ax2 = fig.add_subplot(2, 4, 2, projection='3d')
    draw_cube(ax2, rotated_cube, '旋转变换', 'lightgreen')

    # 缩放后的立方体
    ax3 = fig.add_subplot(2, 4, 3, projection='3d')
    draw_cube(ax3, scaled_cube, '缩放变换', 'lightcoral')

    # 复合变换后的立方体
    ax4 = fig.add_subplot(2, 4, 4, projection='3d')
    draw_cube(ax4, transformed_cube, '复合变换', 'lightyellow')

    # 投影变换演示
    ax5 = fig.add_subplot(2, 4, 5)

    # 正交投影到XY平面
    projected_xy = cube_vertices[:, :2]

    # 绘制投影后的图形
    for face in cube_faces:
        face_2d = projected_xy[face]
        # 闭合多边形
        face_2d_closed = np.vstack([face_2d, face_2d[0]])
        ax5.plot(face_2d_closed[:, 0], face_2d_closed[:, 1], 'b-', alpha=0.7)

    ax5.scatter(projected_xy[:, 0], projected_xy[:, 1], c='red', s=50)
    ax5.set_xlabel('X')
    ax5.set_ylabel('Y')
    ax5.set_title('XY平面正交投影')
    ax5.grid(True, alpha=0.3)
    ax5.set_aspect('equal')

    # 透视投影
    ax6 = fig.add_subplot(2, 4, 6)

    # 简单透视投影：z坐标作为深度信息
    d = 3  # 观察距离
    perspective_x = cube_vertices[:, 0] / (1 + cube_vertices[:, 2]/d)
    perspective_y = cube_vertices[:, 1] / (1 + cube_vertices[:, 2]/d)

    for face in cube_faces:
        face_x = perspective_x[face]
        face_y = perspective_y[face]
        # 闭合多边形
        face_x = np.append(face_x, face_x[0])
        face_y = np.append(face_y, face_y[0])
        ax6.plot(face_x, face_y, 'g-', alpha=0.7)

    ax6.scatter(perspective_x, perspective_y, c='red', s=50)
    ax6.set_xlabel('X')
    ax6.set_ylabel('Y')
    ax6.set_title('透视投影')
    ax6.grid(True, alpha=0.3)
    ax6.set_aspect('equal')

    # 变换矩阵可视化
    ax7 = fig.add_subplot(2, 4, 7)

    matrices = [R_x, R_y, R_z, S]
    labels = ['Rot_X', 'Rot_Y', 'Rot_Z', 'Scale']

    for i, (matrix, label) in enumerate(zip(matrices, labels)):
        # 显示矩阵的条件数或行列式
        det_val = np.linalg.det(matrix)
        ax7.bar(i, det_val, alpha=0.7, label=f'{label}\ndet={det_val:.3f}')

    ax7.set_xlabel('变换类型')
    ax7.set_ylabel('行列式')
    ax7.set_title('变换矩阵行列式')
    ax7.set_xticks(range(4))
    ax7.set_xticklabels(labels)
    ax7.legend()
    ax7.grid(True, alpha=0.3)

    # 变换序列
    ax8 = fig.add_subplot(2, 4, 8)

    # 显示变换的步骤效果
    steps = ['原始', '旋转X', '旋转Y', '旋转Z', '缩放']
    step_vertices = [
        cube_vertices,
        (R_x @ cube_vertices.T).T,
        (R_y @ R_x @ cube_vertices.T).T,
        (R_z @ R_y @ R_x @ cube_vertices.T).T,
        (S @ R_z @ R_y @ R_x @ cube_vertices.T).T
    ]

    # 计算每步后立方体中心的位移
    centers = [np.mean(vertices, axis=0) for vertices in step_vertices]
    center_distances = [np.linalg.norm(center - centers[0]) for center in centers]

    ax8.plot(range(len(steps)), center_distances, 'o-', linewidth=2, markersize=8)
    ax8.set_xlabel('变换步骤')
    ax8.set_ylabel('中心位移距离')
    ax8.set_title('变换序列中心位移')
    ax8.set_xticks(range(len(steps)))
    ax8.set_xticklabels(steps, rotation=45)
    ax8.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # 动画效果演示（静态显示多帧）
    print(f"\n创建旋转动画帧:")

    fig_anim, axes_anim = plt.subplots(2, 4, figsize=(16, 8),
                                      subplot_kw={'projection': '3d'})
    axes_anim = axes_anim.flatten()

    angles = np.linspace(0, 2*np.pi, 8)

    for i, angle in enumerate(angles):
        R_anim = rotation_y(angle)
        rotated_vertices = (R_anim @ cube_vertices.T).T

        draw_cube(axes_anim[i], rotated_vertices,
                 f'旋转 {angle*180/np.pi:.0f}°', 'lightblue')

    plt.tight_layout()
    plt.show()

    return cube_vertices, transform_combined

# 执行计算机图形学演示
cube_vertices, transform_matrix = computer_graphics_demo()

本章总结

应用领域总结

应用领域	核心概念	关键技术	实际价值
数据科学	特征值分解	PCA, SVD	降维、去噪
图论	邻接矩阵	谱图理论	网络分析
马尔可夫链	转移矩阵	稳态分析	预测建模
计算机图形学	变换矩阵	3D变换	渲染、动画
机器学习	优化理论	梯度方法	模型训练

数学工具箱

def linear_algebra_toolbox_summary():
    """线性代数工具箱总结"""

    print("线性代数实用工具箱")
    print("=" * 50)

    tools = {
        "矩阵分解": {
            "特征值分解": "A = QΛQ⁻¹，用于PCA、稳态分析",
            "奇异值分解": "A = UΣVᵀ，用于降维、压缩",
            "QR分解": "A = QR，用于最小二乘、正交化",
            "Cholesky分解": "A = LLᵀ，用于正定矩阵求解"
        },

        "数值方法": {
            "幂法": "求主特征值，用于PageRank",
            "雅可比方法": "求所有特征值，用于对称矩阵",
            "Gram-Schmidt": "正交化，用于构造标准正交基",
            "最小二乘": "超定方程求解，用于数据拟合"
        },

        "应用算法": {
            "主成分分析": "数据降维和可视化",
            "马尔可夫链": "状态转移和预测",
            "图算法": "网络分析和排序",
            "图形变换": "3D建模和渲染"
        }
    }

    for category, items in tools.items():
        print(f"\n{category}:")
        print("-" * 30)
        for tool, description in items.items():
            print(f"• {tool:15} : {description}")

    # 选择合适工具的决策树
    print(f"\n工具选择指南:")
    print("=" * 30)

    decision_tree = """
    数据分析问题?
    ├─ 降维需求 → PCA (特征值分解)
    ├─ 数据压缩 → SVD
    └─ 回归分析 → 最小二乘法

    网络问题?
    ├─ 节点重要性 → PageRank (主特征向量)
    ├─ 社区发现 → 谱聚类 (拉普拉斯矩阵)
    └─ 连通性分析 → 代数连通度

    动态系统?
    ├─ 稳态预测 → 马尔可夫链 (转移矩阵)
    ├─ 振动分析 → 特征频率 (特征值)
    └─ 稳定性 → 特征值实部符号

    图形学问题?
    ├─ 3D变换 → 齐次坐标矩阵
    ├─ 投影 → 投影矩阵
    └─ 动画 → 插值变换
    """

    print(decision_tree)

linear_algebra_toolbox_summary()

跨学科影响

线性代数作为数学的基础分支，在现代科学技术中发挥着越来越重要的作用：

人工智能：深度学习的核心是矩阵运算
量子计算：量子态用向量表示，量子门用矩阵表示
经济学：投入产出模型、博弈论
生物信息学：基因序列分析、蛋白质结构预测
信号处理：傅里叶变换、滤波器设计

线性代数不仅仅是数学工具，更是理解和解决复杂问题的思维方式。通过矩阵和向量的抽象，我们能够处理高维数据、复杂网络和动态系统，这使得线性代数成为现代科学技术不可或缺的基础。

P Info

Project Cards Demo

Trading Chart Demo

Basic Skills

001. Genix Ventures Overview

P Info

Project Cards Demo

Trading Chart Demo

Basic Skills

001. Genix Ventures Overview

第14章：线性代数实际应用

第14章：线性代数实际应用

主成分分析（PCA）

理论基础

数学原理

图论中的线性代数

邻接矩阵和拉普拉斯矩阵

PageRank算法

马尔可夫链

转移矩阵

稳态分析

计算机图形学中的线性代数

三维变换

投影变换

本章总结

应用领域总结

数学工具箱

跨学科影响