Chapter 12: Inner Product Spaces and Orthogonality

Learning Objectives
  • Understand the definition and properties of inner products
  • Master the concepts of orthogonality and orthonormality
  • Understand the Gram-Schmidt orthogonalization process
  • Master the properties of orthogonal matrices
  • Understand the principles of least squares method

Definition and Properties of Inner Products

Axiomatic Definition of Inner Products

In a real vector space $V$, an inner product is a function $\langle \cdot, \cdot \rangle : V \times V \to \mathbb{R}$ satisfying:

  1. Symmetry: $\langle \mathbf{u}, \mathbf{v} \rangle = \langle \mathbf{v}, \mathbf{u} \rangle$
  2. Linearity (in the first argument): $\langle a\mathbf{u} + b\mathbf{v}, \mathbf{w} \rangle = a\langle \mathbf{u}, \mathbf{w} \rangle + b\langle \mathbf{v}, \mathbf{w} \rangle$
  3. Positive definiteness: $\langle \mathbf{v}, \mathbf{v} \rangle \geq 0$, with $\langle \mathbf{v}, \mathbf{v} \rangle = 0$ if and only if $\mathbf{v} = \mathbf{0}$
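
The dot product is not the only function satisfying these axioms: any symmetric positive definite matrix $W$ induces an inner product via $\langle \mathbf{u}, \mathbf{v} \rangle_W = \mathbf{u}^T W \mathbf{v}$. A minimal sketch (the matrix $W$ and vectors here are illustrative choices, not part of the demo code below):

import numpy as np

# Any symmetric positive definite W induces an inner product <u, v>_W = u^T W v.
W = np.array([[2.0, 0.5],
              [0.5, 1.0]])   # symmetric; eigenvalues (3 ± sqrt(2))/2 > 0

def inner_w(u, v):
    """Weighted inner product induced by W (illustrative choice of W)."""
    return u @ W @ v

u = np.array([3.0, 4.0])
v = np.array([1.0, 2.0])

print(np.isclose(inner_w(u, v), inner_w(v, u)))                               # symmetry
print(np.isclose(inner_w(2*u + 3*v, u), 2*inner_w(u, u) + 3*inner_w(v, u)))   # linearity
print(inner_w(u, u) > 0)                                                      # positive definiteness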

Standard Inner Product

The standard inner product on $\mathbb{R}^n$ is $\langle \mathbf{u}, \mathbf{v} \rangle = \mathbf{u}^T\mathbf{v} = u_1v_1 + u_2v_2 + \cdots + u_nv_n$.

import numpy as np
import matplotlib.pyplot as plt
from scipy.linalg import qr, svd
from mpl_toolkits.mplot3d import Axes3D

def inner_product_basics():
    """Basic concepts and properties of inner products"""

    # Define vectors
    u = np.array([3, 4])
    v = np.array([1, 2])
    w = np.array([-2, 1])

    print("Vectors:")
    print(f"u = {u}")
    print(f"v = {v}")
    print(f"w = {w}")

    # Calculate inner products
    uv = np.dot(u, v)
    uw = np.dot(u, w)
    vw = np.dot(v, w)

    print(f"\nInner product calculations:")
    print(f"<u, v> = {uv}")
    print(f"<u, w> = {uw}")
    print(f"<v, w> = {vw}")

    # Verify inner product properties
    print(f"\nVerifying inner product properties:")

    # Symmetry
    print(f"Symmetry: <u, v> = <v, u>")
    print(f"  {np.dot(u, v)} = {np.dot(v, u)}: {np.dot(u, v) == np.dot(v, u)}")

    # Linearity
    a, b = 2, -3
    linear_left = np.dot(a*u + b*v, w)
    linear_right = a*np.dot(u, w) + b*np.dot(v, w)
    print(f"Linearity: <au + bv, w> = a<u, w> + b<v, w>")
    print(f"  {linear_left} = {linear_right}: {np.isclose(linear_left, linear_right)}")

    # Positive definiteness
    print(f"Positive definiteness: <u, u> = {np.dot(u, u)} > 0")

    # Norms derived from inner products
    norm_u = np.sqrt(np.dot(u, u))
    norm_v = np.sqrt(np.dot(v, v))
    norm_w = np.sqrt(np.dot(w, w))

    print(f"\nNorms derived from inner products:")
    print(f"||u|| = √<u, u> = {norm_u:.3f}")
    print(f"||v|| = √<v, v> = {norm_v:.3f}")
    print(f"||w|| = √<w, w> = {norm_w:.3f}")

    # Visualization
    plt.figure(figsize=(12, 5))

    # Geometric meaning of inner product
    plt.subplot(1, 2, 1)
    plt.arrow(0, 0, u[0], u[1], head_width=0.2, head_length=0.2,
              fc='red', ec='red', linewidth=2, label='u')
    plt.arrow(0, 0, v[0], v[1], head_width=0.2, head_length=0.2,
              fc='blue', ec='blue', linewidth=2, label='v')

    # Projection
    proj_v_on_u = (np.dot(v, u) / np.dot(u, u)) * u
    plt.arrow(0, 0, proj_v_on_u[0], proj_v_on_u[1], head_width=0.1, head_length=0.1,
              fc='green', ec='green', linewidth=2, linestyle='--', label='proj_u(v)')

    # Angle
    cos_theta = np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))
    theta = np.arccos(cos_theta)

    plt.xlim(-1, 4)
    plt.ylim(-1, 5)
    plt.grid(True, alpha=0.3)
    plt.legend()
    plt.title(f'Geometric Meaning of Inner Product\n<u,v> = ||u||||v||cos(θ) = {uv:.1f}\nθ = {theta*180/np.pi:.1f}°')
    plt.xlabel('x')
    plt.ylabel('y')

    # Inner products of different vectors
    plt.subplot(1, 2, 2)
    vectors = [u, v, w]
    labels = ['u', 'v', 'w']
    colors = ['red', 'blue', 'green']

    for vec, label, color in zip(vectors, labels, colors):
        plt.arrow(0, 0, vec[0], vec[1], head_width=0.2, head_length=0.2,
                 fc=color, ec=color, linewidth=2, label=label)

    plt.xlim(-3, 4)
    plt.ylim(-1, 5)
    plt.grid(True, alpha=0.3)
    plt.legend()
    plt.title('Vectors and Their Inner Product Relationships')
    plt.xlabel('x')
    plt.ylabel('y')

    plt.tight_layout()
    plt.show()

inner_product_basics()

Orthogonality and Orthogonal Complements

Definition of Orthogonality

Two vectors $\mathbf{u}$ and $\mathbf{v}$ are orthogonal, written $\mathbf{u} \perp \mathbf{v}$, if and only if $\langle \mathbf{u}, \mathbf{v} \rangle = 0$.

Orthogonal Complement

The orthogonal complement of a set $S$, denoted $S^{\perp}$, is defined as $S^{\perp} = \{\mathbf{v} \in V : \langle \mathbf{v}, \mathbf{s} \rangle = 0 \text{ for all } \mathbf{s} \in S\}$.
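
Numerically, when $S$ is a finite set of vectors in $\mathbb{R}^n$, $S^{\perp}$ is exactly the null space of the matrix whose rows are those vectors. A minimal sketch, assuming SciPy's scipy.linalg.null_space (available since SciPy 1.1) and an illustrative choice of vectors:

import numpy as np
from scipy.linalg import null_space

# Rows of M span a 2-D subspace S of R^3; null_space(M) gives a basis of S^perp.
M = np.array([[1.0, 2.0, 1.0],
              [2.0, -1.0, 0.0]])

S_perp = null_space(M)   # columns: an orthonormal basis of S^perp
print("Basis of S^perp:\n", S_perp)
print("Orthogonal to every row of M:", np.allclose(M @ S_perp, 0))
print("dim S + dim S^perp =", np.linalg.matrix_rank(M) + S_perp.shape[1])  # = 3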

def orthogonality_demo():
    """Demonstration of orthogonality concepts and applications"""

    # Orthogonal vectors in three-dimensional space
    v1 = np.array([1, 2, 1])
    v2 = np.array([2, -1, 0])
    v3 = np.array([1, 0, -2])

    print("Vectors in three-dimensional space:")
    print(f"v1 = {v1}")
    print(f"v2 = {v2}")
    print(f"v3 = {v3}")

    # Check orthogonality
    dot_products = [
        (np.dot(v1, v2), "v1·v2"),
        (np.dot(v1, v3), "v1·v3"),
        (np.dot(v2, v3), "v2·v3")
    ]

    print(f"\nOrthogonality check:")
    for dot_prod, desc in dot_products:
        orthogonal = abs(dot_prod) < 1e-10
        print(f"{desc} = {dot_prod:.3f} {'(orthogonal)' if orthogonal else '(not orthogonal)'}")

    # Construct an orthonormal basis (Gram-Schmidt with normalization)
    print(f"\nConstructing orthonormal basis:")

    # Start with v1
    u1 = v1 / np.linalg.norm(v1)
    print(f"u1 = v1/||v1|| = {u1}")

    # Make v2 orthogonal to u1
    proj_v2_u1 = np.dot(v2, u1) * u1
    u2_unnorm = v2 - proj_v2_u1
    u2 = u2_unnorm / np.linalg.norm(u2_unnorm)
    print(f"u2 = (v2 - proj_u1(v2))/||v2 - proj_u1(v2)|| = {u2}")

    # Make v3 orthogonal to u1 and u2
    proj_v3_u1 = np.dot(v3, u1) * u1
    proj_v3_u2 = np.dot(v3, u2) * u2
    u3_unnorm = v3 - proj_v3_u1 - proj_v3_u2
    u3 = u3_unnorm / np.linalg.norm(u3_unnorm)
    print(f"u3 = {u3}")

    # Verify orthonormality (the normalized vectors should have Gram matrix I)
    orthonormal_basis = np.column_stack([u1, u2, u3])
    gram_matrix = orthonormal_basis.T @ orthonormal_basis

    print(f"\nOrthonormal basis verification:")
    print(f"Gram matrix U^T U = \n{gram_matrix}")
    print(f"Is identity matrix: {np.allclose(gram_matrix, np.eye(3))}")

    # Visualize three-dimensional orthogonal vectors
    fig = plt.figure(figsize=(12, 5))

    # Original vectors
    ax1 = fig.add_subplot(121, projection='3d')
    origin = [0, 0, 0]

    vectors = [v1, v2, v3]
    labels = ['v1', 'v2', 'v3']
    colors = ['red', 'green', 'blue']

    for vec, label, color in zip(vectors, labels, colors):
        ax1.quiver(origin[0], origin[1], origin[2], vec[0], vec[1], vec[2],
                  color=color, arrow_length_ratio=0.1, linewidth=3, label=label)

    ax1.set_xlim(-2, 3)
    ax1.set_ylim(-2, 3)
    ax1.set_zlim(-2, 2)
    ax1.legend()
    ax1.set_title('Original Vectors')

    # Orthogonal basis
    ax2 = fig.add_subplot(122, projection='3d')

    orthogonal_vectors = [u1, u2, u3]
    labels = ['u1', 'u2', 'u3']

    for vec, label, color in zip(orthogonal_vectors, labels, colors):
        ax2.quiver(origin[0], origin[1], origin[2], vec[0], vec[1], vec[2],
                  color=color, arrow_length_ratio=0.1, linewidth=3, label=label)

    ax2.set_xlim(-1, 1)
    ax2.set_ylim(-1, 1)
    ax2.set_zlim(-1, 1)
    ax2.legend()
    ax2.set_title('Orthogonal Basis')

    plt.tight_layout()
    plt.show()

orthogonality_demo()

Gram-Schmidt Orthogonalization Process

Algorithm Steps

Given a linearly independent set of vectors $\{v_1, v_2, \ldots, v_k\}$, the Gram-Schmidt process constructs an orthogonal set $\{u_1, u_2, \ldots, u_k\}$ spanning the same subspace:

  1. $u_1 = v_1$
  2. $u_2 = v_2 - \text{proj}_{u_1}(v_2)$
  3. $u_3 = v_3 - \text{proj}_{u_1}(v_3) - \text{proj}_{u_2}(v_3)$
  4. In general, $u_i = v_i - \sum_{j=1}^{i-1} \text{proj}_{u_j}(v_i)$

where the projection of $v$ onto $u$ is $\text{proj}_u(v) = \dfrac{\langle v, u \rangle}{\langle u, u \rangle}\,u$. Normalizing each $u_i$ then yields an orthonormal set.
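
The projection formula amounts to a two-line helper; the gram_schmidt implementation below performs the same operation inline. A minimal sketch with illustrative vectors:

import numpy as np

def proj(u, v):
    """Orthogonal projection of v onto the line spanned by u."""
    return (np.dot(v, u) / np.dot(u, u)) * u

u = np.array([1.0, 2.0, 1.0])
v = np.array([1.0, 0.0, 1.0])
# After removing the projection, what remains is orthogonal to u:
print(np.isclose(np.dot(v - proj(u, v), u), 0.0))  # True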

def gram_schmidt_process():
    """Gram-Schmidt orthogonalization process"""

    def gram_schmidt(vectors):
        """
        Gram-Schmidt orthogonalization algorithm
        Input: Linearly independent vectors (column vectors)
        Output: Orthogonal vectors, orthonormal vectors
        """
        V = vectors.copy()
        n, k = V.shape

        # Orthogonal vectors
        U = np.zeros_like(V)
        # Orthonormal vectors
        Q = np.zeros_like(V)

        for i in range(k):
            # Current vector
            ui = V[:, i].copy()

            # Subtract projections onto previous orthogonal vectors
            for j in range(i):
                proj = np.dot(V[:, i], U[:, j]) / np.dot(U[:, j], U[:, j]) * U[:, j]
                ui = ui - proj

            U[:, i] = ui
            Q[:, i] = ui / np.linalg.norm(ui)

        return U, Q

    # Test vector set
    A = np.array([[1, 1, 0],
                  [1, 0, 1],
                  [0, 1, 1]], dtype=float)

    print("Original vector set A:")
    print(A)
    print(f"Column vectors: v1={A[:,0]}, v2={A[:,1]}, v3={A[:,2]}")

    # Apply Gram-Schmidt process
    U, Q = gram_schmidt(A)

    print(f"\nOrthogonalized vector set U:")
    print(U)

    print(f"\nOrthonormalized vector set Q:")
    print(Q)

    # Verify orthogonality
    print(f"\nVerifying orthogonality:")
    U_dot = U.T @ U
    print(f"U^T U = \n{U_dot}")

    print(f"\nVerifying orthonormality:")
    Q_dot = Q.T @ Q
    print(f"Q^T Q = \n{Q_dot}")
    print(f"Is identity matrix: {np.allclose(Q_dot, np.eye(3))}")

    # Compare with NumPy's QR decomposition
    Q_numpy, R_numpy = qr(A)
    print(f"\nNumPy QR decomposition's Q matrix:")
    print(Q_numpy)

    # Note: signs may differ, but they span the same space
    print(f"\nDo our Q and NumPy's Q span the same space:")
    for i in range(3):
        # Check if Q[:, i] = ±Q_numpy[:, i]
        same_or_opposite = np.allclose(Q[:, i], Q_numpy[:, i]) or np.allclose(Q[:, i], -Q_numpy[:, i])
        print(f"Column {i+1}: {same_or_opposite}")

    # Visualization of the process
    fig = plt.figure(figsize=(15, 5))

    # Original vectors
    ax1 = fig.add_subplot(131, projection='3d')
    origin = [0, 0, 0]
    colors = ['red', 'green', 'blue']

    for i in range(3):
        ax1.quiver(origin[0], origin[1], origin[2],
                  A[0,i], A[1,i], A[2,i],
                  color=colors[i], arrow_length_ratio=0.1,
                  linewidth=3, label=f'v{i+1}')

    ax1.set_xlim(-0.5, 1.5)
    ax1.set_ylim(-0.5, 1.5)
    ax1.set_zlim(-0.5, 1.5)
    ax1.legend()
    ax1.set_title('Original Vector Set')

    # Orthogonal vectors
    ax2 = fig.add_subplot(132, projection='3d')

    for i in range(3):
        ax2.quiver(origin[0], origin[1], origin[2],
                  U[0,i], U[1,i], U[2,i],
                  color=colors[i], arrow_length_ratio=0.1,
                  linewidth=3, label=f'u{i+1}')

    ax2.set_xlim(-1, 1)
    ax2.set_ylim(-1, 1)
    ax2.set_zlim(-1, 1)
    ax2.legend()
    ax2.set_title('Orthogonal Vector Set')

    # Orthonormal vectors
    ax3 = fig.add_subplot(133, projection='3d')

    for i in range(3):
        ax3.quiver(origin[0], origin[1], origin[2],
                  Q[0,i], Q[1,i], Q[2,i],
                  color=colors[i], arrow_length_ratio=0.1,
                  linewidth=3, label=f'q{i+1}')

    ax3.set_xlim(-1, 1)
    ax3.set_ylim(-1, 1)
    ax3.set_zlim(-1, 1)
    ax3.legend()
    ax3.set_title('Orthonormal Vector Set')

    plt.tight_layout()
    plt.show()

    return U, Q

gram_schmidt_process()

Orthogonal Matrices

Definition and Properties

A matrix $Q$ is an orthogonal matrix if and only if $Q^TQ = QQ^T = I$.

Important properties of orthogonal matrices:

  1. $\det(Q) = \pm 1$
  2. Preserves vector length: $\|Q\mathbf{x}\| = \|\mathbf{x}\|$
  3. Preserves angles: $\langle Q\mathbf{u}, Q\mathbf{v} \rangle = \langle \mathbf{u}, \mathbf{v} \rangle$
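
A handy related fact, sketched below with an illustrative random matrix: the $Q$ factor of any QR decomposition is orthogonal by construction, which gives a quick way to produce orthogonal matrices for experiments.

import numpy as np

# The Q factor of a QR decomposition is orthogonal by construction.
rng = np.random.default_rng(0)
M = rng.standard_normal((4, 4))
Q, _ = np.linalg.qr(M)

print(np.allclose(Q.T @ Q, np.eye(4)))                        # Q^T Q = I
print(np.isclose(abs(np.linalg.det(Q)), 1.0))                 # |det Q| = 1
x = rng.standard_normal(4)
print(np.isclose(np.linalg.norm(Q @ x), np.linalg.norm(x)))   # lengths preserved
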
def orthogonal_matrices():
    """Properties and applications of orthogonal matrices"""

    # Construct rotation matrix (typical example of orthogonal matrix)
    theta = np.pi/6  # 30 degrees
    rotation_2d = np.array([[np.cos(theta), -np.sin(theta)],
                           [np.sin(theta), np.cos(theta)]])

    print("2D Rotation Matrix (30 degrees):")
    print(rotation_2d)

    # Verify orthogonality
    print(f"\nR^T R = \n{rotation_2d.T @ rotation_2d}")
    print(f"Is identity matrix: {np.allclose(rotation_2d.T @ rotation_2d, np.eye(2))}")
    print(f"Determinant: {np.linalg.det(rotation_2d):.6f}")

    # 3D rotation matrix (around z-axis)
    phi = np.pi/4  # 45 degrees
    rotation_3d = np.array([[np.cos(phi), -np.sin(phi), 0],
                           [np.sin(phi), np.cos(phi), 0],
                           [0, 0, 1]])

    print(f"\n3D Rotation Matrix (around z-axis, 45 degrees):")
    print(rotation_3d)
    print(f"Is orthogonal: {np.allclose(rotation_3d.T @ rotation_3d, np.eye(3))}")

    # Reflection matrix
    reflection = np.array([[1, 0],
                          [0, -1]])  # Reflection about x-axis

    print(f"\nReflection Matrix (about x-axis):")
    print(reflection)
    print(f"Is orthogonal: {np.allclose(reflection.T @ reflection, np.eye(2))}")
    print(f"Determinant: {np.linalg.det(reflection):.0f}")

    # Orthogonal matrices preserve length and angles
    print(f"\nPreservation properties of orthogonal transformations:")

    # Test vectors
    u = np.array([3, 4])
    v = np.array([1, 2])

    # Transformed vectors
    u_rot = rotation_2d @ u
    v_rot = rotation_2d @ v

    # Check length preservation
    print(f"Original lengths: ||u|| = {np.linalg.norm(u):.3f}, ||v|| = {np.linalg.norm(v):.3f}")
    print(f"Transformed lengths: ||Ru|| = {np.linalg.norm(u_rot):.3f}, ||Rv|| = {np.linalg.norm(v_rot):.3f}")

    # Check angle preservation
    angle_original = np.arccos(np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v)))
    angle_transformed = np.arccos(np.dot(u_rot, v_rot) / (np.linalg.norm(u_rot) * np.linalg.norm(v_rot)))

    print(f"Original angle: {angle_original * 180/np.pi:.1f} degrees")
    print(f"Transformed angle: {angle_transformed * 180/np.pi:.1f} degrees")

    # Visualize orthogonal transformations
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))

    # First row: 2D transformations
    # Original vectors
    ax1 = axes[0, 0]
    ax1.arrow(0, 0, u[0], u[1], head_width=0.2, head_length=0.2,
              fc='red', ec='red', linewidth=2, label='u')
    ax1.arrow(0, 0, v[0], v[1], head_width=0.2, head_length=0.2,
              fc='blue', ec='blue', linewidth=2, label='v')

    # Draw unit circle
    theta_circle = np.linspace(0, 2*np.pi, 100)
    circle = np.array([np.cos(theta_circle), np.sin(theta_circle)])
    ax1.plot(circle[0], circle[1], 'k--', alpha=0.5, label='Unit circle')

    ax1.set_xlim(-5, 5)
    ax1.set_ylim(-5, 5)
    ax1.set_aspect('equal')
    ax1.grid(True, alpha=0.3)
    ax1.legend()
    ax1.set_title('Original Vectors')

    # After rotation
    ax2 = axes[0, 1]
    ax2.arrow(0, 0, u_rot[0], u_rot[1], head_width=0.2, head_length=0.2,
              fc='red', ec='red', linewidth=2, label='Ru')
    ax2.arrow(0, 0, v_rot[0], v_rot[1], head_width=0.2, head_length=0.2,
              fc='blue', ec='blue', linewidth=2, label='Rv')

    # Rotated unit circle (still unit circle)
    circle_rot = rotation_2d @ circle
    ax2.plot(circle_rot[0], circle_rot[1], 'k--', alpha=0.5, label='Rotated unit circle')

    ax2.set_xlim(-5, 5)
    ax2.set_ylim(-5, 5)
    ax2.set_aspect('equal')
    ax2.grid(True, alpha=0.3)
    ax2.legend()
    ax2.set_title(f'After {theta*180/np.pi:.0f}° Rotation')

    # After reflection
    u_ref = reflection @ u
    v_ref = reflection @ v

    ax3 = axes[0, 2]
    ax3.arrow(0, 0, u_ref[0], u_ref[1], head_width=0.2, head_length=0.2,
              fc='red', ec='red', linewidth=2, label='Su')
    ax3.arrow(0, 0, v_ref[0], v_ref[1], head_width=0.2, head_length=0.2,
              fc='blue', ec='blue', linewidth=2, label='Sv')

    circle_ref = reflection @ circle
    ax3.plot(circle_ref[0], circle_ref[1], 'k--', alpha=0.5, label='Reflected unit circle')

    ax3.set_xlim(-5, 5)
    ax3.set_ylim(-5, 5)
    ax3.set_aspect('equal')
    ax3.grid(True, alpha=0.3)
    ax3.legend()
    ax3.set_title('After Reflection about x-axis')

    # Second row: 3D case (projected to 2D display)
    u3d = np.array([2, 3, 1])
    v3d = np.array([1, 1, 2])

    u3d_rot = rotation_3d @ u3d
    v3d_rot = rotation_3d @ v3d

    for i, (ax, title, vecs) in enumerate([(axes[1,0], '3D Original (xy projection)', [u3d, v3d]),
                                           (axes[1,1], '3D After Rotation (xy projection)', [u3d_rot, v3d_rot])]):
        for j, (vec, color, label) in enumerate(zip(vecs, ['red', 'blue'], ['u3d', 'v3d'])):
            ax.arrow(0, 0, vec[0], vec[1], head_width=0.1, head_length=0.1,
                    fc=color, ec=color, linewidth=2, label=f'{label}(xy)')

        ax.set_xlim(-4, 4)
        ax.set_ylim(-4, 4)
        ax.set_aspect('equal')
        ax.grid(True, alpha=0.3)
        ax.legend()
        ax.set_title(title)

    # Orthogonal matrix classification
    ax_class = axes[1, 2]
    ax_class.text(0.5, 0.8, 'Orthogonal Matrix Classification', fontsize=14, ha='center', weight='bold',
                  transform=ax_class.transAxes)

    classification = [
        'det(Q) = +1: Rotation matrices',
        '• Preserve orientation',
        '• Preserve handedness',
        '',
        'det(Q) = -1: Reflection matrices',
        '• Change orientation',
        '• Change handedness'
    ]

    for i, text in enumerate(classification):
        y_pos = 0.65 - i * 0.08
        weight = 'bold' if text.startswith('det') else 'normal'
        ax_class.text(0.1, y_pos, text, fontsize=10, transform=ax_class.transAxes, weight=weight)

    ax_class.set_xlim(0, 1)
    ax_class.set_ylim(0, 1)
    ax_class.axis('off')

    plt.tight_layout()
    plt.show()

orthogonal_matrices()

Least Squares Method

Problem Setup

For an overdetermined system $A\mathbf{x} = \mathbf{b}$ with more equations than unknowns ($m > n$), there is usually no exact solution. The least squares method finds the $\mathbf{x}$ that minimizes $\|A\mathbf{x} - \mathbf{b}\|^2$.

Normal Equations

The least squares solution satisfies the normal equations $A^TA\mathbf{x} = A^T\mathbf{b}$, which follow from requiring the residual $\mathbf{b} - A\mathbf{x}$ to be orthogonal to every column of $A$, i.e. $A^T(\mathbf{b} - A\mathbf{x}) = \mathbf{0}$.

Geometric Interpretation

The least squares solution $\mathbf{x}^*$ makes $A\mathbf{x}^*$ the orthogonal projection of $\mathbf{b}$ onto the column space of $A$.
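
When $A$ has full column rank, this projection can be written explicitly with the projection matrix $P = A(A^TA)^{-1}A^T$. A minimal sketch (random $A$ as an illustrative stand-in) verifying its defining properties:

import numpy as np

np.random.seed(0)
A = np.random.randn(6, 3)          # assumed full column rank

# Projection ("hat") matrix onto the column space of A
P = A @ np.linalg.inv(A.T @ A) @ A.T

print("Idempotent:", np.allclose(P @ P, P))   # projecting twice changes nothing
print("Symmetric:", np.allclose(P.T, P))      # hallmark of orthogonal projection
b = np.random.randn(6)
print("Residual orthogonal to col(A):", np.allclose(A.T @ (b - P @ b), 0))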

def least_squares_method():
    """Theory and applications of least squares method"""

    # Generate example of overdetermined system
    np.random.seed(42)
    m, n = 6, 3  # 6 equations, 3 unknowns

    A = np.random.randn(m, n)
    x_true = np.array([2, -1, 3])  # true coefficients used to generate the data
    b_exact = A @ x_true

    # Add noise so the system becomes inconsistent (no exact solution)
    noise = 0.1 * np.random.randn(m)
    b = b_exact + noise

    print(f"Overdetermined system Ax = b:")
    print(f"A shape: {A.shape}")
    print(f"A = \n{A}")
    print(f"b = {b}")

    # Method 1: Normal equations
    ATA = A.T @ A
    ATb = A.T @ b
    x_normal = np.linalg.solve(ATA, ATb)

    print(f"\nMethod 1 - Normal equations (A^T A)x = A^T b:")
    print(f"A^T A = \n{ATA}")
    print(f"A^T b = {ATb}")
    print(f"Least squares solution: x = {x_normal}")

    # Method 2: QR decomposition
    Q, R = qr(A, mode='economic')  # economic mode: Q is m x n, R is n x n square
    x_qr = np.linalg.solve(R, Q.T @ b)

    print(f"\nMethod 2 - QR decomposition:")
    print(f"Q shape: {Q.shape}")
    print(f"R shape: {R.shape}")
    print(f"Least squares solution: x = {x_qr}")

    # Method 3: SVD decomposition
    U, s, Vt = svd(A, full_matrices=False)  # thin SVD so the shapes align
    x_svd = Vt.T @ (np.diag(1/s) @ (U.T @ b))

    print(f"\nMethod 3 - SVD decomposition:")
    print(f"Singular values: {s}")
    print(f"Least squares solution: x = {x_svd}")

    # Method 4: NumPy's lstsq
    x_numpy, residuals, rank, singular_values = np.linalg.lstsq(A, b, rcond=None)

    print(f"\nMethod 4 - NumPy lstsq:")
    print(f"Least squares solution: x = {x_numpy}")
    print(f"Sum of squared residuals: {residuals}")

    # Verify all methods give the same result
    print(f"\nVerifying consistency of different methods:")
    print(f"Normal equations vs QR: {np.allclose(x_normal, x_qr)}")
    print(f"Normal equations vs SVD: {np.allclose(x_normal, x_svd)}")
    print(f"Normal equations vs NumPy: {np.allclose(x_normal, x_numpy)}")

    # Geometric interpretation: projection
    print(f"\nGeometric interpretation - orthogonal projection:")

    # Projection of b onto column space of A
    proj_b = A @ x_normal
    residual = b - proj_b

    print(f"Projection proj_A(b) = {proj_b}")
    print(f"Residual r = b - proj_A(b) = {residual}")
    print(f"Residual length: ||r|| = {np.linalg.norm(residual):.6f}")

    # Verify residual is orthogonal to column space
    print(f"\nVerifying residual is orthogonal to column space:")
    for i in range(n):
        dot_product = np.dot(A[:, i], residual)
        print(f"Inner product of A's column {i+1} with residual: {dot_product:.2e}")

    # Linear regression application example
    print(f"\nApplication example: Linear regression")

    # Generate data points
    x_data = np.linspace(0, 10, 20)
    y_true = 2*x_data + 1  # True relationship: y = 2x + 1
    y_data = y_true + 0.5*np.random.randn(20)  # Add noise

    # Construct design matrix (including constant term)
    A_reg = np.column_stack([np.ones(len(x_data)), x_data])

    # Least squares fitting
    coeffs = np.linalg.solve(A_reg.T @ A_reg, A_reg.T @ y_data)

    print(f"Data fitting: y = ax + b")
    print(f"Estimated coefficients: b = {coeffs[0]:.3f}, a = {coeffs[1]:.3f}")
    print(f"True coefficients: b = 1.000, a = 2.000")

    # Visualization
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))

    # Residual analysis
    ax1 = axes[0, 0]
    residuals_all = b - A @ x_normal
    ax1.stem(range(len(residuals_all)), residuals_all, basefmt=' ')
    ax1.axhline(y=0, color='red', linestyle='--', alpha=0.7)
    ax1.set_xlabel('Equation number')
    ax1.set_ylabel('Residual')
    ax1.set_title('Residual Distribution')
    ax1.grid(True, alpha=0.3)

    # Geometric interpretation (2D projection)
    ax2 = axes[0, 1]

    # Project the first two components of b onto the line spanned by the
    # first column of A (using both columns would span all of R^2 and
    # leave a zero residual, hiding the geometry)
    b_2d = b[:2]
    a_col = A[:2, 0]
    proj_2d = (np.dot(b_2d, a_col) / np.dot(a_col, a_col)) * a_col
    residual_2d = b_2d - proj_2d

    ax2.arrow(0, 0, b_2d[0], b_2d[1], head_width=0.1, head_length=0.1,
              fc='blue', ec='blue', linewidth=2, label='b (original)')
    ax2.arrow(0, 0, proj_2d[0], proj_2d[1], head_width=0.1, head_length=0.1,
              fc='green', ec='green', linewidth=2, label='projection')
    ax2.arrow(proj_2d[0], proj_2d[1], residual_2d[0], residual_2d[1],
              head_width=0.1, head_length=0.1, fc='red', ec='red',
              linewidth=2, label='residual')

    # Draw the subspace being projected onto (a line through the origin)
    t = np.linspace(-3, 3, 2)
    ax2.plot(t * a_col[0], t * a_col[1], color='gray', alpha=0.5,
             linewidth=3, label='column span')

    ax2.set_xlim(-3, 4)
    ax2.set_ylim(-3, 4)
    ax2.set_aspect('equal')
    ax2.grid(True, alpha=0.3)
    ax2.legend()
    ax2.set_title('Geometric Interpretation (2D projection)')

    # Linear regression results
    ax3 = axes[1, 0]
    ax3.scatter(x_data, y_data, alpha=0.7, label='Data points')
    ax3.plot(x_data, y_true, 'g--', linewidth=2, label='True relationship')
    ax3.plot(x_data, A_reg @ coeffs, 'r-', linewidth=2, label='Fitted line')
    ax3.set_xlabel('x')
    ax3.set_ylabel('y')
    ax3.legend()
    ax3.grid(True, alpha=0.3)
    ax3.set_title('Linear Regression Example')

    # Condition number analysis
    ax4 = axes[1, 1]

    cond_number = np.linalg.cond(A)
    cond_ATA = np.linalg.cond(ATA)  # cond(A^T A) = cond(A)^2: normal equations square the conditioning

    methods = ['Original matrix A', 'A^T A']
    cond_numbers = [cond_number, cond_ATA]

    bars = ax4.bar(methods, cond_numbers, color=['blue', 'red'], alpha=0.7)
    ax4.set_ylabel('Condition number')
    ax4.set_title('Numerical Stability Comparison')
    ax4.set_yscale('log')

    for bar, cond in zip(bars, cond_numbers):
        ax4.text(bar.get_x() + bar.get_width()/2, bar.get_height(),
                f'{cond:.1e}', ha='center', va='bottom')

    ax4.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

least_squares_method()

Chapter Summary

Core Concepts Summary Table

| Concept | Definition | Important Properties | Applications |
|---|---|---|---|
| Inner product | $\langle u, v \rangle$ | Symmetry, linearity, positive definiteness | Defines angles and lengths |
| Orthogonality | $\langle u, v \rangle = 0$ | Pythagorean theorem holds | Simplifies calculations |
| Orthogonal basis | Mutually orthogonal basis vectors | Simple projection formulas | Coordinate calculations |
| Orthonormal basis | Orthogonal with unit length | $\langle e_i, e_j \rangle = \delta_{ij}$ | Optimal basis |
| Orthogonal matrix | $Q^TQ = I$ | Preserves lengths and angles | Rotations and reflections |
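
The Pythagorean row of the table is worth a one-line numerical check: for orthogonal $u$ and $v$, $\|u+v\|^2 = \|u\|^2 + \|v\|^2$. A minimal sketch with illustrative vectors:

import numpy as np

u = np.array([2.0, -1.0, 0.0])
v = np.array([1.0, 2.0, 3.0])
print("Orthogonal:", np.isclose(np.dot(u, v), 0.0))   # <u, v> = 0
lhs = np.linalg.norm(u + v)**2
rhs = np.linalg.norm(u)**2 + np.linalg.norm(v)**2
print("||u+v||^2 = ||u||^2 + ||v||^2:", np.isclose(lhs, rhs))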

Gram-Schmidt Algorithm Flow

Input a linearly independent set $v_1, \ldots, v_k$ $\rightarrow$ for each $i$, subtract from $v_i$ its projections onto $u_1, \ldots, u_{i-1}$ to obtain $u_i$ $\rightarrow$ normalize $q_i = u_i / \|u_i\|$ $\rightarrow$ output the orthonormal set $q_1, \ldots, q_k$.

Inner product spaces and orthogonality give linear algebra its geometric intuition, letting us speak of angles, lengths, and distances in abstract vector spaces. Beyond their mathematical importance, these ideas have wide applications in engineering, data analysis, and signal processing. Orthogonality is a powerful tool for simplifying complex problems, and the least squares method is the standard approach to overdetermined systems and data fitting.